hpcflow-new2 0.2.0a189__py3-none-any.whl → 0.2.0a199__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176)
  1. hpcflow/__pyinstaller/hook-hpcflow.py +9 -6
  2. hpcflow/_version.py +1 -1
  3. hpcflow/app.py +1 -0
  4. hpcflow/data/scripts/bad_script.py +2 -0
  5. hpcflow/data/scripts/do_nothing.py +2 -0
  6. hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
  7. hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
  8. hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
  9. hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
  10. hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
  11. hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
  12. hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
  13. hpcflow/data/scripts/input_file_generator_basic.py +3 -0
  14. hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
  15. hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
  16. hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
  17. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
  18. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
  19. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
  20. hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
  21. hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
  22. hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
  23. hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +1 -1
  24. hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
  25. hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +1 -1
  26. hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
  27. hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
  28. hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
  29. hpcflow/data/scripts/output_file_parser_basic.py +3 -0
  30. hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
  31. hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
  32. hpcflow/data/scripts/script_exit_test.py +5 -0
  33. hpcflow/data/template_components/environments.yaml +1 -1
  34. hpcflow/sdk/__init__.py +26 -15
  35. hpcflow/sdk/app.py +2192 -768
  36. hpcflow/sdk/cli.py +506 -296
  37. hpcflow/sdk/cli_common.py +105 -7
  38. hpcflow/sdk/config/__init__.py +1 -1
  39. hpcflow/sdk/config/callbacks.py +115 -43
  40. hpcflow/sdk/config/cli.py +126 -103
  41. hpcflow/sdk/config/config.py +674 -318
  42. hpcflow/sdk/config/config_file.py +131 -95
  43. hpcflow/sdk/config/errors.py +125 -84
  44. hpcflow/sdk/config/types.py +148 -0
  45. hpcflow/sdk/core/__init__.py +25 -1
  46. hpcflow/sdk/core/actions.py +1771 -1059
  47. hpcflow/sdk/core/app_aware.py +24 -0
  48. hpcflow/sdk/core/cache.py +139 -79
  49. hpcflow/sdk/core/command_files.py +263 -287
  50. hpcflow/sdk/core/commands.py +145 -112
  51. hpcflow/sdk/core/element.py +828 -535
  52. hpcflow/sdk/core/enums.py +192 -0
  53. hpcflow/sdk/core/environment.py +74 -93
  54. hpcflow/sdk/core/errors.py +455 -52
  55. hpcflow/sdk/core/execute.py +207 -0
  56. hpcflow/sdk/core/json_like.py +540 -272
  57. hpcflow/sdk/core/loop.py +751 -347
  58. hpcflow/sdk/core/loop_cache.py +164 -47
  59. hpcflow/sdk/core/object_list.py +370 -207
  60. hpcflow/sdk/core/parameters.py +1100 -627
  61. hpcflow/sdk/core/rule.py +59 -41
  62. hpcflow/sdk/core/run_dir_files.py +21 -37
  63. hpcflow/sdk/core/skip_reason.py +7 -0
  64. hpcflow/sdk/core/task.py +1649 -1339
  65. hpcflow/sdk/core/task_schema.py +308 -196
  66. hpcflow/sdk/core/test_utils.py +191 -114
  67. hpcflow/sdk/core/types.py +440 -0
  68. hpcflow/sdk/core/utils.py +485 -309
  69. hpcflow/sdk/core/validation.py +82 -9
  70. hpcflow/sdk/core/workflow.py +2544 -1178
  71. hpcflow/sdk/core/zarr_io.py +98 -137
  72. hpcflow/sdk/data/workflow_spec_schema.yaml +2 -0
  73. hpcflow/sdk/demo/cli.py +53 -33
  74. hpcflow/sdk/helper/cli.py +18 -15
  75. hpcflow/sdk/helper/helper.py +75 -63
  76. hpcflow/sdk/helper/watcher.py +61 -28
  77. hpcflow/sdk/log.py +122 -71
  78. hpcflow/sdk/persistence/__init__.py +8 -31
  79. hpcflow/sdk/persistence/base.py +1360 -606
  80. hpcflow/sdk/persistence/defaults.py +6 -0
  81. hpcflow/sdk/persistence/discovery.py +38 -0
  82. hpcflow/sdk/persistence/json.py +568 -188
  83. hpcflow/sdk/persistence/pending.py +382 -179
  84. hpcflow/sdk/persistence/store_resource.py +39 -23
  85. hpcflow/sdk/persistence/types.py +318 -0
  86. hpcflow/sdk/persistence/utils.py +14 -11
  87. hpcflow/sdk/persistence/zarr.py +1337 -433
  88. hpcflow/sdk/runtime.py +44 -41
  89. hpcflow/sdk/submission/{jobscript_info.py → enums.py} +39 -12
  90. hpcflow/sdk/submission/jobscript.py +1651 -692
  91. hpcflow/sdk/submission/schedulers/__init__.py +167 -39
  92. hpcflow/sdk/submission/schedulers/direct.py +121 -81
  93. hpcflow/sdk/submission/schedulers/sge.py +170 -129
  94. hpcflow/sdk/submission/schedulers/slurm.py +291 -268
  95. hpcflow/sdk/submission/schedulers/utils.py +12 -2
  96. hpcflow/sdk/submission/shells/__init__.py +14 -15
  97. hpcflow/sdk/submission/shells/base.py +150 -29
  98. hpcflow/sdk/submission/shells/bash.py +283 -173
  99. hpcflow/sdk/submission/shells/os_version.py +31 -30
  100. hpcflow/sdk/submission/shells/powershell.py +228 -170
  101. hpcflow/sdk/submission/submission.py +1014 -335
  102. hpcflow/sdk/submission/types.py +140 -0
  103. hpcflow/sdk/typing.py +182 -12
  104. hpcflow/sdk/utils/arrays.py +71 -0
  105. hpcflow/sdk/utils/deferred_file.py +55 -0
  106. hpcflow/sdk/utils/hashing.py +16 -0
  107. hpcflow/sdk/utils/patches.py +12 -0
  108. hpcflow/sdk/utils/strings.py +33 -0
  109. hpcflow/tests/api/test_api.py +32 -0
  110. hpcflow/tests/conftest.py +27 -6
  111. hpcflow/tests/data/multi_path_sequences.yaml +29 -0
  112. hpcflow/tests/data/workflow_test_run_abort.yaml +34 -35
  113. hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
  114. hpcflow/tests/schedulers/slurm/test_slurm_submission.py +5 -2
  115. hpcflow/tests/scripts/test_input_file_generators.py +282 -0
  116. hpcflow/tests/scripts/test_main_scripts.py +866 -85
  117. hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
  118. hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
  119. hpcflow/tests/shells/wsl/test_wsl_submission.py +12 -4
  120. hpcflow/tests/unit/test_action.py +262 -75
  121. hpcflow/tests/unit/test_action_rule.py +9 -4
  122. hpcflow/tests/unit/test_app.py +33 -6
  123. hpcflow/tests/unit/test_cache.py +46 -0
  124. hpcflow/tests/unit/test_cli.py +134 -1
  125. hpcflow/tests/unit/test_command.py +71 -54
  126. hpcflow/tests/unit/test_config.py +142 -16
  127. hpcflow/tests/unit/test_config_file.py +21 -18
  128. hpcflow/tests/unit/test_element.py +58 -62
  129. hpcflow/tests/unit/test_element_iteration.py +50 -1
  130. hpcflow/tests/unit/test_element_set.py +29 -19
  131. hpcflow/tests/unit/test_group.py +4 -2
  132. hpcflow/tests/unit/test_input_source.py +116 -93
  133. hpcflow/tests/unit/test_input_value.py +29 -24
  134. hpcflow/tests/unit/test_jobscript_unit.py +757 -0
  135. hpcflow/tests/unit/test_json_like.py +44 -35
  136. hpcflow/tests/unit/test_loop.py +1396 -84
  137. hpcflow/tests/unit/test_meta_task.py +325 -0
  138. hpcflow/tests/unit/test_multi_path_sequences.py +229 -0
  139. hpcflow/tests/unit/test_object_list.py +17 -12
  140. hpcflow/tests/unit/test_parameter.py +29 -7
  141. hpcflow/tests/unit/test_persistence.py +237 -42
  142. hpcflow/tests/unit/test_resources.py +20 -18
  143. hpcflow/tests/unit/test_run.py +117 -6
  144. hpcflow/tests/unit/test_run_directories.py +29 -0
  145. hpcflow/tests/unit/test_runtime.py +2 -1
  146. hpcflow/tests/unit/test_schema_input.py +23 -15
  147. hpcflow/tests/unit/test_shell.py +23 -2
  148. hpcflow/tests/unit/test_slurm.py +8 -7
  149. hpcflow/tests/unit/test_submission.py +38 -89
  150. hpcflow/tests/unit/test_task.py +352 -247
  151. hpcflow/tests/unit/test_task_schema.py +33 -20
  152. hpcflow/tests/unit/test_utils.py +9 -11
  153. hpcflow/tests/unit/test_value_sequence.py +15 -12
  154. hpcflow/tests/unit/test_workflow.py +114 -83
  155. hpcflow/tests/unit/test_workflow_template.py +0 -1
  156. hpcflow/tests/unit/utils/test_arrays.py +40 -0
  157. hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
  158. hpcflow/tests/unit/utils/test_hashing.py +65 -0
  159. hpcflow/tests/unit/utils/test_patches.py +5 -0
  160. hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
  161. hpcflow/tests/workflows/__init__.py +0 -0
  162. hpcflow/tests/workflows/test_directory_structure.py +31 -0
  163. hpcflow/tests/workflows/test_jobscript.py +334 -1
  164. hpcflow/tests/workflows/test_run_status.py +198 -0
  165. hpcflow/tests/workflows/test_skip_downstream.py +696 -0
  166. hpcflow/tests/workflows/test_submission.py +140 -0
  167. hpcflow/tests/workflows/test_workflows.py +160 -15
  168. hpcflow/tests/workflows/test_zip.py +18 -0
  169. hpcflow/viz_demo.ipynb +6587 -3
  170. {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/METADATA +8 -4
  171. hpcflow_new2-0.2.0a199.dist-info/RECORD +221 -0
  172. hpcflow/sdk/core/parallel.py +0 -21
  173. hpcflow_new2-0.2.0a189.dist-info/RECORD +0 -158
  174. {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/LICENSE +0 -0
  175. {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/WHEEL +0 -0
  176. {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/entry_points.txt +0 -0
@@ -1,26 +1,29 @@
1
1
  """
2
2
  Actions are base components of elements.
3
3
  Element action runs (EARs) are the basic components of any enactment;
4
- they may be grouped together within a jobscript for efficiency.
4
+ they may be grouped together within a jobscript for efficiency.
5
5
  """
6
6
 
7
7
  from __future__ import annotations
8
+ from collections.abc import Mapping
8
9
  import copy
9
10
  from dataclasses import dataclass
10
- from datetime import datetime
11
- import enum
12
11
  import json
12
+ import contextlib
13
+ from collections import defaultdict
13
14
  from pathlib import Path
14
15
  import re
15
16
  from textwrap import indent, dedent
16
- from typing import Any, Dict, List, Optional, Tuple, Union
17
-
18
- from valida.conditions import ConditionLike
17
+ from typing import cast, final, overload, TYPE_CHECKING
18
+ from typing_extensions import override
19
19
 
20
20
  from watchdog.utils.dirsnapshot import DirectorySnapshotDiff
21
21
 
22
- from hpcflow.sdk import app
23
22
  from hpcflow.sdk.core import ABORT_EXIT_CODE
23
+ from hpcflow.sdk.core.app_aware import AppAware
24
+ from hpcflow.sdk.core.enums import ActionScopeType, EARStatus
25
+ from hpcflow.sdk.core.skip_reason import SkipReason
26
+ from hpcflow.sdk.core.task import WorkflowTask
24
27
  from hpcflow.sdk.core.errors import (
25
28
  ActionEnvironmentMissingNameError,
26
29
  MissingCompatibleActionEnvironment,
@@ -28,138 +31,93 @@ from hpcflow.sdk.core.errors import (
28
31
  UnknownScriptDataKey,
29
32
  UnknownScriptDataParameter,
30
33
  UnsupportedScriptDataFormat,
34
+ UnsetParameterDataError,
35
+ UnsetParameterFractionLimitExceededError,
36
+ UnsetParameterNumberLimitExceededError,
31
37
  )
32
38
  from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
39
+ from hpcflow.sdk.core.parameters import ParameterValue
40
+ from hpcflow.sdk.typing import ParamSource, hydrate
33
41
  from hpcflow.sdk.core.utils import (
34
42
  JSONLikeDirSnapShot,
35
43
  split_param_label,
36
44
  swap_nested_dict_keys,
45
+ get_relative_path,
37
46
  )
38
47
  from hpcflow.sdk.log import TimeIt
39
48
  from hpcflow.sdk.core.run_dir_files import RunDirAppFiles
40
-
49
+ from hpcflow.sdk.submission.enums import SubmissionStatus
50
+ from hpcflow.sdk.submission.submission import Submission
51
+ from hpcflow.sdk.utils.hashing import get_hash
52
+
53
+ if TYPE_CHECKING:
54
+ from collections.abc import Callable, Container, Iterable, Iterator, Sequence
55
+ from datetime import datetime
56
+ from re import Pattern
57
+ from typing import Any, ClassVar, Literal
58
+ from typing_extensions import Self
59
+ from valida.conditions import ConditionLike # type: ignore
60
+
61
+ from ..typing import DataIndex, ParamSource
62
+ from ..submission.shells import Shell
63
+ from ..submission.jobscript import Jobscript
64
+ from .commands import Command
65
+ from .command_files import InputFileGenerator, OutputFileParser, FileSpec
66
+ from .element import (
67
+ Element,
68
+ ElementIteration,
69
+ ElementInputs,
70
+ ElementOutputs,
71
+ ElementResources,
72
+ ElementInputFiles,
73
+ ElementOutputFiles,
74
+ )
75
+ from .environment import Environment
76
+ from .parameters import SchemaParameter, Parameter
77
+ from .rule import Rule
78
+ from .task import WorkflowTask
79
+ from .task_schema import TaskSchema
80
+ from .types import ParameterDependence, ScriptData, BlockActionKey
81
+ from .workflow import Workflow
82
+ from .object_list import EnvironmentsList
41
83
 
42
84
  ACTION_SCOPE_REGEX = r"(\w*)(?:\[(.*)\])?"
43
85
 
44
86
 
45
- class ActionScopeType(enum.Enum):
46
- """
47
- Types of action scope.
48
- """
49
-
50
- #: Scope that applies to anything.
51
- ANY = 0
52
- #: Scope that only applies to main scripts.
53
- MAIN = 1
54
- #: Scope that applies to processing steps.
55
- PROCESSING = 2
56
- #: Scope that applies to input file generators.
57
- INPUT_FILE_GENERATOR = 3
58
- #: Scope that applies to output file parsers.
59
- OUTPUT_FILE_PARSER = 4
60
-
61
-
62
- #: Keyword arguments permitted for particular scopes.
63
- ACTION_SCOPE_ALLOWED_KWARGS = {
64
- ActionScopeType.ANY.name: set(),
65
- ActionScopeType.MAIN.name: set(),
66
- ActionScopeType.PROCESSING.name: set(),
67
- ActionScopeType.INPUT_FILE_GENERATOR.name: {"file"},
68
- ActionScopeType.OUTPUT_FILE_PARSER.name: {"output"},
69
- }
87
+ @dataclass
88
+ class UnsetParamTracker:
89
+ """Class to track run IDs that are the sources of unset parameter data for some input
90
+ parameter type.
70
91
 
92
+ Attributes
93
+ ----------
94
+ run_ids
95
+ Set of integer run IDs that have been tracked.
96
+ group_size
97
+ The size of the group, if the associated SchemaInput in question is a group.
71
98
 
72
- class EARStatus(enum.Enum):
73
- """Enumeration of all possible EAR statuses, and their associated status colour."""
99
+ Notes
100
+ -----
101
+ Objects of this class are instantiated within
102
+ `WorkflowTask._get_merged_parameter_data` when we are tracking unset parameters.
74
103
 
75
- def __new__(cls, value, symbol, colour, doc=None):
76
- member = object.__new__(cls)
77
- member._value_ = value
78
- member.colour = colour
79
- member.symbol = symbol
80
- member.__doc__ = doc
81
- return member
104
+ """
82
105
 
83
- #: Not yet associated with a submission.
84
- pending = (
85
- 0,
86
- ".",
87
- "grey46",
88
- "Not yet associated with a submission.",
89
- )
90
- #: Associated with a prepared submission that is not yet submitted.
91
- prepared = (
92
- 1,
93
- ".",
94
- "grey46",
95
- "Associated with a prepared submission that is not yet submitted.",
96
- )
97
- #: Submitted for execution.
98
- submitted = (
99
- 2,
100
- ".",
101
- "grey46",
102
- "Submitted for execution.",
103
- )
104
- #: Executing now.
105
- running = (
106
- 3,
107
- "●",
108
- "dodger_blue1",
109
- "Executing now.",
110
- )
111
- #: Not attempted due to a failure of an upstream action on which this depends,
112
- #: or a loop termination condition being satisfied.
113
- skipped = (
114
- 4,
115
- "s",
116
- "dark_orange",
117
- (
118
- "Not attempted due to a failure of an upstream action on which this depends, "
119
- "or a loop termination condition being satisfied."
120
- ),
121
- )
122
- #: Aborted by the user; downstream actions will be attempted.
123
- aborted = (
124
- 5,
125
- "A",
126
- "deep_pink4",
127
- "Aborted by the user; downstream actions will be attempted.",
128
- )
129
- #: Probably exited successfully.
130
- success = (
131
- 6,
132
- "■",
133
- "green3",
134
- "Probably exited successfully.",
135
- )
136
- #: Probably failed.
137
- error = (
138
- 7,
139
- "E",
140
- "red3",
141
- "Probably failed.",
142
- )
106
+ run_ids: set[int]
107
+ group_size: int
143
108
 
144
- @classmethod
145
- def get_non_running_submitted_states(cls):
146
- """Return the set of all non-running states, excluding those before submission."""
147
- return {
148
- cls.skipped,
149
- cls.aborted,
150
- cls.success,
151
- cls.error,
152
- }
153
109
 
154
- @property
155
- def rich_repr(self):
156
- """
157
- The rich representation of the value.
158
- """
159
- return f"[{self.colour}]{self.symbol}[/{self.colour}]"
110
+ #: Keyword arguments permitted for particular scopes.
111
+ ACTION_SCOPE_ALLOWED_KWARGS: Mapping[str, frozenset[str]] = {
112
+ ActionScopeType.ANY.name: frozenset(),
113
+ ActionScopeType.MAIN.name: frozenset(),
114
+ ActionScopeType.PROCESSING.name: frozenset(),
115
+ ActionScopeType.INPUT_FILE_GENERATOR.name: frozenset({"file"}),
116
+ ActionScopeType.OUTPUT_FILE_PARSER.name: frozenset({"output"}),
117
+ }
160
118
 
161
119
 
162
- class ElementActionRun:
120
+ class ElementActionRun(AppAware):
163
121
  """
164
122
  The Element Action Run (EAR) is an atomic unit of an enacted workflow, representing
165
123
  one unit of work (e.g., particular submitted job to run a program) within that
@@ -204,26 +162,26 @@ class ElementActionRun:
204
162
  Where to run the EAR (if not locally).
205
163
  """
206
164
 
207
- _app_attr = "app"
208
-
209
165
  def __init__(
210
166
  self,
211
167
  id_: int,
212
168
  is_pending: bool,
213
- element_action,
169
+ element_action: ElementAction,
214
170
  index: int,
215
- data_idx: Dict,
216
- commands_idx: List[int],
217
- start_time: Union[datetime, None],
218
- end_time: Union[datetime, None],
219
- snapshot_start: Union[Dict, None],
220
- snapshot_end: Union[Dict, None],
221
- submission_idx: Union[int, None],
222
- success: Union[bool, None],
223
- skip: bool,
224
- exit_code: Union[int, None],
225
- metadata: Dict,
226
- run_hostname: Union[str, None],
171
+ data_idx: DataIndex,
172
+ commands_idx: list[int],
173
+ start_time: datetime | None,
174
+ end_time: datetime | None,
175
+ snapshot_start: dict[str, Any] | None,
176
+ snapshot_end: dict[str, Any] | None,
177
+ submission_idx: int | None,
178
+ commands_file_ID: int | None,
179
+ success: bool | None,
180
+ skip: int,
181
+ exit_code: int | None,
182
+ metadata: dict[str, Any],
183
+ run_hostname: str | None,
184
+ port_number: int | None,
227
185
  ) -> None:
228
186
  self._id = id_
229
187
  self._is_pending = is_pending
@@ -234,6 +192,7 @@ class ElementActionRun:
234
192
  self._start_time = start_time
235
193
  self._end_time = end_time
236
194
  self._submission_idx = submission_idx
195
+ self._commands_file_ID = commands_file_ID
237
196
  self._success = success
238
197
  self._skip = skip
239
198
  self._snapshot_start = snapshot_start
@@ -241,18 +200,19 @@ class ElementActionRun:
241
200
  self._exit_code = exit_code
242
201
  self._metadata = metadata
243
202
  self._run_hostname = run_hostname
203
+ self._port_number = port_number
244
204
 
245
205
  # assigned on first access of corresponding properties:
246
- self._inputs = None
247
- self._outputs = None
248
- self._resources = None
249
- self._input_files = None
250
- self._output_files = None
251
- self._ss_start_obj = None
252
- self._ss_end_obj = None
253
- self._ss_diff_obj = None
206
+ self._inputs: ElementInputs | None = None
207
+ self._outputs: ElementOutputs | None = None
208
+ self._resources: ElementResources | None = None
209
+ self._input_files: ElementInputFiles | None = None
210
+ self._output_files: ElementOutputFiles | None = None
211
+ self._ss_start_obj: JSONLikeDirSnapShot | None = None
212
+ self._ss_end_obj: JSONLikeDirSnapShot | None = None
213
+ self._ss_diff_obj: DirectorySnapshotDiff | None = None
254
214
 
255
- def __repr__(self):
215
+ def __repr__(self) -> str:
256
216
  return (
257
217
  f"{self.__class__.__name__}("
258
218
  f"id={self.id_!r}, index={self.index!r}, "
@@ -274,110 +234,122 @@ class ElementActionRun:
274
234
  return self._is_pending
275
235
 
276
236
  @property
277
- def element_action(self):
237
+ def element_action(self) -> ElementAction:
278
238
  """
279
239
  The particular element action that this is a run of.
280
240
  """
281
241
  return self._element_action
282
242
 
283
243
  @property
284
- def index(self):
244
+ def index(self) -> int:
285
245
  """Run index."""
286
246
  return self._index
287
247
 
288
248
  @property
289
- def action(self):
249
+ def action(self) -> Action:
290
250
  """
291
251
  The action this is a run of.
292
252
  """
293
253
  return self.element_action.action
294
254
 
295
255
  @property
296
- def element_iteration(self):
256
+ def element_iteration(self) -> ElementIteration:
297
257
  """
298
258
  The iteration information of this run.
299
259
  """
300
260
  return self.element_action.element_iteration
301
261
 
302
262
  @property
303
- def element(self):
263
+ def element(self) -> Element:
304
264
  """
305
265
  The element this is a run of.
306
266
  """
307
267
  return self.element_iteration.element
308
268
 
309
269
  @property
310
- def workflow(self):
270
+ def workflow(self) -> Workflow:
311
271
  """
312
272
  The workflow this is a run of.
313
273
  """
314
274
  return self.element_iteration.workflow
315
275
 
316
276
  @property
317
- def data_idx(self):
277
+ def data_idx(self) -> DataIndex:
318
278
  """
319
279
  Used for looking up input data to the EAR.
320
280
  """
321
281
  return self._data_idx
322
282
 
323
283
  @property
324
- def commands_idx(self):
284
+ def commands_idx(self) -> Sequence[int]:
325
285
  """
326
286
  Indices of commands to apply.
327
287
  """
328
288
  return self._commands_idx
329
289
 
330
290
  @property
331
- def metadata(self):
291
+ def metadata(self) -> Mapping[str, Any]:
332
292
  """
333
293
  Metadata about the EAR.
334
294
  """
335
295
  return self._metadata
336
296
 
337
297
  @property
338
- def run_hostname(self):
298
+ def run_hostname(self) -> str | None:
339
299
  """
340
300
  Where to run the EAR, if known/specified.
341
301
  """
342
302
  return self._run_hostname
343
303
 
344
304
  @property
345
- def start_time(self):
305
+ def port_number(self):
306
+ return self._port_number
307
+
308
+ @property
309
+ def start_time(self) -> datetime | None:
346
310
  """
347
311
  When the EAR started.
348
312
  """
349
313
  return self._start_time
350
314
 
351
315
  @property
352
- def end_time(self):
316
+ def end_time(self) -> datetime | None:
353
317
  """
354
318
  When the EAR finished.
355
319
  """
356
320
  return self._end_time
357
321
 
358
322
  @property
359
- def submission_idx(self):
323
+ def submission_idx(self) -> int | None:
360
324
  """
361
325
  What actual submission index was this?
362
326
  """
363
327
  return self._submission_idx
364
328
 
365
329
  @property
366
- def success(self):
330
+ def commands_file_ID(self):
331
+ return self._commands_file_ID
332
+
333
+ @property
334
+ def success(self) -> bool | None:
367
335
  """
368
336
  Did the EAR succeed?
369
337
  """
370
338
  return self._success
371
339
 
372
340
  @property
373
- def skip(self):
341
+ def skip(self) -> int:
374
342
  """
375
343
  Was the EAR skipped?
376
344
  """
377
345
  return self._skip
378
346
 
379
347
  @property
380
- def snapshot_start(self):
348
+ def skip_reason(self):
349
+ return SkipReason(self.skip)
350
+
351
+ @property
352
+ def snapshot_start(self) -> JSONLikeDirSnapShot | None:
381
353
  """
382
354
  The snapshot of the data directory at the start of the run.
383
355
  """
@@ -389,7 +361,7 @@ class ElementActionRun:
389
361
  return self._ss_start_obj
390
362
 
391
363
  @property
392
- def snapshot_end(self):
364
+ def snapshot_end(self) -> JSONLikeDirSnapShot | None:
393
365
  """
394
366
  The snapshot of the data directory at the end of the run.
395
367
  """
@@ -398,32 +370,34 @@ class ElementActionRun:
398
370
  return self._ss_end_obj
399
371
 
400
372
  @property
401
- def dir_diff(self) -> DirectorySnapshotDiff:
373
+ def dir_diff(self) -> DirectorySnapshotDiff | None:
402
374
  """
403
375
  The changes to the EAR working directory due to the execution of this EAR.
404
376
  """
405
- if self._ss_diff_obj is None and self.snapshot_end:
406
- self._ss_diff_obj = DirectorySnapshotDiff(
407
- self.snapshot_start, self.snapshot_end
408
- )
377
+ if (
378
+ not self._ss_diff_obj
379
+ and (ss := self.snapshot_start)
380
+ and (se := self.snapshot_end)
381
+ ):
382
+ self._ss_diff_obj = DirectorySnapshotDiff(ss, se)
409
383
  return self._ss_diff_obj
410
384
 
411
385
  @property
412
- def exit_code(self):
386
+ def exit_code(self) -> int | None:
413
387
  """
414
388
  The exit code of the underlying program run by the EAR, if known.
415
389
  """
416
390
  return self._exit_code
417
391
 
418
392
  @property
419
- def task(self):
393
+ def task(self) -> WorkflowTask:
420
394
  """
421
395
  The task that this EAR is part of the implementation of.
422
396
  """
423
397
  return self.element_action.task
424
398
 
425
399
  @property
426
- def status(self):
400
+ def status(self) -> EARStatus:
427
401
  """
428
402
  The state of this EAR.
429
403
  """
@@ -445,18 +419,16 @@ class ElementActionRun:
445
419
  elif self.submission_idx is not None:
446
420
  wk_sub_stat = self.workflow.submissions[self.submission_idx].status
447
421
 
448
- if wk_sub_stat.name == "PENDING":
422
+ if wk_sub_stat == SubmissionStatus.PENDING:
449
423
  return EARStatus.prepared
450
-
451
- elif wk_sub_stat.name == "SUBMITTED":
424
+ elif wk_sub_stat == SubmissionStatus.SUBMITTED:
452
425
  return EARStatus.submitted
453
-
454
426
  else:
455
427
  RuntimeError(f"Workflow submission status not understood: {wk_sub_stat}.")
456
428
 
457
429
  return EARStatus.pending
458
430
 
459
- def get_parameter_names(self, prefix: str) -> List[str]:
431
+ def get_parameter_names(self, prefix: str) -> Sequence[str]:
460
432
  """Get parameter types associated with a given prefix.
461
433
 
462
434
  For inputs, labels are ignored. See `Action.get_parameter_names` for more
@@ -466,11 +438,10 @@ class ElementActionRun:
466
438
  ----------
467
439
  prefix
468
440
  One of "inputs", "outputs", "input_files", "output_files".
469
-
470
441
  """
471
442
  return self.action.get_parameter_names(prefix)
472
443
 
473
- def get_data_idx(self, path: str = None):
444
+ def get_data_idx(self, path: str | None = None) -> DataIndex:
474
445
  """
475
446
  Get the data index of a value in the most recent iteration.
476
447
 
@@ -485,14 +456,37 @@ class ElementActionRun:
485
456
  run_idx=self.index,
486
457
  )
487
458
 
459
+ @overload
460
+ def get_parameter_sources(
461
+ self,
462
+ *,
463
+ path: str | None = None,
464
+ typ: str | None = None,
465
+ as_strings: Literal[False] = False,
466
+ use_task_index: bool = False,
467
+ ) -> Mapping[str, ParamSource | list[ParamSource]]:
468
+ ...
469
+
470
+ @overload
471
+ def get_parameter_sources(
472
+ self,
473
+ *,
474
+ path: str | None = None,
475
+ typ: str | None = None,
476
+ as_strings: Literal[True],
477
+ use_task_index: bool = False,
478
+ ) -> Mapping[str, str]:
479
+ ...
480
+
488
481
  @TimeIt.decorator
489
482
  def get_parameter_sources(
490
483
  self,
491
- path: str = None,
492
- typ: str = None,
484
+ *,
485
+ path: str | None = None,
486
+ typ: str | None = None,
493
487
  as_strings: bool = False,
494
488
  use_task_index: bool = False,
495
- ):
489
+ ) -> Mapping[str, str] | Mapping[str, ParamSource | list[ParamSource]]:
496
490
  """
497
491
  Get the source or sources of a parameter in the most recent iteration.
498
492
 
@@ -507,22 +501,31 @@ class ElementActionRun:
507
501
  use_task_index:
508
502
  Whether to use the task index.
509
503
  """
504
+ if as_strings:
505
+ return self.element_iteration.get_parameter_sources(
506
+ path,
507
+ action_idx=self.element_action.action_idx,
508
+ run_idx=self.index,
509
+ typ=typ,
510
+ as_strings=True,
511
+ use_task_index=use_task_index,
512
+ )
510
513
  return self.element_iteration.get_parameter_sources(
511
514
  path,
512
515
  action_idx=self.element_action.action_idx,
513
516
  run_idx=self.index,
514
517
  typ=typ,
515
- as_strings=as_strings,
518
+ as_strings=False,
516
519
  use_task_index=use_task_index,
517
520
  )
518
521
 
519
522
  def get(
520
523
  self,
521
- path: str = None,
522
- default: Any = None,
524
+ path: str | None = None,
525
+ default: Any | None = None,
523
526
  raise_on_missing: bool = False,
524
527
  raise_on_unset: bool = False,
525
- ):
528
+ ) -> Any:
526
529
  """
527
530
  Get a value (parameter, input, output, etc.) from the most recent iteration.
528
531
 
@@ -548,153 +551,198 @@ class ElementActionRun:
548
551
  raise_on_unset=raise_on_unset,
549
552
  )
550
553
 
551
- @TimeIt.decorator
552
- def get_EAR_dependencies(self, as_objects=False):
553
- """Get EARs that this EAR depends on."""
554
+ @overload
555
+ def get_EAR_dependencies(self, as_objects: Literal[False] = False) -> set[int]:
556
+ ...
557
+
558
+ @overload
559
+ def get_EAR_dependencies(self, as_objects: Literal[True]) -> list[ElementActionRun]:
560
+ ...
554
561
 
555
- out = []
562
+ @TimeIt.decorator
563
+ def get_EAR_dependencies(self, as_objects=False) -> list[ElementActionRun] | set[int]:
564
+ """Get EARs that this EAR depends on, or just their IDs."""
565
+ out: set[int] = set()
556
566
  for src in self.get_parameter_sources(typ="EAR_output").values():
557
- if not isinstance(src, list):
558
- src = [src]
559
- for src_i in src:
560
- EAR_ID_i = src_i["EAR_ID"]
567
+ for src_i in src if isinstance(src, list) else [src]:
568
+ EAR_ID_i: int = src_i["EAR_ID"]
561
569
  if EAR_ID_i != self.id_:
562
570
  # don't record a self dependency!
563
- out.append(EAR_ID_i)
564
-
565
- out = sorted(out)
571
+ out.add(EAR_ID_i)
566
572
 
567
573
  if as_objects:
568
- out = self.workflow.get_EARs_from_IDs(out)
569
-
574
+ return self.workflow.get_EARs_from_IDs(sorted(out))
570
575
  return out
571
576
 
572
- def get_input_dependencies(self):
577
+ def get_input_dependencies(self) -> Mapping[str, ParamSource]:
573
578
  """Get information about locally defined input, sequence, and schema-default
574
579
  values that this EAR depends on. Note this does not get values from this EAR's
575
580
  task/schema, because the aim of this method is to help determine which upstream
576
581
  tasks this EAR depends on."""
577
582
 
578
- out = {}
579
- for k, v in self.get_parameter_sources().items():
580
- if not isinstance(v, list):
581
- v = [v]
582
- for v_i in v:
583
- if (
584
- v_i["type"] in ["local_input", "default_input"]
585
- and v_i["task_insert_ID"] != self.task.insert_ID
586
- ):
587
- out[k] = v_i
583
+ wanted_types = ("local_input", "default_input")
584
+ return {
585
+ k: v_i
586
+ for k, v in self.get_parameter_sources().items()
587
+ for v_i in (v if isinstance(v, list) else [v])
588
+ if (
589
+ v_i["type"] in wanted_types
590
+ and v_i["task_insert_ID"] != self.task.insert_ID
591
+ )
592
+ }
593
+
594
+ @overload
595
+ def get_dependent_EARs(self, as_objects: Literal[False] = False) -> set[int]:
596
+ ...
588
597
 
589
- return out
598
+ @overload
599
+ def get_dependent_EARs(self, as_objects: Literal[True]) -> list[ElementActionRun]:
600
+ ...
590
601
 
591
602
  def get_dependent_EARs(
592
- self, as_objects=False
593
- ) -> List[Union[int, app.ElementActionRun]]:
603
+ self, as_objects: bool = False
604
+ ) -> list[ElementActionRun] | set[int]:
594
605
  """Get downstream EARs that depend on this EAR."""
595
- deps = []
596
- for task in self.workflow.tasks[self.task.index :]:
597
- for elem in task.elements[:]:
598
- for iter_ in elem.iterations:
599
- for run in iter_.action_runs:
600
- for dep_EAR_i in run.get_EAR_dependencies(as_objects=True):
601
- # does dep_EAR_i belong to self?
602
- if dep_EAR_i.id_ == self._id:
603
- deps.append(run.id_)
604
- deps = sorted(deps)
606
+ deps = {
607
+ run.id_
608
+ for task in self.workflow.tasks[self.task.index :]
609
+ for elem in task.elements[:]
610
+ for iter_ in elem.iterations
611
+ for run in iter_.action_runs
612
+ # does EAR dependency belong to self?
613
+ if self._id in run.get_EAR_dependencies()
614
+ }
605
615
  if as_objects:
606
- deps = self.workflow.get_EARs_from_IDs(deps)
607
-
616
+ return self.workflow.get_EARs_from_IDs(sorted(deps))
608
617
  return deps
609
618
 
610
619
  @property
611
- def inputs(self):
620
+ def inputs(self) -> ElementInputs:
612
621
  """
613
622
  The inputs to this EAR.
614
623
  """
615
624
  if not self._inputs:
616
- self._inputs = self.app.ElementInputs(element_action_run=self)
625
+ self._inputs = self._app.ElementInputs(element_action_run=self)
617
626
  return self._inputs
618
627
 
619
628
  @property
620
- def outputs(self):
629
+ def outputs(self) -> ElementOutputs:
621
630
  """
622
631
  The outputs from this EAR.
623
632
  """
624
633
  if not self._outputs:
625
- self._outputs = self.app.ElementOutputs(element_action_run=self)
634
+ self._outputs = self._app.ElementOutputs(element_action_run=self)
626
635
  return self._outputs
627
636
 
628
637
  @property
629
638
  @TimeIt.decorator
630
- def resources(self):
639
+ def resources(self) -> ElementResources:
631
640
  """
632
641
  The resources to use with (or used by) this EAR.
633
642
  """
634
643
  if not self._resources:
635
- self._resources = self.app.ElementResources(**self.get_resources())
644
+ self._resources = self.__get_resources_obj()
636
645
  return self._resources
637
646
 
638
647
  @property
639
- def input_files(self):
648
+ def input_files(self) -> ElementInputFiles:
640
649
  """
641
650
  The input files to the controlled program.
642
651
  """
643
652
  if not self._input_files:
644
- self._input_files = self.app.ElementInputFiles(element_action_run=self)
653
+ self._input_files = self._app.ElementInputFiles(element_action_run=self)
645
654
  return self._input_files
646
655
 
647
656
  @property
648
- def output_files(self):
657
+ def output_files(self) -> ElementOutputFiles:
649
658
  """
650
659
  The output files from the controlled program.
651
660
  """
652
661
  if not self._output_files:
653
- self._output_files = self.app.ElementOutputFiles(element_action_run=self)
662
+ self._output_files = self._app.ElementOutputFiles(element_action_run=self)
654
663
  return self._output_files
655
664
 
656
665
  @property
657
- def env_spec(self) -> Dict[str, Any]:
666
+ @TimeIt.decorator
667
+ def env_spec(self) -> Mapping[str, Any]:
658
668
  """
659
669
  Environment details.
660
670
  """
661
- return self.resources.environments[self.action.get_environment_name()]
671
+ if (envs := self.resources.environments) is None:
672
+ return {}
673
+ return envs[self.action.get_environment_name()]
674
+
675
+ @property
676
+ @TimeIt.decorator
677
+ def env_spec_hashable(self) -> tuple:
678
+ return self.action.env_spec_to_hashable(self.env_spec)
679
+
680
+ def get_directory(self) -> Path | None:
681
+ """
682
+ Get the working directory, if one is required.
683
+ """
684
+ return self.workflow.get_run_directories(run_ids=[self.id_])[0]
685
+
686
+ def get_app_log_path(self) -> Path:
687
+ assert self.submission_idx is not None
688
+ return Submission.get_app_log_file_path(
689
+ self.workflow.submissions_path,
690
+ self.submission_idx,
691
+ self.id_,
692
+ )
693
+
694
+ def get_app_std_path(self) -> Path:
695
+ assert self.submission_idx is not None
696
+ std_dir = Submission.get_app_std_path(
697
+ self.workflow.submissions_path,
698
+ self.submission_idx,
699
+ )
700
+ return std_dir / f"{self.id_}.txt" # TODO: refactor
662
701
 
663
702
  @TimeIt.decorator
664
- def get_resources(self):
703
+ def get_resources(self) -> Mapping[str, Any]:
665
704
  """Resolve specific resources for this EAR, considering all applicable scopes and
666
705
  template-level resources."""
667
706
  return self.element_iteration.get_resources(self.action)
668
707
 
669
- def get_environment_spec(self) -> str:
708
+ @TimeIt.decorator
709
+ def __get_resources_obj(self) -> ElementResources:
710
+ """Resolve specific resources for this EAR, considering all applicable scopes and
711
+ template-level resources."""
712
+ return self.element_iteration.get_resources_obj(self.action)
713
+
714
+ def get_environment_spec(self) -> Mapping[str, Any]:
670
715
  """
671
716
  What environment to run in?
672
717
  """
673
718
  return self.action.get_environment_spec()
674
719
 
675
- def get_environment(self) -> app.Environment:
720
+ def get_environment(self) -> Environment:
676
721
  """
677
722
  What environment to run in?
678
723
  """
679
724
  return self.action.get_environment()
680
725
 
681
- def get_all_previous_iteration_runs(self, include_self: bool = True):
726
+ def get_all_previous_iteration_runs(
727
+ self, include_self: bool = True
728
+ ) -> list[ElementActionRun]:
682
729
  """Get a list of run over all iterations that correspond to this run, optionally
683
730
  including this run."""
684
731
  self_iter = self.element_iteration
685
732
  self_elem = self_iter.element
686
733
  self_act_idx = self.element_action.action_idx
687
- max_idx = self_iter.index + 1 if include_self else self_iter.index
688
- all_runs = []
689
- for iter_i in self_elem.iterations[:max_idx]:
690
- all_runs.append(iter_i.actions[self_act_idx].runs[-1])
691
- return all_runs
734
+ max_idx = self_iter.index + (1 if include_self else 0)
735
+ return [
736
+ iter_i.actions[self_act_idx].runs[-1]
737
+ for iter_i in self_elem.iterations[:max_idx]
738
+ ]
692
739
 
693
740
  def get_input_values(
694
741
  self,
695
- inputs: Optional[Union[List[str], Dict[str, Dict]]] = None,
742
+ inputs: Sequence[str] | Mapping[str, Mapping[str, Any]] | None = None,
696
743
  label_dict: bool = True,
697
- ) -> Dict[str, Any]:
744
+ raise_on_unset: bool = False,
745
+ ) -> Mapping[str, Mapping[str, Any]]:
698
746
  """Get a dict of (optionally a subset of) inputs values for this run.
699
747
 
700
748
  Parameters
@@ -714,75 +762,83 @@ class ElementActionRun:
714
762
  if not inputs:
715
763
  inputs = self.get_parameter_names("inputs")
716
764
 
717
- out = {}
765
+ out: dict[str, dict[str, Any]] = {}
718
766
  for inp_name in inputs:
719
- path_i, label_i = split_param_label(inp_name)
720
-
721
- try:
722
- all_iters = inputs[inp_name]["all_iterations"]
723
- except (TypeError, KeyError):
724
- all_iters = False
725
-
726
- if all_iters:
727
- all_runs = self.get_all_previous_iteration_runs(include_self=True)
767
+ if self.__all_iters(inputs, inp_name):
728
768
  val_i = {
729
769
  f"iteration_{run_i.element_iteration.index}": {
730
770
  "loop_idx": run_i.element_iteration.loop_idx,
731
- "value": run_i.get(f"inputs.{inp_name}"),
771
+ "value": run_i.get(
772
+ f"inputs.{inp_name}", raise_on_unset=raise_on_unset
773
+ ),
732
774
  }
733
- for run_i in all_runs
775
+ for run_i in self.get_all_previous_iteration_runs(include_self=True)
734
776
  }
735
777
  else:
736
- val_i = self.get(f"inputs.{inp_name}")
737
-
738
- key = inp_name
739
- if label_dict and label_i:
740
- key = path_i # exclude label from key
778
+ val_i = self.get(f"inputs.{inp_name}", raise_on_unset=raise_on_unset)
741
779
 
742
- if "." in key:
743
- # for sub-parameters, take only the final part as the dict key:
744
- key = key.split(".")[-1]
745
-
746
- if label_dict and label_i:
747
- if key not in out:
748
- out[key] = {}
749
- out[key][label_i] = val_i
780
+ key, label_i = self.__split_input_name(inp_name, label_dict)
781
+ if label_i:
782
+ out.setdefault(key, {})[label_i] = val_i
750
783
  else:
751
784
  out[key] = val_i
752
785
 
753
786
  if self.action.script_pass_env_spec:
754
- out["env_spec"] = self.env_spec
787
+ out["env_spec"] = cast("Any", self.env_spec)
755
788
 
756
789
  return out
757
790
 
758
- def get_input_values_direct(self, label_dict: bool = True):
791
+ @staticmethod
792
+ def __all_iters(
793
+ inputs: Sequence[str] | Mapping[str, Mapping[str, Any]], inp_name: str
794
+ ) -> bool:
795
+ try:
796
+ return isinstance(inputs, Mapping) and bool(
797
+ inputs[inp_name]["all_iterations"]
798
+ )
799
+ except (TypeError, KeyError):
800
+ return False
801
+
802
+ @staticmethod
803
+ def __split_input_name(inp_name: str, label_dict: bool) -> tuple[str, str | None]:
804
+ key = inp_name
805
+ path, label = split_param_label(key)
806
+ if label_dict and path:
807
+ key = path # exclude label from key
808
+ # for sub-parameters, take only the final part as the dict key:
809
+ return key.split(".")[-1], (label if label_dict else None)
810
+
811
+ def get_input_values_direct(
812
+ self, label_dict: bool = True, raise_on_unset: bool = False
813
+ ) -> Mapping[str, Mapping[str, Any]]:
759
814
  """Get a dict of input values that are to be passed directly to a Python script
760
815
  function."""
761
816
  inputs = self.action.script_data_in_grouped.get("direct", {})
762
- return self.get_input_values(inputs=inputs, label_dict=label_dict)
817
+ return self.get_input_values(
818
+ inputs=inputs, label_dict=label_dict, raise_on_unset=raise_on_unset
819
+ )
763
820
 
764
- def get_IFG_input_values(self) -> Dict[str, Any]:
821
+ def get_IFG_input_values(self, raise_on_unset: bool = False) -> Mapping[str, Any]:
765
822
  """
766
823
  Get a dict of input values that are to be passed via an input file generator.
767
824
  """
768
825
  if not self.action._from_expand:
769
826
  raise RuntimeError(
770
- f"Cannot get input file generator inputs from this EAR because the "
771
- f"associated action is not expanded, meaning multiple IFGs might exists."
827
+ "Cannot get input file generator inputs from this EAR because the "
828
+ "associated action is not expanded, meaning multiple IFGs might exists."
772
829
  )
773
830
  input_types = [i.typ for i in self.action.input_file_generators[0].inputs]
774
- inputs = {}
775
- for i in self.inputs:
776
- typ = i.path[len("inputs.") :]
777
- if typ in input_types:
778
- inputs[typ] = i.value
831
+ inputs = {
832
+ typ_i: self.get(f"inputs.{typ_i}", raise_on_unset=raise_on_unset)
833
+ for typ_i in input_types
834
+ }
779
835
 
780
836
  if self.action.script_pass_env_spec:
781
837
  inputs["env_spec"] = self.env_spec
782
838
 
783
839
  return inputs
784
840
 
785
- def get_OFP_output_files(self) -> Dict[str, Union[str, List[str]]]:
841
+ def get_OFP_output_files(self) -> Mapping[str, Path]:
786
842
  """
787
843
  Get a dict of output files that are going to be parsed to generate one or more
788
844
  outputs.
@@ -790,118 +846,236 @@ class ElementActionRun:
790
846
  # TODO: can this return multiple files for a given FileSpec?
791
847
  if not self.action._from_expand:
792
848
  raise RuntimeError(
793
- f"Cannot get output file parser files from this from EAR because the "
794
- f"associated action is not expanded, meaning multiple OFPs might exist."
849
+ "Cannot get output file parser files from this from EAR because the "
850
+ "associated action is not expanded, meaning multiple OFPs might exist."
795
851
  )
796
- out_files = {}
797
- for file_spec in self.action.output_file_parsers[0].output_files:
798
- out_files[file_spec.label] = Path(file_spec.name.value())
799
- return out_files
852
+ return {
853
+ file_spec.label: Path(cast("str", file_spec.name.value()))
854
+ for file_spec in self.action.output_file_parsers[0].output_files
855
+ }
800
856
 
801
- def get_OFP_inputs(self) -> Dict[str, Union[str, List[str]]]:
857
+ def get_OFP_inputs(
858
+ self, raise_on_unset: bool = False
859
+ ) -> Mapping[str, str | list[str] | Mapping[str, Any]]:
802
860
  """
803
861
  Get a dict of input values that are to be passed to output file parsers.
804
862
  """
805
863
  if not self.action._from_expand:
806
864
  raise RuntimeError(
807
- f"Cannot get output file parser inputs from this from EAR because the "
808
- f"associated action is not expanded, meaning multiple OFPs might exist."
865
+ "Cannot get output file parser inputs from this from EAR because the "
866
+ "associated action is not expanded, meaning multiple OFPs might exist."
809
867
  )
810
- inputs = {}
868
+ inputs: dict[
869
+ str, str | list[str] | Mapping[str, Any]
870
+ ] = {} # not sure this type is correct
811
871
  for inp_typ in self.action.output_file_parsers[0].inputs or []:
812
- inputs[inp_typ] = self.get(f"inputs.{inp_typ}")
872
+ inputs[inp_typ] = self.get(f"inputs.{inp_typ}", raise_on_unset=raise_on_unset)
813
873
 
814
874
  if self.action.script_pass_env_spec:
815
875
  inputs["env_spec"] = self.env_spec
816
876
 
817
877
  return inputs
818
878
 
819
- def get_OFP_outputs(self) -> Dict[str, Union[str, List[str]]]:
879
+ def get_OFP_outputs(
880
+ self, raise_on_unset: bool = False
881
+ ) -> Mapping[str, str | list[str]]:
820
882
  """
821
- Get the outputs obtained by parsing an output file.
883
+ Get the outputs that are required to execute an output file parser.
822
884
  """
823
885
  if not self.action._from_expand:
824
886
  raise RuntimeError(
825
- f"Cannot get output file parser outputs from this from EAR because the "
826
- f"associated action is not expanded, meaning multiple OFPs might exist."
887
+ "Cannot get output file parser outputs from this from EAR because the "
888
+ "associated action is not expanded, meaning multiple OFPs might exist."
827
889
  )
828
- outputs = {}
890
+ outputs: dict[str, str | list[str]] = {} # not sure this type is correct
829
891
  for out_typ in self.action.output_file_parsers[0].outputs or []:
830
- outputs[out_typ] = self.get(f"outputs.{out_typ}")
892
+ outputs[out_typ] = self.get(
893
+ f"outputs.{out_typ}", raise_on_unset=raise_on_unset
894
+ )
831
895
  return outputs
832
896
 
833
- def write_source(self, js_idx: int, js_act_idx: int):
897
+ def get_py_script_func_kwargs(
898
+ self,
899
+ raise_on_unset: bool = False,
900
+ add_script_files: bool = False,
901
+ blk_act_key: BlockActionKey | None = None,
902
+ ) -> Mapping[str, Any]:
903
+ """Get function arguments to run the Python script associated with this action.
904
+
905
+ Parameters
906
+ ----------
907
+ raise_on_unset
908
+ If True, raise if unset parameter data is found when trying to retrieve input
909
+ data.
910
+ add_script_files
911
+ If True, include additional keys "_input_files" and "_output_files" that will
912
+ be dicts mapping file formats to file names for script input and output files.
913
+ If True, `js_blk_act_key` must be provided.
914
+ js_blk_act_key
915
+ A three-tuple of integers corresponding to the jobscript index, block index,
916
+ and block-action index.
917
+ """
918
+ kwargs: dict[str, Any] = {}
919
+ if self.action.is_IFG:
920
+ ifg = self.action.input_file_generators[0]
921
+ path = ifg.input_file.name.value()
922
+ assert isinstance(path, str)
923
+ kwargs["path"] = Path(path)
924
+ kwargs.update(self.get_IFG_input_values(raise_on_unset=raise_on_unset))
925
+
926
+ elif self.action.is_OFP:
927
+ kwargs.update(self.get_OFP_output_files())
928
+ kwargs.update(self.get_OFP_inputs(raise_on_unset=raise_on_unset))
929
+ kwargs.update(self.get_OFP_outputs(raise_on_unset=raise_on_unset))
930
+
931
+ if (
932
+ not any((self.action.is_IFG, self.action.is_OFP))
933
+ and self.action.script_data_in_has_direct
934
+ ):
935
+ kwargs.update(self.get_input_values_direct(raise_on_unset=raise_on_unset))
936
+
937
+ if add_script_files:
938
+ assert blk_act_key
939
+ in_out_names = self.action.get_script_input_output_file_paths(blk_act_key)
940
+ in_names, out_names = in_out_names["inputs"], in_out_names["outputs"]
941
+ if in_names:
942
+ kwargs["_input_files"] = in_names
943
+ if out_names:
944
+ kwargs["_output_files"] = out_names
945
+
946
+ return kwargs
947
+
948
+ def write_script_input_files(self, block_act_key: BlockActionKey) -> None:
834
949
  """
835
950
  Write values to files in standard formats.
836
951
  """
837
- import h5py
838
-
839
952
  for fmt, ins in self.action.script_data_in_grouped.items():
840
- if fmt == "json":
841
- in_vals = self.get_input_values(inputs=ins, label_dict=False)
842
- dump_path = self.action.get_param_dump_file_path_JSON(js_idx, js_act_idx)
843
- in_vals_processed = {}
844
- for k, v in in_vals.items():
845
- try:
846
- v = v.prepare_JSON_dump()
847
- except (AttributeError, NotImplementedError):
848
- pass
849
- in_vals_processed[k] = v
850
-
851
- with dump_path.open("wt") as fp:
852
- json.dump(in_vals_processed, fp)
953
+ in_vals = self.get_input_values(
954
+ inputs=ins, label_dict=False, raise_on_unset=False
955
+ )
956
+ if writer := self.__source_writer_map.get(fmt):
957
+ writer(self, in_vals, block_act_key)
853
958
 
854
- elif fmt == "hdf5":
855
- in_vals = self.get_input_values(inputs=ins, label_dict=False)
856
- dump_path = self.action.get_param_dump_file_path_HDF5(js_idx, js_act_idx)
857
- with h5py.File(dump_path, mode="w") as f:
858
- for k, v in in_vals.items():
859
- grp_k = f.create_group(k)
860
- v.dump_to_HDF5_group(grp_k)
861
-
862
- # write the script if it is specified as a app data script, otherwise we assume
863
- # the script already exists in the working directory:
864
- snip_path = self.action.get_snippet_script_path(self.action.script, self.env_spec)
865
- if snip_path:
866
- script_name = snip_path.name
867
- source_str = self.action.compose_source(snip_path)
868
- with Path(script_name).open("wt", newline="\n") as fp:
869
- fp.write(source_str)
870
-
871
- def _param_save(self, js_idx: int, js_act_idx: int):
959
+ def __write_json_inputs(
960
+ self,
961
+ in_vals: Mapping[str, ParameterValue | list[ParameterValue]],
962
+ block_act_key: BlockActionKey,
963
+ ):
964
+ in_vals_processed: dict[str, Any] = {}
965
+ for k, v in in_vals.items():
966
+ try:
967
+ in_vals_processed[k] = (
968
+ v.prepare_JSON_dump() if isinstance(v, ParameterValue) else v
969
+ )
970
+ except (AttributeError, NotImplementedError):
971
+ in_vals_processed[k] = v
972
+
973
+ with self.action.get_param_dump_file_path_JSON(block_act_key).open("wt") as fp:
974
+ json.dump(in_vals_processed, fp)
975
+
976
+ def __write_hdf5_inputs(
977
+ self,
978
+ in_vals: Mapping[str, ParameterValue | list[ParameterValue]],
979
+ block_act_key: BlockActionKey,
980
+ ):
981
+ import h5py # type: ignore
982
+
983
+ with h5py.File(
984
+ self.action.get_param_dump_file_path_HDF5(block_act_key), mode="w"
985
+ ) as h5file:
986
+ for k, v in in_vals.items():
987
+ grp_k = h5file.create_group(k)
988
+ try:
989
+ assert isinstance(v, ParameterValue)
990
+ v.dump_to_HDF5_group(grp_k)
991
+ except AttributeError:
992
+ # probably an element group (i.e. v is a list of `ParameterValue`
993
+ # objects):
994
+ assert isinstance(v, list)
995
+ v[0].dump_element_group_to_HDF5_group(v, grp_k)
996
+
997
+ __source_writer_map: ClassVar[dict[str, Callable[..., None]]] = {
998
+ "json": __write_json_inputs,
999
+ "hdf5": __write_hdf5_inputs,
1000
+ }
1001
+
1002
+ def __output_index(self, param_name: str) -> int:
1003
+ return cast("int", self.data_idx[f"outputs.{param_name}"])
1004
+
1005
+ def _param_save(self, block_act_key: BlockActionKey, run_dir: Path | None = None):
872
1006
  """Save script-generated parameters that are stored within the supported script
873
1007
  data output formats (HDF5, JSON, etc)."""
874
- import h5py
1008
+ in_out_names = self.action.get_script_input_output_file_paths(
1009
+ block_act_key, directory=run_dir
1010
+ )
875
1011
 
876
- for fmt in self.action.script_data_out_grouped:
1012
+ import h5py # type: ignore
1013
+
1014
+ parameters = self._app.parameters
1015
+ for fmt, load_path in in_out_names["outputs"].items():
877
1016
  if fmt == "json":
878
- load_path = self.action.get_param_load_file_path_JSON(js_idx, js_act_idx)
879
1017
  with load_path.open(mode="rt") as f:
880
- file_data = json.load(f)
1018
+ file_data: dict[str, Any] = json.load(f)
881
1019
  for param_name, param_dat in file_data.items():
882
- param_id = self.data_idx[f"outputs.{param_name}"]
883
- param_cls = self.app.parameters.get(param_name)._value_class
884
- try:
885
- param_cls.save_from_JSON(param_dat, param_id, self.workflow)
886
- continue
887
- except (AttributeError, NotImplementedError):
888
- pass
1020
+ param_id = self.__output_index(param_name)
1021
+ if param_cls := parameters.get(param_name)._force_value_class():
1022
+ try:
1023
+ param_cls.save_from_JSON(
1024
+ param_dat, param_id, self.workflow
1025
+ )
1026
+ continue
1027
+ except NotImplementedError:
1028
+ pass
889
1029
  # try to save as a primitive:
890
1030
  self.workflow.set_parameter_value(
891
1031
  param_id=param_id, value=param_dat
892
1032
  )
893
1033
 
894
1034
  elif fmt == "hdf5":
895
- load_path = self.action.get_param_load_file_path_HDF5(js_idx, js_act_idx)
896
- with h5py.File(load_path, mode="r") as f:
897
- for param_name, h5_grp in f.items():
898
- param_id = self.data_idx[f"outputs.{param_name}"]
899
- param_cls = self.app.parameters.get(param_name)._value_class
900
- param_cls.save_from_HDF5_group(h5_grp, param_id, self.workflow)
1035
+ with h5py.File(load_path, mode="r") as h5file:
1036
+ for param_name, h5_grp in h5file.items():
1037
+ param_id = self.__output_index(param_name)
1038
+ if param_cls := parameters.get(param_name)._force_value_class():
1039
+ try:
1040
+ param_cls.save_from_HDF5_group(
1041
+ h5_grp, param_id, self.workflow
1042
+ )
1043
+ continue
1044
+ except NotImplementedError:
1045
+ pass
1046
+ # Unlike with JSON, we've no fallback so we warn
1047
+ self._app.logger.warning(
1048
+ "parameter %s could not be saved; serializer not found",
1049
+ param_name,
1050
+ )
1051
+
1052
+ @property
1053
+ def is_snippet_script(self) -> bool:
1054
+ """Returns True if the action script string represents a script snippets that is
1055
+ to be modified before execution (e.g. to receive and provide parameter data)."""
1056
+ try:
1057
+ return self.action.is_snippet_script(self.action.script)
1058
+ except AttributeError:
1059
+ return False
1060
+
1061
+ def get_script_artifact_name(self) -> str:
1062
+ """Return the script name that is used when writing the script to the artifacts
1063
+ directory within the workflow.
1064
+
1065
+ Like `Action.get_script_name`, this is only applicable for snippet scripts.
1066
+
1067
+ """
1068
+ art_name, snip_path = self.action.get_script_artifact_name(
1069
+ env_spec=self.env_spec,
1070
+ act_idx=self.element_action.action_idx,
1071
+ include_suffix=True,
1072
+ specs_suffix_delim=".",
1073
+ )
1074
+ return art_name
901
1075
 
902
1076
  def compose_commands(
903
- self, jobscript: app.Jobscript, JS_action_idx: int
904
- ) -> Tuple[str, List[str], List[int]]:
1077
+ self, environments: EnvironmentsList, shell: Shell
1078
+ ) -> tuple[str, Mapping[int, Sequence[tuple[str, ...]]]]:
905
1079
  """
906
1080
  Write the EAR's enactment to disk in preparation for submission.
907
1081
 
@@ -909,48 +1083,160 @@ class ElementActionRun:
909
1083
  -------
910
1084
  commands:
911
1085
  List of argument words for the command that enacts the EAR.
1086
+ Converted to a string.
912
1087
  shell_vars:
913
1088
  Dict whose keys are command indices, and whose values are lists of tuples,
914
1089
  where each tuple contains: (parameter name, shell variable name,
915
1090
  "stdout"/"stderr").
916
1091
  """
917
- self.app.persistence_logger.debug("EAR.compose_commands")
1092
+ self._app.persistence_logger.debug("EAR.compose_commands")
918
1093
  env_spec = self.env_spec
919
1094
 
920
- for ifg in self.action.input_file_generators:
921
- # TODO: there should only be one at this stage if expanded?
922
- ifg.write_source(self.action, env_spec)
923
-
924
1095
  for ofp in self.action.output_file_parsers:
925
1096
  # TODO: there should only be one at this stage if expanded?
926
1097
  if ofp.output is None:
927
1098
  raise OutputFileParserNoOutputError()
928
- ofp.write_source(self.action, env_spec)
929
1099
 
930
- if self.action.script:
931
- self.write_source(js_idx=jobscript.index, js_act_idx=JS_action_idx)
1100
+ command_lns: list[str] = []
1101
+ if (env := environments.get(**env_spec)).setup:
1102
+ command_lns.extend(env.setup)
932
1103
 
933
- command_lns = []
934
- env = jobscript.submission.environments.get(**env_spec)
935
- if env.setup:
936
- command_lns += list(env.setup)
937
-
938
- shell_vars = {} # keys are cmd_idx, each value is a list of tuples
1104
+ shell_vars: dict[int, list[tuple[str, ...]]] = {}
939
1105
  for cmd_idx, command in enumerate(self.action.commands):
940
1106
  if cmd_idx in self.commands_idx:
941
1107
  # only execute commands that have no rules, or all valid rules:
942
- cmd_str, shell_vars_i = command.get_command_line(
943
- EAR=self, shell=jobscript.shell, env=env
1108
+ cmd_str, shell_vars[cmd_idx] = command.get_command_line(
1109
+ EAR=self, shell=shell, env=env
944
1110
  )
945
- shell_vars[cmd_idx] = shell_vars_i
946
1111
  command_lns.append(cmd_str)
947
1112
 
948
- commands = "\n".join(command_lns) + "\n"
1113
+ return ("\n".join(command_lns) + "\n"), shell_vars
1114
+
1115
+ def get_commands_file_hash(self) -> int:
1116
+ """Get a hash that can be used to group together runs that will have the same
1117
+ commands file.
949
1118
 
950
- return commands, shell_vars
1119
+ This hash is not stable across sessions or machines.
951
1120
 
1121
+ """
1122
+ return self.action.get_commands_file_hash(
1123
+ data_idx=self.get_data_idx(),
1124
+ action_idx=self.element_action.action_idx,
1125
+ )
952
1126
 
953
- class ElementAction:
1127
+ @overload
1128
+ def try_write_commands(
1129
+ self,
1130
+ jobscript: Jobscript,
1131
+ environments: EnvironmentsList,
1132
+ raise_on_unset: Literal[True],
1133
+ ) -> Path:
1134
+ ...
1135
+
1136
+ @overload
1137
+ def try_write_commands(
1138
+ self,
1139
+ jobscript: Jobscript,
1140
+ environments: EnvironmentsList,
1141
+ raise_on_unset: Literal[False] = False,
1142
+ ) -> Path | None:
1143
+ ...
1144
+
1145
+ def try_write_commands(
1146
+ self,
1147
+ jobscript: Jobscript,
1148
+ environments: EnvironmentsList,
1149
+ raise_on_unset: bool = False,
1150
+ ) -> Path | None:
1151
+ """Attempt to write the commands file for this run."""
1152
+ app_name = self._app.package_name
1153
+ try:
1154
+ commands, shell_vars = self.compose_commands(
1155
+ environments=environments,
1156
+ shell=jobscript.shell,
1157
+ )
1158
+ except UnsetParameterDataError:
1159
+ if raise_on_unset:
1160
+ raise
1161
+ self._app.submission_logger.debug(
1162
+ f"cannot yet write commands file for run ID {self.id_}; unset parameters"
1163
+ )
1164
+ return None
1165
+
1166
+ for cmd_idx, var_dat in shell_vars.items():
1167
+ for param_name, shell_var_name, st_typ in var_dat:
1168
+ commands += jobscript.shell.format_save_parameter(
1169
+ workflow_app_alias=jobscript.workflow_app_alias,
1170
+ param_name=param_name,
1171
+ shell_var_name=shell_var_name,
1172
+ cmd_idx=cmd_idx,
1173
+ stderr=(st_typ == "stderr"),
1174
+ app_name=app_name,
1175
+ )
1176
+
1177
+ commands_fmt = jobscript.shell.format_commands_file(app_name, commands)
1178
+
1179
+ if jobscript.resources.combine_scripts:
1180
+ stem = f"js_{jobscript.index}" # TODO: refactor
1181
+ else:
1182
+ stem = str(self.id_)
1183
+
1184
+ cmd_file_name = f"{stem}{jobscript.shell.JS_EXT}"
1185
+ cmd_file_path: Path = jobscript.submission.commands_path / cmd_file_name
1186
+ with cmd_file_path.open("wt", newline="\n") as fp:
1187
+ fp.write(commands_fmt)
1188
+
1189
+ return cmd_file_path
1190
+
1191
+ @contextlib.contextmanager
1192
+ def raise_on_failure_threshold(self) -> Iterator[dict[str, UnsetParamTracker]]:
1193
+ """Context manager to track parameter types and associated run IDs for which those
1194
+ parameters were found to be unset when accessed via
1195
+ `WorkflowTask._get_merged_parameter_data`.
1196
+
1197
+ """
1198
+ self.workflow._is_tracking_unset = True
1199
+ self.workflow._tracked_unset = defaultdict(
1200
+ lambda: UnsetParamTracker(run_ids=set(), group_size=-1)
1201
+ )
1202
+ try:
1203
+ yield dict(self.workflow._tracked_unset)
1204
+ except:
1205
+ raise
1206
+ else:
1207
+ try:
1208
+ for schema_inp in self.task.template.schema.inputs:
1209
+ inp_path = f"inputs.{schema_inp.typ}"
1210
+ if inp_path in self.workflow._tracked_unset:
1211
+ unset_tracker = self.workflow._tracked_unset[inp_path]
1212
+ unset_num = len(unset_tracker.run_ids)
1213
+ unset_fraction = unset_num / unset_tracker.group_size
1214
+ if isinstance(schema_inp.allow_failed_dependencies, float):
1215
+ # `True` is converted to 1.0 on SchemaInput init
1216
+ if unset_fraction > schema_inp.allow_failed_dependencies:
1217
+ raise UnsetParameterFractionLimitExceededError(
1218
+ schema_inp,
1219
+ self.task,
1220
+ unset_fraction,
1221
+ log=self._app.submission_logger,
1222
+ )
1223
+ elif isinstance(schema_inp.allow_failed_dependencies, int):
1224
+ if unset_num > schema_inp.allow_failed_dependencies:
1225
+ raise UnsetParameterNumberLimitExceededError(
1226
+ schema_inp,
1227
+ self.task,
1228
+ unset_num,
1229
+ log=self._app.submission_logger,
1230
+ )
1231
+ finally:
1232
+ self.workflow._is_tracking_unset = False
1233
+ self.workflow._tracked_unset = None
1234
+ finally:
1235
+ self.workflow._is_tracking_unset = False
1236
+ self.workflow._tracked_unset = None
1237
+
1238
+
1239
+ class ElementAction(AppAware):
954
1240
  """
955
1241
  An abstract representation of an element's action at a particular iteration and
956
1242
  the runs that enact that element iteration.
@@ -965,20 +1251,23 @@ class ElementAction:
965
1251
  The list of run indices.
966
1252
  """
967
1253
 
968
- _app_attr = "app"
969
-
970
- def __init__(self, element_iteration, action_idx, runs):
1254
+ def __init__(
1255
+ self,
1256
+ element_iteration: ElementIteration,
1257
+ action_idx: int,
1258
+ runs: dict[Mapping[str, Any], Any],
1259
+ ):
971
1260
  self._element_iteration = element_iteration
972
1261
  self._action_idx = action_idx
973
1262
  self._runs = runs
974
1263
 
975
1264
  # assigned on first access of corresponding properties:
976
- self._run_objs = None
977
- self._inputs = None
978
- self._outputs = None
979
- self._resources = None
980
- self._input_files = None
981
- self._output_files = None
1265
+ self._run_objs: list[ElementActionRun] | None = None
1266
+ self._inputs: ElementInputs | None = None
1267
+ self._outputs: ElementOutputs | None = None
1268
+ self._resources: ElementResources | None = None
1269
+ self._input_files: ElementInputFiles | None = None
1270
+ self._output_files: ElementOutputFiles | None = None
982
1271
 
983
1272
  def __repr__(self):
984
1273
  return (
@@ -990,104 +1279,104 @@ class ElementAction:
990
1279
  )
991
1280
 
992
1281
  @property
993
- def element_iteration(self):
1282
+ def element_iteration(self) -> ElementIteration:
994
1283
  """
995
1284
  The iteration for this action.
996
1285
  """
997
1286
  return self._element_iteration
998
1287
 
999
1288
  @property
1000
- def element(self):
1289
+ def element(self) -> Element:
1001
1290
  """
1002
1291
  The element for this action.
1003
1292
  """
1004
1293
  return self.element_iteration.element
1005
1294
 
1006
1295
  @property
1007
- def num_runs(self):
1296
+ def num_runs(self) -> int:
1008
1297
  """
1009
1298
  The number of runs associated with this action.
1010
1299
  """
1011
1300
  return len(self._runs)
1012
1301
 
1013
1302
  @property
1014
- def runs(self):
1303
+ def runs(self) -> list[ElementActionRun]:
1015
1304
  """
1016
1305
  The EARs that this action is enacted by.
1017
1306
  """
1018
1307
  if self._run_objs is None:
1019
1308
  self._run_objs = [
1020
- self.app.ElementActionRun(
1309
+ self._app.ElementActionRun(
1021
1310
  element_action=self,
1022
1311
  index=idx,
1023
1312
  **{
1024
1313
  k: v
1025
- for k, v in i.items()
1314
+ for k, v in run_info.items()
1026
1315
  if k not in ("elem_iter_ID", "action_idx")
1027
1316
  },
1028
1317
  )
1029
- for idx, i in enumerate(self._runs)
1318
+ for idx, run_info in enumerate(self._runs)
1030
1319
  ]
1031
1320
  return self._run_objs
1032
1321
 
1033
1322
  @property
1034
- def task(self):
1323
+ def task(self) -> WorkflowTask:
1035
1324
  """
1036
1325
  The task that this action is an instance of.
1037
1326
  """
1038
1327
  return self.element_iteration.task
1039
1328
 
1040
1329
  @property
1041
- def action_idx(self):
1330
+ def action_idx(self) -> int:
1042
1331
  """
1043
1332
  The index of the action.
1044
1333
  """
1045
1334
  return self._action_idx
1046
1335
 
1047
1336
  @property
1048
- def action(self):
1337
+ def action(self) -> Action:
1049
1338
  """
1050
1339
  The abstract task that this is a concrete model of.
1051
1340
  """
1052
1341
  return self.task.template.get_schema_action(self.action_idx)
1053
1342
 
1054
1343
  @property
1055
- def inputs(self):
1344
+ def inputs(self) -> ElementInputs:
1056
1345
  """
1057
1346
  The inputs to this action.
1058
1347
  """
1059
1348
  if not self._inputs:
1060
- self._inputs = self.app.ElementInputs(element_action=self)
1349
+ self._inputs = self._app.ElementInputs(element_action=self)
1061
1350
  return self._inputs
1062
1351
 
1063
1352
  @property
1064
- def outputs(self):
1353
+ def outputs(self) -> ElementOutputs:
1065
1354
  """
1066
1355
  The outputs from this action.
1067
1356
  """
1068
1357
  if not self._outputs:
1069
- self._outputs = self.app.ElementOutputs(element_action=self)
1358
+ self._outputs = self._app.ElementOutputs(element_action=self)
1070
1359
  return self._outputs
1071
1360
 
1072
1361
  @property
1073
- def input_files(self):
1362
+ def input_files(self) -> ElementInputFiles:
1074
1363
  """
1075
1364
  The input files to this action.
1076
1365
  """
1077
1366
  if not self._input_files:
1078
- self._input_files = self.app.ElementInputFiles(element_action=self)
1367
+ self._input_files = self._app.ElementInputFiles(element_action=self)
1079
1368
  return self._input_files
1080
1369
 
1081
1370
  @property
1082
- def output_files(self):
1371
+ def output_files(self) -> ElementOutputFiles:
1083
1372
  """
1084
1373
  The output files from this action.
1085
1374
  """
1086
1375
  if not self._output_files:
1087
- self._output_files = self.app.ElementOutputFiles(element_action=self)
1376
+ self._output_files = self._app.ElementOutputFiles(element_action=self)
1088
1377
  return self._output_files
1089
1378
 
1090
- def get_data_idx(self, path: str = None, run_idx: int = -1):
1379
+ def get_data_idx(self, path: str | None = None, run_idx: int = -1) -> DataIndex:
1091
1380
  """
1092
1381
  Get the data index for some path/run.
1093
1382
  """
@@ -1097,34 +1386,68 @@ class ElementAction:
1097
1386
  run_idx=run_idx,
1098
1387
  )
1099
1388
 
1389
+ @overload
1100
1390
  def get_parameter_sources(
1101
1391
  self,
1102
- path: str = None,
1392
+ path: str | None = None,
1393
+ *,
1103
1394
  run_idx: int = -1,
1104
- typ: str = None,
1395
+ typ: str | None = None,
1396
+ as_strings: Literal[False] = False,
1397
+ use_task_index: bool = False,
1398
+ ) -> Mapping[str, ParamSource | list[ParamSource]]:
1399
+ ...
1400
+
1401
+ @overload
1402
+ def get_parameter_sources(
1403
+ self,
1404
+ path: str | None = None,
1405
+ *,
1406
+ run_idx: int = -1,
1407
+ typ: str | None = None,
1408
+ as_strings: Literal[True],
1409
+ use_task_index: bool = False,
1410
+ ) -> Mapping[str, str]:
1411
+ ...
1412
+
1413
+ def get_parameter_sources(
1414
+ self,
1415
+ path: str | None = None,
1416
+ *,
1417
+ run_idx: int = -1,
1418
+ typ: str | None = None,
1105
1419
  as_strings: bool = False,
1106
1420
  use_task_index: bool = False,
1107
- ):
1421
+ ) -> Mapping[str, str] | Mapping[str, ParamSource | list[ParamSource]]:
1108
1422
  """
1109
1423
  Get information about where parameters originated.
1110
1424
  """
1425
+ if as_strings:
1426
+ return self.element_iteration.get_parameter_sources(
1427
+ path,
1428
+ action_idx=self.action_idx,
1429
+ run_idx=run_idx,
1430
+ typ=typ,
1431
+ as_strings=True,
1432
+ use_task_index=use_task_index,
1433
+ )
1111
1434
  return self.element_iteration.get_parameter_sources(
1112
1435
  path,
1113
1436
  action_idx=self.action_idx,
1114
1437
  run_idx=run_idx,
1115
1438
  typ=typ,
1116
- as_strings=as_strings,
1439
+ as_strings=False,
1117
1440
  use_task_index=use_task_index,
1118
1441
  )
1119
1442
 
1120
1443
  def get(
1121
1444
  self,
1122
- path: str = None,
1445
+ path: str | None = None,
1123
1446
  run_idx: int = -1,
1124
- default: Any = None,
1447
+ default: Any | None = None,
1125
1448
  raise_on_missing: bool = False,
1126
1449
  raise_on_unset: bool = False,
1127
- ):
1450
+ ) -> Any:
1128
1451
  """
1129
1452
  Get the value of a parameter.
1130
1453
  """
@@ -1137,7 +1460,7 @@ class ElementAction:
1137
1460
  raise_on_unset=raise_on_unset,
1138
1461
  )
1139
1462
 
1140
- def get_parameter_names(self, prefix: str) -> List[str]:
1463
+ def get_parameter_names(self, prefix: str) -> list[str]:
1141
1464
  """Get parameter types associated with a given prefix.
1142
1465
 
1143
1466
  For inputs, labels are ignored.
@@ -1152,12 +1475,13 @@ class ElementAction:
1152
1475
  return self.action.get_parameter_names(prefix)
1153
1476
 
1154
1477
 
1478
+ @final
1155
1479
  class ActionScope(JSONLike):
1156
1480
  """Class to represent the identification of a subset of task schema actions by a
1157
1481
  filtering process.
1158
1482
  """
1159
1483
 
1160
- _child_objects = (
1484
+ _child_objects: ClassVar[tuple[ChildObjectSpec, ...]] = (
1161
1485
  ChildObjectSpec(
1162
1486
  name="typ",
1163
1487
  json_like_name="type",
@@ -1166,46 +1490,53 @@ class ActionScope(JSONLike):
1166
1490
  ),
1167
1491
  )
1168
1492
 
1169
- def __init__(self, typ: Union[app.ActionScopeType, str], **kwargs):
1493
+ __ACTION_SCOPE_RE: ClassVar[Pattern] = re.compile(r"(\w*)(?:\[(.*)\])?")
1494
+
1495
+ def __init__(self, typ: ActionScopeType | str, **kwargs):
1170
1496
  if isinstance(typ, str):
1171
- typ = getattr(self.app.ActionScopeType, typ.upper())
1497
+ #: Action scope type.
1498
+ self.typ = self._app.ActionScopeType[typ.upper()]
1499
+ else:
1500
+ self.typ = typ
1172
1501
 
1173
- #: Action scope type.
1174
- self.typ = typ
1175
1502
  #: Any provided extra keyword arguments.
1176
1503
  self.kwargs = {k: v for k, v in kwargs.items() if v is not None}
1177
1504
 
1178
- bad_keys = set(kwargs.keys()) - ACTION_SCOPE_ALLOWED_KWARGS[self.typ.name]
1179
- if bad_keys:
1505
+ if bad_keys := set(kwargs) - ACTION_SCOPE_ALLOWED_KWARGS[self.typ.name]:
1180
1506
  raise TypeError(
1181
1507
  f"The following keyword arguments are unknown for ActionScopeType "
1182
1508
  f"{self.typ.name}: {bad_keys}."
1183
1509
  )
1184
1510
 
1185
- def __repr__(self):
1511
+ def __repr__(self) -> str:
1186
1512
  kwargs_str = ""
1187
1513
  if self.kwargs:
1188
1514
  kwargs_str = ", ".join(f"{k}={v!r}" for k, v in self.kwargs.items())
1189
1515
  return f"{self.__class__.__name__}.{self.typ.name.lower()}({kwargs_str})"
1190
1516
 
1191
- def __eq__(self, other):
1517
+ def __eq__(self, other: Any) -> bool:
1192
1518
  if not isinstance(other, self.__class__):
1193
1519
  return False
1194
- if self.typ is other.typ and self.kwargs == other.kwargs:
1195
- return True
1196
- return False
1520
+ return self.typ is other.typ and self.kwargs == other.kwargs
1521
+
1522
+ class __customdict(dict):
1523
+ pass
1197
1524
 
1198
1525
  @classmethod
1199
- def _parse_from_string(cls, string):
1200
- typ_str, kwargs_str = re.search(ACTION_SCOPE_REGEX, string).groups()
1201
- kwargs = {}
1526
+ def _parse_from_string(cls, string: str) -> dict[str, str]:
1527
+ if not (match := cls.__ACTION_SCOPE_RE.search(string)):
1528
+ raise TypeError(f"unparseable ActionScope: '{string}'")
1529
+ typ_str, kwargs_str = match.groups()
1530
+ # The types of the above two variables are idiotic, but bug reports to fix it
1531
+ # get closed because "it would break existing code that makes dumb assumptions"
1532
+ kwargs: dict[str, str] = cls.__customdict({"type": cast("str", typ_str)})
1202
1533
  if kwargs_str:
1203
- for i in kwargs_str.split(","):
1204
- name, val = i.split("=")
1534
+ for pair_str in kwargs_str.split(","):
1535
+ name, val = pair_str.split("=")
1205
1536
  kwargs[name.strip()] = val.strip()
1206
- return {"type": typ_str, **kwargs}
1537
+ return kwargs
1207
1538
 
1208
- def to_string(self):
1539
+ def to_string(self) -> str:
1209
1540
  """
1210
1541
  Render this action scope as a string.
1211
1542
  """
@@ -1215,59 +1546,62 @@ class ActionScope(JSONLike):
1215
1546
  return f"{self.typ.name.lower()}{kwargs_str}"
1216
1547
 
1217
1548
  @classmethod
1218
- def from_json_like(cls, json_like, shared_data=None):
1219
- if isinstance(json_like, str):
1220
- json_like = cls._parse_from_string(json_like)
1221
- else:
1222
- typ = json_like.pop("type")
1223
- json_like = {"type": typ, **json_like.pop("kwargs", {})}
1224
- return super().from_json_like(json_like, shared_data)
1549
+ def _from_json_like(
1550
+ cls,
1551
+ json_like: Mapping[str, Any] | Sequence[Mapping[str, Any]],
1552
+ shared_data: Mapping[str, Any],
1553
+ ) -> Self:
1554
+ if not isinstance(json_like, Mapping):
1555
+ raise TypeError("only mappings are supported for becoming an ActionScope")
1556
+ if not isinstance(json_like, cls.__customdict):
1557
+ # Wasn't processed by _parse_from_string() already
1558
+ json_like = {"type": json_like["type"], **json_like.get("kwargs", {})}
1559
+ return super()._from_json_like(json_like, shared_data)
1225
1560
 
1226
1561
  @classmethod
1227
- def any(cls):
1562
+ def any(cls) -> ActionScope:
1228
1563
  """
1229
1564
  Any scope.
1230
1565
  """
1231
1566
  return cls(typ=ActionScopeType.ANY)
1232
1567
 
1233
1568
  @classmethod
1234
- def main(cls):
1569
+ def main(cls) -> ActionScope:
1235
1570
  """
1236
1571
  The main scope.
1237
1572
  """
1238
1573
  return cls(typ=ActionScopeType.MAIN)
1239
1574
 
1240
1575
  @classmethod
1241
- def processing(cls):
1576
+ def processing(cls) -> ActionScope:
1242
1577
  """
1243
1578
  The processing scope.
1244
1579
  """
1245
1580
  return cls(typ=ActionScopeType.PROCESSING)
1246
1581
 
1247
1582
  @classmethod
1248
- def input_file_generator(cls, file=None):
1583
+ def input_file_generator(cls, file: str | None = None) -> ActionScope:
1249
1584
  """
1250
1585
  The scope of an input file generator.
1251
1586
  """
1252
1587
  return cls(typ=ActionScopeType.INPUT_FILE_GENERATOR, file=file)
1253
1588
 
1254
1589
  @classmethod
1255
- def output_file_parser(cls, output=None):
1590
+ def output_file_parser(cls, output: Parameter | str | None = None) -> ActionScope:
1256
1591
  """
1257
1592
  The scope of an output file parser.
1258
1593
  """
1259
1594
  return cls(typ=ActionScopeType.OUTPUT_FILE_PARSER, output=output)
1260
1595
 
1261
1596
 
1262
- @dataclass
1597
+ @dataclass()
1598
+ @hydrate
1263
1599
  class ActionEnvironment(JSONLike):
1264
1600
  """
1265
1601
  The environment that an action is enacted within.
1266
1602
  """
1267
1603
 
1268
- _app_attr = "app"
1269
-
1270
- _child_objects = (
1604
+ _child_objects: ClassVar[tuple[ChildObjectSpec, ...]] = (
1271
1605
  ChildObjectSpec(
1272
1606
  name="scope",
1273
1607
  class_name="ActionScope",
@@ -1275,24 +1609,24 @@ class ActionEnvironment(JSONLike):
1275
1609
  )
1276
1610
 
1277
1611
  #: The environment document.
1278
- environment: Union[str, Dict[str, Any]]
1612
+ environment: Mapping[str, Any]
1279
1613
  #: The scope.
1280
- scope: Optional[app.ActionScope] = None
1614
+ scope: ActionScope
1281
1615
 
1282
- def __post_init__(self):
1283
- if self.scope is None:
1284
- self.scope = self.app.ActionScope.any()
1285
-
1286
- orig_env = copy.deepcopy(self.environment)
1287
- if isinstance(self.environment, str):
1288
- self.environment = {"name": self.environment}
1616
+ def __init__(
1617
+ self, environment: str | dict[str, Any], scope: ActionScope | None = None
1618
+ ):
1619
+ if scope is None:
1620
+ self.scope = self._app.ActionScope.any()
1621
+ else:
1622
+ self.scope = scope
1289
1623
 
1290
- if "name" not in self.environment:
1291
- raise ActionEnvironmentMissingNameError(
1292
- f"The action-environment environment specification must include a string "
1293
- f"`name` key, or be specified as string that is that name. Provided "
1294
- f"environment key was {orig_env!r}."
1295
- )
1624
+ if isinstance(environment, str):
1625
+ self.environment = {"name": environment}
1626
+ else:
1627
+ if "name" not in environment:
1628
+ raise ActionEnvironmentMissingNameError(environment)
1629
+ self.environment = copy.deepcopy(environment)
1296
1630
 
1297
1631
 
1298
1632
  class ActionRule(JSONLike):
@@ -1318,20 +1652,23 @@ class ActionRule(JSONLike):
1318
1652
  Documentation for this rule, if any.
1319
1653
  """
1320
1654
 
1321
- _child_objects = (ChildObjectSpec(name="rule", class_name="Rule"),)
1655
+ _child_objects: ClassVar[tuple[ChildObjectSpec, ...]] = (
1656
+ ChildObjectSpec(name="rule", class_name="Rule"),
1657
+ )
1322
1658
 
1323
1659
  def __init__(
1324
1660
  self,
1325
- rule: Optional[app.Rule] = None,
1326
- check_exists: Optional[str] = None,
1327
- check_missing: Optional[str] = None,
1328
- path: Optional[str] = None,
1329
- condition: Optional[Union[Dict, ConditionLike]] = None,
1330
- cast: Optional[str] = None,
1331
- doc: Optional[str] = None,
1661
+ rule: Rule | None = None,
1662
+ check_exists: str | None = None,
1663
+ check_missing: str | None = None,
1664
+ path: str | None = None,
1665
+ condition: dict[str, Any] | ConditionLike | None = None,
1666
+ cast: str | None = None,
1667
+ doc: str | None = None,
1332
1668
  ):
1333
1669
  if rule is None:
1334
- rule = app.Rule(
1670
+ #: The rule to apply.
1671
+ self.rule = self._app.Rule(
1335
1672
  check_exists=check_exists,
1336
1673
  check_missing=check_missing,
1337
1674
  path=path,
@@ -1340,30 +1677,38 @@ class ActionRule(JSONLike):
1340
1677
  doc=doc,
1341
1678
  )
1342
1679
  elif any(
1343
- i is not None
1344
- for i in (check_exists, check_missing, path, condition, cast, doc)
1680
+ arg is not None
1681
+ for arg in (check_exists, check_missing, path, condition, cast, doc)
1345
1682
  ):
1346
1683
  raise TypeError(
1347
1684
  f"{self.__class__.__name__} `rule` specified in addition to rule "
1348
1685
  f"constructor arguments."
1349
1686
  )
1687
+ else:
1688
+ self.rule = rule
1350
1689
 
1351
- #: The rule to apply.
1352
- self.rule = rule
1353
1690
  #: The action that contains this rule.
1354
- self.action = None # assigned by parent action
1691
+ self.action: Action | None = None # assigned by parent action
1355
1692
  #: The command that is guarded by this rule.
1356
- self.command = None # assigned by parent command
1693
+ self.command: Command | None = None # assigned by parent command
1357
1694
 
1358
- def __eq__(self, other):
1695
+ def __eq__(self, other: Any) -> bool:
1359
1696
  if type(other) is not self.__class__:
1360
1697
  return False
1361
- if self.rule == other.rule:
1362
- return True
1363
- return False
1698
+ return self.rule == other.rule
1699
+
1700
+ @property
1701
+ def __parent_action(self) -> Action:
1702
+ if self.action:
1703
+ return self.action
1704
+ else:
1705
+ assert self.command
1706
+ act = self.command.action
1707
+ assert act
1708
+ return act
1364
1709
 
1365
1710
  @TimeIt.decorator
1366
- def test(self, element_iteration: app.ElementIteration) -> bool:
1711
+ def test(self, element_iteration: ElementIteration) -> bool:
1367
1712
  """
1368
1713
  Test if this rule holds for a particular iteration.
1369
1714
 
@@ -1372,31 +1717,38 @@ class ActionRule(JSONLike):
1372
1717
  element_iteration:
1373
1718
  The iteration to apply this rule to.
1374
1719
  """
1375
- return self.rule.test(element_like=element_iteration, action=self.action)
1720
+
1721
+ return self.rule.test(
1722
+ element_like=element_iteration,
1723
+ action=self.__parent_action,
1724
+ )
1376
1725
 
1377
1726
  @classmethod
1378
- def check_exists(cls, check_exists):
1727
+ def check_exists(cls, check_exists: str) -> ActionRule:
1379
1728
  """
1380
1729
  Make an action rule that checks if a named attribute is present.
1381
1730
 
1382
1731
  Parameter
1383
1732
  ---------
1384
- check_exists: str
1733
+ check_exists:
1385
1734
  The path to the attribute to check for.
1386
1735
  """
1387
- return cls(rule=app.Rule(check_exists=check_exists))
1736
+ return cls(rule=cls._app.Rule(check_exists=check_exists))
1388
1737
 
1389
1738
  @classmethod
1390
- def check_missing(cls, check_missing):
1739
+ def check_missing(cls, check_missing: str) -> ActionRule:
1391
1740
  """
1392
1741
  Make an action rule that checks if a named attribute is absent.
1393
1742
 
1394
1743
  Parameter
1395
1744
  ---------
1396
- check_missing: str
1745
+ check_missing:
1397
1746
  The path to the attribute to check for.
1398
1747
  """
1399
- return cls(rule=app.Rule(check_missing=check_missing))
1748
+ return cls(rule=cls._app.Rule(check_missing=check_missing))
1749
+
1750
+
1751
+ _ALL_OTHER_SYM = "*"
1400
1752
 
1401
1753
 
1402
1754
  class Action(JSONLike):
@@ -1444,12 +1796,12 @@ class Action(JSONLike):
1444
1796
  The names of files to be deleted after each step.
1445
1797
  """
1446
1798
 
1447
- _app_attr = "app"
1448
- _child_objects = (
1799
+ _child_objects: ClassVar[tuple[ChildObjectSpec, ...]] = (
1449
1800
  ChildObjectSpec(
1450
1801
  name="commands",
1451
1802
  class_name="Command",
1452
1803
  is_multiple=True,
1804
+ parent_ref="action",
1453
1805
  ),
1454
1806
  ChildObjectSpec(
1455
1807
  name="input_file_generators",
@@ -1501,42 +1853,45 @@ class Action(JSONLike):
1501
1853
  shared_data_name="command_files",
1502
1854
  ),
1503
1855
  )
1504
- _script_data_formats = ("direct", "json", "hdf5")
1856
+ _script_data_formats: ClassVar[tuple[str, ...]] = ("direct", "json", "hdf5")
1505
1857
 
1506
1858
  def __init__(
1507
1859
  self,
1508
- environments: Optional[List[app.ActionEnvironment]] = None,
1509
- commands: Optional[List[app.Command]] = None,
1510
- script: Optional[str] = None,
1511
- script_data_in: Optional[str] = None,
1512
- script_data_out: Optional[str] = None,
1513
- script_data_files_use_opt: Optional[bool] = False,
1514
- script_exe: Optional[str] = None,
1515
- script_pass_env_spec: Optional[bool] = False,
1516
- abortable: Optional[bool] = False,
1517
- input_file_generators: Optional[List[app.InputFileGenerator]] = None,
1518
- output_file_parsers: Optional[List[app.OutputFileParser]] = None,
1519
- input_files: Optional[List[app.FileSpec]] = None,
1520
- output_files: Optional[List[app.FileSpec]] = None,
1521
- rules: Optional[List[app.ActionRule]] = None,
1522
- save_files: Optional[List[str]] = None,
1523
- clean_up: Optional[List[str]] = None,
1860
+ environments: list[ActionEnvironment] | None = None,
1861
+ commands: list[Command] | None = None,
1862
+ script: str | None = None,
1863
+ script_data_in: str | Mapping[str, str | ScriptData] | None = None,
1864
+ script_data_out: str | Mapping[str, str | ScriptData] | None = None,
1865
+ script_data_files_use_opt: bool = False,
1866
+ script_exe: str | None = None,
1867
+ script_pass_env_spec: bool = False,
1868
+ abortable: bool = False,
1869
+ input_file_generators: list[InputFileGenerator] | None = None,
1870
+ output_file_parsers: list[OutputFileParser] | None = None,
1871
+ input_files: list[FileSpec] | None = None,
1872
+ output_files: list[FileSpec] | None = None,
1873
+ rules: list[ActionRule] | None = None,
1874
+ save_files: list[FileSpec] | None = None,
1875
+ clean_up: list[str] | None = None,
1876
+ requires_dir: bool | None = None,
1524
1877
  ):
1525
1878
  #: The commands to be run by this action.
1526
1879
  self.commands = commands or []
1527
1880
  #: The name of the Python script to run.
1528
1881
  self.script = script
1529
1882
  #: Information about data input to the script.
1530
- self.script_data_in = script_data_in
1883
+ self.script_data_in: dict[str, ScriptData] | None = None
1884
+ self._script_data_in = script_data_in
1531
1885
  #: Information about data output from the script.
1532
- self.script_data_out = script_data_out
1886
+ self.script_data_out: dict[str, ScriptData] | None = None
1887
+ self._script_data_out = script_data_out
1533
1888
  #: If True, script data input and output file paths will be passed to the script
1534
1889
  #: execution command line with an option like `--input-json` or `--output-hdf5`
1535
1890
  #: etc. If False, the file paths will be passed on their own. For Python scripts,
1536
1891
  #: options are always passed, and this parameter is overwritten to be True,
1537
1892
  #: regardless of its initial value.
1538
1893
  self.script_data_files_use_opt = (
1539
- script_data_files_use_opt if not self.script_is_python else True
1894
+ script_data_files_use_opt if not self.script_is_python_snippet else True
1540
1895
  )
1541
1896
  #: The executable to use to run the script.
1542
1897
  self.script_exe = script_exe.lower() if script_exe else None
@@ -1544,7 +1899,7 @@ class Action(JSONLike):
1544
1899
  self.script_pass_env_spec = script_pass_env_spec
1545
1900
  #: The environments in which this action can run.
1546
1901
  self.environments = environments or [
1547
- self.app.ActionEnvironment(environment="null_env")
1902
+ self._app.ActionEnvironment(environment="null_env")
1548
1903
  ]
1549
1904
  #: Whether this action can be aborted.
1550
1905
  self.abortable = abortable
@@ -1553,9 +1908,9 @@ class Action(JSONLike):
1553
1908
  #: Any applicable output file parsers.
1554
1909
  self.output_file_parsers = output_file_parsers or []
1555
1910
  #: The input files to the action's commands.
1556
- self.input_files = self._resolve_input_files(input_files or [])
1911
+ self.input_files = self.__resolve_input_files(input_files or [])
1557
1912
  #: The output files from the action's commands.
1558
- self.output_files = self._resolve_output_files(output_files or [])
1913
+ self.output_files = self.__resolve_output_files(output_files or [])
1559
1914
  #: How to determine whether to run the action.
1560
1915
  self.rules = rules or []
1561
1916
  #: The names of files to be explicitly saved after each step.
@@ -1563,125 +1918,138 @@ class Action(JSONLike):
1563
1918
  #: The names of files to be deleted after each step.
1564
1919
  self.clean_up = clean_up or []
1565
1920
 
1566
- self._task_schema = None # assigned by parent TaskSchema
1921
+ if requires_dir is None:
1922
+ requires_dir = (
1923
+ True if self.input_file_generators or self.output_file_parsers else False
1924
+ )
1925
+ self.requires_dir = requires_dir
1926
+
1927
+ self._task_schema: TaskSchema | None = None # assigned by parent TaskSchema
1567
1928
  self._from_expand = False # assigned on creation of new Action by `expand`
1568
1929
 
1569
1930
  self._set_parent_refs()
1570
1931
 
1571
- def process_script_data_formats(self):
1932
+ def process_script_data_formats(self) -> None:
1572
1933
  """
1573
1934
  Convert script data information into standard form.
1574
1935
  """
1575
- self.script_data_in = self._process_script_data_in(self.script_data_in)
1576
- self.script_data_out = self._process_script_data_out(self.script_data_out)
1936
+ self.script_data_in = self.__process_script_data(self._script_data_in, "inputs")
1937
+ self.script_data_out = self.__process_script_data(
1938
+ self._script_data_out, "outputs"
1939
+ )
1577
1940
 
1578
- def _process_script_data_format(
1579
- self, data_fmt: Union[str, Dict[str, Union[str, Dict[str, str]]]], prefix: str
1580
- ) -> Dict[str, str]:
1581
- if not data_fmt:
1582
- return {}
1941
+ def __process_script_data_str(
1942
+ self, data_fmt: str, param_names: Iterable[str]
1943
+ ) -> dict[str, ScriptData]:
1944
+ # include all input parameters, using specified data format
1945
+ data_fmt = data_fmt.lower()
1946
+ return {k: {"format": data_fmt} for k in param_names}
1583
1947
 
1584
- _all_other_sym = "*"
1585
- param_names = self.get_parameter_names(prefix)
1586
- if isinstance(data_fmt, str):
1587
- # include all input parameters, using specified data format
1588
- data_fmt = data_fmt.lower()
1589
- all_params = {k: {"format": data_fmt} for k in param_names}
1590
- else:
1591
- all_params = copy.copy(data_fmt)
1592
- for k, v in all_params.items():
1593
- # values might be strings, or dicts with "format" and potentially other
1594
- # kwargs:
1595
- try:
1596
- fmt = v["format"]
1597
- except TypeError:
1598
- fmt = v
1599
- kwargs = {}
1600
- else:
1601
- kwargs = {k2: v2 for k2, v2 in v.items() if k2 != "format"}
1602
- finally:
1603
- all_params[k] = {"format": fmt.lower(), **kwargs}
1604
-
1605
- if prefix == "inputs":
1606
- # expand unlabelled-multiple inputs to multiple labelled inputs:
1607
- multi_types = self.task_schema.multi_input_types
1608
- multis = {}
1609
- for k in list(all_params.keys()):
1610
- if k in multi_types:
1611
- k_fmt = all_params.pop(k)
1612
- for i in param_names:
1613
- if i.startswith(k):
1614
- multis[i] = copy.deepcopy(k_fmt)
1948
+ def __process_script_data_dict(
1949
+ self,
1950
+ data_fmt: Mapping[str, str | ScriptData],
1951
+ prefix: str,
1952
+ param_names: Iterable[str],
1953
+ ) -> dict[str, ScriptData]:
1954
+ all_params: dict[str, ScriptData] = {}
1955
+ for nm, v in data_fmt.items():
1956
+ # values might be strings, or dicts with "format" and potentially other
1957
+ # kwargs:
1958
+ if isinstance(v, dict):
1959
+ # Make sure format is first key
1960
+ v2: ScriptData = {
1961
+ "format": v["format"],
1962
+ }
1963
+ all_params[nm] = v2
1964
+ v2.update(v)
1965
+ else:
1966
+ all_params[nm] = {"format": v.lower()}
1967
+
1968
+ if prefix == "inputs":
1969
+ # expand unlabelled-multiple inputs to multiple labelled inputs:
1970
+ multi_types = set(self.task_schema.multi_input_types)
1971
+ multis: dict[str, ScriptData] = {}
1972
+ for nm in tuple(all_params):
1973
+ if nm in multi_types:
1974
+ k_fmt = all_params.pop(nm)
1975
+ for name in param_names:
1976
+ if name.startswith(nm):
1977
+ multis[name] = copy.deepcopy(k_fmt)
1978
+ if multis:
1615
1979
  all_params = {
1616
1980
  **multis,
1617
1981
  **all_params,
1618
1982
  }
1619
1983
 
1620
- if _all_other_sym in all_params:
1621
- # replace catch-all with all other input/output names:
1622
- other_fmt = all_params[_all_other_sym]
1623
- all_params = {k: v for k, v in all_params.items() if k != _all_other_sym}
1624
- other = set(param_names) - set(all_params.keys())
1625
- for i in other:
1626
- all_params[i] = copy.deepcopy(other_fmt)
1984
+ if _ALL_OTHER_SYM in all_params:
1985
+ # replace catch-all with all other input/output names:
1986
+ other_fmt = all_params[_ALL_OTHER_SYM]
1987
+ all_params = {k: v for k, v in all_params.items() if k != _ALL_OTHER_SYM}
1988
+ for name in set(param_names).difference(all_params):
1989
+ all_params[name] = copy.deepcopy(other_fmt)
1990
+ return all_params
1991
+
1992
+ def __process_script_data(
1993
+ self, data_fmt: str | Mapping[str, str | ScriptData] | None, prefix: str
1994
+ ) -> dict[str, ScriptData]:
1995
+ if not data_fmt:
1996
+ return {}
1997
+
1998
+ param_names = self.get_parameter_names(prefix)
1999
+ if isinstance(data_fmt, str):
2000
+ all_params = self.__process_script_data_str(data_fmt, param_names)
2001
+ else:
2002
+ all_params = self.__process_script_data_dict(data_fmt, prefix, param_names)
1627
2003
 
1628
2004
  # validation:
1629
2005
  allowed_keys = ("format", "all_iterations")
1630
2006
  for k, v in all_params.items():
1631
2007
  # validate parameter name (sub-parameters are allowed):
1632
2008
  if k.split(".")[0] not in param_names:
1633
- raise UnknownScriptDataParameter(
1634
- f"Script data parameter {k!r} is not a known parameter of the "
1635
- f"action. Parameters ({prefix}) are: {param_names!r}."
1636
- )
2009
+ raise UnknownScriptDataParameter(k, prefix, param_names)
1637
2010
  # validate format:
1638
2011
  if v["format"] not in self._script_data_formats:
1639
2012
  raise UnsupportedScriptDataFormat(
1640
- f"Script data format {v!r} for {prefix[:-1]} parameter {k!r} is not "
1641
- f"understood. Available script data formats are: "
1642
- f"{self._script_data_formats!r}."
2013
+ v, prefix[:-1], k, self._script_data_formats
1643
2014
  )
1644
-
1645
- for k2 in v:
1646
- if k2 not in allowed_keys:
1647
- raise UnknownScriptDataKey(
1648
- f"Script data key {k2!r} is not understood. Allowed keys are: "
1649
- f"{allowed_keys!r}."
1650
- )
2015
+ if any((bad_key := k2) for k2 in v if k2 not in allowed_keys):
2016
+ raise UnknownScriptDataKey(bad_key, allowed_keys)
1651
2017
 
1652
2018
  return all_params
1653
2019
 
1654
- def _process_script_data_in(
1655
- self, data_fmt: Union[str, Dict[str, str]]
1656
- ) -> Dict[str, str]:
1657
- return self._process_script_data_format(data_fmt, "inputs")
1658
-
1659
- def _process_script_data_out(
1660
- self, data_fmt: Union[str, Dict[str, str]]
1661
- ) -> Dict[str, str]:
1662
- return self._process_script_data_format(data_fmt, "outputs")
1663
-
1664
2020
  @property
1665
- def script_data_in_grouped(self) -> Dict[str, List[str]]:
2021
+ def script_data_in_grouped(self) -> Mapping[str, Mapping[str, Mapping[str, str]]]:
1666
2022
  """Get input parameter types by script data-in format."""
1667
- return swap_nested_dict_keys(dct=self.script_data_in, inner_key="format")
2023
+ if self.script_data_in is None:
2024
+ self.process_script_data_formats()
2025
+ assert self.script_data_in is not None
2026
+ return swap_nested_dict_keys(
2027
+ dct=cast("dict", self.script_data_in), inner_key="format"
2028
+ )
1668
2029
 
1669
2030
  @property
1670
- def script_data_out_grouped(self) -> Dict[str, List[str]]:
2031
+ def script_data_out_grouped(self) -> Mapping[str, Mapping[str, Mapping[str, str]]]:
1671
2032
  """Get output parameter types by script data-out format."""
1672
- return swap_nested_dict_keys(dct=self.script_data_out, inner_key="format")
2033
+ if self.script_data_out is None:
2034
+ self.process_script_data_formats()
2035
+ assert self.script_data_out is not None
2036
+ return swap_nested_dict_keys(
2037
+ dct=cast("dict", self.script_data_out), inner_key="format"
2038
+ )
1673
2039
 
1674
2040
  @property
1675
2041
  def script_data_in_has_files(self) -> bool:
1676
2042
  """Return True if the script requires some inputs to be passed via an
1677
2043
  intermediate file format."""
1678
- return bool(set(self.script_data_in_grouped.keys()) - {"direct"}) # TODO: test
2044
+ # TODO: should set `requires_dir` to True if this is True?
2045
+ return bool(set(self.script_data_in_grouped) - {"direct"}) # TODO: test
1679
2046
 
1680
2047
  @property
1681
2048
  def script_data_out_has_files(self) -> bool:
1682
2049
  """Return True if the script produces some outputs via an intermediate file
1683
2050
  format."""
1684
- return bool(set(self.script_data_out_grouped.keys()) - {"direct"}) # TODO: test
2051
+ # TODO: should set `requires_dir` to True if this is True?
2052
+ return bool(set(self.script_data_out_grouped) - {"direct"}) # TODO: test
1685
2053
 
1686
2054
  @property
1687
2055
  def script_data_in_has_direct(self) -> bool:
@@ -1696,15 +2064,29 @@ class Action(JSONLike):
1696
2064
  return "direct" in self.script_data_out_grouped # TODO: test
1697
2065
 
1698
2066
  @property
1699
- def script_is_python(self) -> bool:
1700
- """Return True if the script is a Python script (determined by the file
2067
+ def script_is_python_snippet(self) -> bool:
2068
+ """Return True if the script is a Python snippet script (determined by the file
1701
2069
  extension)"""
1702
- if self.script:
1703
- snip_path = self.get_snippet_script_path(self.script)
1704
- if snip_path:
1705
- return snip_path.suffix == ".py"
2070
+ if self.script and (snip_path := self.get_snippet_script_path(self.script)):
2071
+ return snip_path.suffix == ".py"
2072
+ return False
2073
+
2074
+ @override
2075
+ def _postprocess_to_dict(self, d: dict[str, Any]) -> dict[str, Any]:
2076
+ d = super()._postprocess_to_dict(d)
2077
+ d["script_data_in"] = d.pop("_script_data_in")
2078
+ d["script_data_out"] = d.pop("_script_data_out")
2079
+ return d
2080
+
2081
+ @property
2082
+ def is_IFG(self):
2083
+ return bool(self.input_file_generators)
2084
+
2085
+ @property
2086
+ def is_OFP(self):
2087
+ return bool(self.output_file_parsers)
1706
2088
 
1707
- def __deepcopy__(self, memo):
2089
+ def __deepcopy__(self, memo: dict[int, Any]) -> Self:
1708
2090
  kwargs = self.to_dict()
1709
2091
  _from_expand = kwargs.pop("_from_expand")
1710
2092
  _task_schema = kwargs.pop("_task_schema", None)
@@ -1714,41 +2096,41 @@ class Action(JSONLike):
1714
2096
  return obj
1715
2097
 
1716
2098
  @property
1717
- def task_schema(self):
2099
+ def task_schema(self) -> TaskSchema:
1718
2100
  """
1719
2101
  The task schema that this action came from.
1720
2102
  """
2103
+ assert self._task_schema is not None
1721
2104
  return self._task_schema
1722
2105
 
1723
- def _resolve_input_files(self, input_files):
2106
+ def __resolve_input_files(self, input_files: list[FileSpec]) -> list[FileSpec]:
1724
2107
  in_files = input_files
1725
- for i in self.input_file_generators:
1726
- if i.input_file not in in_files:
1727
- in_files.append(i.input_file)
2108
+ for ifg in self.input_file_generators:
2109
+ if ifg.input_file not in in_files:
2110
+ in_files.append(ifg.input_file)
1728
2111
  return in_files
1729
2112
 
1730
- def _resolve_output_files(self, output_files):
2113
+ def __resolve_output_files(self, output_files: list[FileSpec]) -> list[FileSpec]:
1731
2114
  out_files = output_files
1732
- for i in self.output_file_parsers:
1733
- for j in i.output_files:
1734
- if j not in out_files:
1735
- out_files.append(j)
2115
+ for ofp in self.output_file_parsers:
2116
+ for out_file in ofp.output_files:
2117
+ if out_file not in out_files:
2118
+ out_files.append(out_file)
1736
2119
  return out_files
1737
2120
 
1738
2121
  def __repr__(self) -> str:
1739
2122
  IFGs = {
1740
- i.input_file.label: [j.typ for j in i.inputs]
1741
- for i in self.input_file_generators
2123
+ ifg.input_file.label: [inp.typ for inp in ifg.inputs]
2124
+ for ifg in self.input_file_generators
2125
+ }
2126
+ OFPs = {
2127
+ ofp.output.typ
2128
+ if ofp.output
2129
+ else f"OFP_{idx}": [out_file.label for out_file in ofp.output_files]
2130
+ for idx, ofp in enumerate(self.output_file_parsers)
1742
2131
  }
1743
- OFPs = {}
1744
- for idx, i in enumerate(self.output_file_parsers):
1745
- if i.output is not None:
1746
- key = i.output.typ
1747
- else:
1748
- key = f"OFP_{idx}"
1749
- OFPs[key] = [j.label for j in i.output_files]
1750
2132
 
1751
- out = []
2133
+ out: list[str] = []
1752
2134
  if self.commands:
1753
2135
  out.append(f"commands={self.commands!r}")
1754
2136
  if self.script:
@@ -1764,10 +2146,10 @@ class Action(JSONLike):
1764
2146
 
1765
2147
  return f"{self.__class__.__name__}({', '.join(out)})"
1766
2148
 
1767
- def __eq__(self, other):
1768
- if type(other) is not self.__class__:
2149
+ def __eq__(self, other: Any) -> bool:
2150
+ if not isinstance(other, self.__class__):
1769
2151
  return False
1770
- if (
2152
+ return (
1771
2153
  self.commands == other.commands
1772
2154
  and self.script == other.script
1773
2155
  and self.environments == other.environments
@@ -1775,63 +2157,98 @@ class Action(JSONLike):
1775
2157
  and self.input_file_generators == other.input_file_generators
1776
2158
  and self.output_file_parsers == other.output_file_parsers
1777
2159
  and self.rules == other.rules
1778
- ):
1779
- return True
1780
- return False
2160
+ )
2161
+
2162
+ @staticmethod
2163
+ def env_spec_to_hashable(
2164
+ env_spec: Mapping[str, Any],
2165
+ ) -> tuple[tuple[str, ...], tuple[Any, ...]]:
2166
+ keys, values = zip(*env_spec.items()) if env_spec else ((), ())
2167
+ return tuple(keys), tuple(values)
2168
+
2169
+ @staticmethod
2170
+ def env_spec_from_hashable(
2171
+ env_spec_h: tuple[tuple[str, ...], tuple[Any, ...]],
2172
+ ) -> dict[str, Any]:
2173
+ return dict(zip(*env_spec_h))
2174
+
2175
+ def get_script_determinants(self) -> tuple:
2176
+ """Get the attributes that affect the script."""
2177
+ return (
2178
+ self.script,
2179
+ self.script_data_in,
2180
+ self.script_data_out,
2181
+ self.script_data_files_use_opt,
2182
+ self.script_exe,
2183
+ )
2184
+
2185
+ def get_script_determinant_hash(self, env_specs: dict | None = None) -> int:
2186
+ """Get a hash of the instance attributes that uniquely determine the script.
2187
+
2188
+ The hash is not stable across sessions or machines.
2189
+
2190
+ """
2191
+ env_specs = env_specs or {}
2192
+ return get_hash(
2193
+ (self.get_script_determinants(), self.env_spec_to_hashable(env_specs))
2194
+ )
1781
2195
 
1782
2196
  @classmethod
1783
- def _json_like_constructor(cls, json_like):
2197
+ def _json_like_constructor(cls, json_like) -> Self:
1784
2198
  """Invoked by `JSONLike.from_json_like` instead of `__init__`."""
1785
2199
  _from_expand = json_like.pop("_from_expand", None)
1786
2200
  obj = cls(**json_like)
1787
2201
  obj._from_expand = _from_expand
1788
2202
  return obj
1789
2203
 
1790
- def get_parameter_dependence(self, parameter: app.SchemaParameter):
2204
+ def get_parameter_dependence(self, parameter: SchemaParameter) -> ParameterDependence:
1791
2205
  """Find if/where a given parameter is used by the action."""
2206
+ # names of input files whose generation requires this parameter
1792
2207
  writer_files = [
1793
- i.input_file
1794
- for i in self.input_file_generators
1795
- if parameter.parameter in i.inputs
1796
- ] # names of input files whose generation requires this parameter
1797
- commands = [] # TODO: indices of commands in which this parameter appears
1798
- out = {"input_file_writers": writer_files, "commands": commands}
1799
- return out
2208
+ ifg.input_file
2209
+ for ifg in self.input_file_generators
2210
+ if parameter.parameter in ifg.inputs
2211
+ ]
2212
+ # TODO: indices of commands in which this parameter appears
2213
+ commands: list[int] = []
2214
+ return {"input_file_writers": writer_files, "commands": commands}
1800
2215
 
1801
- def _get_resolved_action_env(
2216
+ def __get_resolved_action_env(
1802
2217
  self,
1803
- relevant_scopes: Tuple[app.ActionScopeType],
1804
- input_file_generator: app.InputFileGenerator = None,
1805
- output_file_parser: app.OutputFileParser = None,
1806
- commands: List[app.Command] = None,
1807
- ):
1808
- possible = [i for i in self.environments if i.scope.typ in relevant_scopes]
2218
+ relevant_scopes: tuple[ActionScopeType, ...],
2219
+ input_file_generator: InputFileGenerator | None = None,
2220
+ output_file_parser: OutputFileParser | None = None,
2221
+ commands: list[Command] | None = None,
2222
+ ) -> ActionEnvironment:
2223
+ possible = [
2224
+ env
2225
+ for env in self.environments
2226
+ if env.scope and env.scope.typ in relevant_scopes
2227
+ ]
1809
2228
  if not possible:
1810
2229
  if input_file_generator:
1811
- msg = f"input file generator {input_file_generator.input_file.label!r}"
2230
+ raise MissingCompatibleActionEnvironment(
2231
+ f"input file generator {input_file_generator.input_file.label!r}"
2232
+ )
1812
2233
  elif output_file_parser:
1813
2234
  if output_file_parser.output is not None:
1814
2235
  ofp_id = output_file_parser.output.typ
1815
2236
  else:
1816
2237
  ofp_id = "<unnamed>"
1817
- msg = f"output file parser {ofp_id!r}"
2238
+ raise MissingCompatibleActionEnvironment(f"output file parser {ofp_id!r}")
1818
2239
  else:
1819
- msg = f"commands {commands!r}"
1820
- raise MissingCompatibleActionEnvironment(
1821
- f"No compatible environment is specified for the {msg}."
1822
- )
2240
+ raise MissingCompatibleActionEnvironment(f"commands {commands!r}")
1823
2241
 
1824
- # sort by scope type specificity:
1825
- possible_srt = sorted(possible, key=lambda i: i.scope.typ.value, reverse=True)
1826
- return possible_srt[0]
2242
+ # get max by scope type specificity:
2243
+ return max(possible, key=lambda i: i.scope.typ.value)
1827
2244
 
1828
2245
  def get_input_file_generator_action_env(
1829
- self, input_file_generator: app.InputFileGenerator
1830
- ):
2246
+ self, input_file_generator: InputFileGenerator
2247
+ ) -> ActionEnvironment:
1831
2248
  """
1832
2249
  Get the actual environment to use for an input file generator.
1833
2250
  """
1834
- return self._get_resolved_action_env(
2251
+ return self.__get_resolved_action_env(
1835
2252
  relevant_scopes=(
1836
2253
  ActionScopeType.ANY,
1837
2254
  ActionScopeType.PROCESSING,
@@ -1840,11 +2257,13 @@ class Action(JSONLike):
1840
2257
  input_file_generator=input_file_generator,
1841
2258
  )
1842
2259
 
1843
- def get_output_file_parser_action_env(self, output_file_parser: app.OutputFileParser):
2260
+ def get_output_file_parser_action_env(
2261
+ self, output_file_parser: OutputFileParser
2262
+ ) -> ActionEnvironment:
1844
2263
  """
1845
2264
  Get the actual environment to use for an output file parser.
1846
2265
  """
1847
- return self._get_resolved_action_env(
2266
+ return self.__get_resolved_action_env(
1848
2267
  relevant_scopes=(
1849
2268
  ActionScopeType.ANY,
1850
2269
  ActionScopeType.PROCESSING,
@@ -1853,11 +2272,11 @@ class Action(JSONLike):
1853
2272
  output_file_parser=output_file_parser,
1854
2273
  )
1855
2274
 
1856
- def get_commands_action_env(self):
2275
+ def get_commands_action_env(self) -> ActionEnvironment:
1857
2276
  """
1858
2277
  Get the actual environment to use for the action commands.
1859
2278
  """
1860
- return self._get_resolved_action_env(
2279
+ return self.__get_resolved_action_env(
1861
2280
  relevant_scopes=(ActionScopeType.ANY, ActionScopeType.MAIN),
1862
2281
  commands=self.commands,
1863
2282
  )
@@ -1868,113 +2287,295 @@ class Action(JSONLike):
1868
2287
  """
1869
2288
  return self.get_environment_spec()["name"]
1870
2289
 
1871
- def get_environment_spec(self) -> Dict[str, Any]:
2290
+ def get_environment_spec(self) -> Mapping[str, Any]:
1872
2291
  """
1873
2292
  Get the specification for the primary envionment, assuming it has been expanded.
1874
2293
  """
1875
2294
  if not self._from_expand:
1876
2295
  raise RuntimeError(
1877
- f"Cannot choose a single environment from this action because it is not "
1878
- f"expanded, meaning multiple action environments might exist."
2296
+ "Cannot choose a single environment from this action because it is not "
2297
+ "expanded, meaning multiple action environments might exist."
1879
2298
  )
1880
2299
  return self.environments[0].environment
1881
2300
 
1882
- def get_environment(self) -> app.Environment:
2301
+ def get_environment(self) -> Environment:
1883
2302
  """
1884
2303
  Get the primary environment.
1885
2304
  """
1886
- return self.app.envs.get(**self.get_environment_spec())
2305
+ return self._app.envs.get(**self.get_environment_spec())
1887
2306
 
1888
2307
  @staticmethod
1889
- def is_snippet_script(script: str) -> bool:
2308
+ def is_snippet_script(script: str | None) -> bool:
1890
2309
  """Returns True if the provided script string represents a script snippets that is
1891
2310
  to be modified before execution (e.g. to receive and provide parameter data)."""
2311
+ if script is None:
2312
+ return False
1892
2313
  return script.startswith("<<script:")
1893
2314
 
2315
+ __SCRIPT_NAME_RE: ClassVar[Pattern] = re.compile(
2316
+ r"\<\<script:(?:.*(?:\/|\\))*(.*)\>\>"
2317
+ )
2318
+
1894
2319
  @classmethod
1895
2320
  def get_script_name(cls, script: str) -> str:
1896
- """Return the script name."""
2321
+ """Return the script name.
2322
+
2323
+ If `script` is a snippet script path, this method returns the name of the script
2324
+ (i.e. the final component of the path). If `script` is not a snippet script path
2325
+ (does not start with "<<script:"), then `script` is simply returned.
2326
+
2327
+ """
1897
2328
  if cls.is_snippet_script(script):
1898
- pattern = r"\<\<script:(?:.*(?:\/|\\))*(.*)\>\>"
1899
- match_obj = re.match(pattern, script)
1900
- return match_obj.group(1)
2329
+ if not (match_obj := cls.__SCRIPT_NAME_RE.match(script)):
2330
+ raise ValueError("incomplete <<script:>>")
2331
+ return match_obj[1]
2332
+ # a script we can expect in the working directory, which might have been generated
2333
+ # by a previous action:
2334
+ return script
2335
+
2336
+ @overload
2337
+ def get_script_artifact_name(
2338
+ self,
2339
+ env_spec: Mapping[str, Any],
2340
+ act_idx: int,
2341
+ ret_specifiers: Literal[False] = False,
2342
+ include_suffix: bool = True,
2343
+ specs_suffix_delim: str = ".",
2344
+ ) -> tuple[str, Path]:
2345
+ ...
2346
+
2347
+ @overload
2348
+ def get_script_artifact_name(
2349
+ self,
2350
+ env_spec: Mapping[str, Any],
2351
+ act_idx: int,
2352
+ ret_specifiers: Literal[True],
2353
+ include_suffix: bool = True,
2354
+ specs_suffix_delim: str = ".",
2355
+ ) -> tuple[str, Path, dict]:
2356
+ ...
2357
+
2358
+ def get_script_artifact_name(
2359
+ self,
2360
+ env_spec: Mapping[str, Any],
2361
+ act_idx: int,
2362
+ ret_specifiers: bool = False,
2363
+ include_suffix: bool = True,
2364
+ specs_suffix_delim: str = ".",
2365
+ ) -> tuple[str, Path] | tuple[str, Path, dict]:
2366
+ """Return the script name that is used when writing the script to the artifacts
2367
+ directory within the workflow.
2368
+
2369
+ Like `Action.get_script_name`, this is only applicable for snippet scripts.
2370
+
2371
+ """
2372
+ snip_path_specs = self.get_snippet_script_path(
2373
+ self.script,
2374
+ env_spec,
2375
+ ret_specifiers=True,
2376
+ )
2377
+ assert snip_path_specs
2378
+ snip_path, specifiers = snip_path_specs
2379
+ specs_suffix = "__".join(f"{k}_{v}" for k, v in specifiers.items())
2380
+ if specs_suffix:
2381
+ specs_suffix = f"{specs_suffix_delim}{specs_suffix}"
2382
+
2383
+ name = f"{self.task_schema.name}_act_{act_idx}{specs_suffix}"
2384
+ if include_suffix:
2385
+ name += snip_path.suffix
2386
+
2387
+ if ret_specifiers:
2388
+ return name, snip_path, specifiers
1901
2389
  else:
1902
- # a script we can expect in the working directory:
1903
- return script
2390
+ return name, snip_path
2391
+
2392
+ __SCRIPT_RE: ClassVar[Pattern] = re.compile(r"\<\<script:(.*:?)\>\>")
2393
+ __ENV_RE: ClassVar[Pattern] = re.compile(r"\<\<env:(.*?)\>\>")
1904
2394
 
2395
+ @overload
1905
2396
  @classmethod
1906
2397
  def get_snippet_script_str(
1907
- cls, script, env_spec: Optional[Dict[str, Any]] = None
2398
+ cls,
2399
+ script: str,
2400
+ env_spec: Mapping[str, Any] | None = None,
2401
+ ret_specifiers: Literal[False] = False,
1908
2402
  ) -> str:
1909
- """
1910
- Get the substituted script snippet path as a string.
2403
+ ...
2404
+
2405
+ @overload
2406
+ @classmethod
2407
+ def get_snippet_script_str(
2408
+ cls,
2409
+ script: str,
2410
+ env_spec: Mapping[str, Any] | None = None,
2411
+ *,
2412
+ ret_specifiers: Literal[True],
2413
+ ) -> tuple[str, dict[str, Any]]:
2414
+ ...
2415
+
2416
+ @overload
2417
+ @classmethod
2418
+ def get_snippet_script_str(
2419
+ cls,
2420
+ script: str,
2421
+ env_spec: Mapping[str, Any] | None = None,
2422
+ *,
2423
+ ret_specifiers: bool,
2424
+ ) -> str | tuple[str, dict[str, Any]]:
2425
+ ...
2426
+
2427
+ @classmethod
2428
+ def get_snippet_script_str(
2429
+ cls,
2430
+ script: str,
2431
+ env_spec: Mapping[str, Any] | None = None,
2432
+ ret_specifiers: bool = False,
2433
+ ) -> str | tuple[str, dict[str, Any]]:
2434
+ """Return the specified snippet `script` with variable substitutions completed.
2435
+
2436
+ Parameters
2437
+ ----------
2438
+ ret_specifiers
2439
+ If True, also return a list of environment specifiers as a dict whose keys are
2440
+ specifier keys found in the `script` path and whose values are the
2441
+ corresponding values extracted from `env_spec`.
2442
+
1911
2443
  """
1912
2444
  if not cls.is_snippet_script(script):
1913
2445
  raise ValueError(
1914
2446
  f"Must be an app-data script name (e.g. "
1915
2447
  f"<<script:path/to/app/data/script.py>>), but received {script}"
1916
2448
  )
1917
- pattern = r"\<\<script:(.*:?)\>\>"
1918
- match_obj = re.match(pattern, script)
1919
- out = match_obj.group(1)
1920
-
1921
- if env_spec:
1922
- out = re.sub(
1923
- pattern=r"\<\<env:(.*?)\>\>",
1924
- repl=lambda match_obj: env_spec[match_obj.group(1)],
2449
+ if not (match_obj := cls.__SCRIPT_RE.match(script)):
2450
+ raise ValueError("incomplete <<script:>>")
2451
+ out: str = match_obj[1]
2452
+
2453
+ if env_spec is not None:
2454
+ specifiers: dict[str, Any] = {}
2455
+
2456
+ def repl(match_obj):
2457
+ spec = match_obj[1]
2458
+ specifiers[spec] = env_spec[spec]
2459
+ return str(env_spec[spec])
2460
+
2461
+ out = cls.__ENV_RE.sub(
2462
+ repl=repl,
1925
2463
  string=out,
1926
2464
  )
2465
+ if ret_specifiers:
2466
+ return (out, specifiers)
1927
2467
  return out
1928
2468
 
1929
2469
  @classmethod
2470
+ @overload
1930
2471
  def get_snippet_script_path(
1931
- cls, script_path, env_spec: Optional[Dict[str, Any]] = None
1932
- ) -> Path:
1933
- """
1934
- Get the substituted script snippet path, or False if there is no snippet.
2472
+ cls,
2473
+ script_path: str | None,
2474
+ env_spec: Mapping[str, Any] | None = None,
2475
+ *,
2476
+ ret_specifiers: Literal[True],
2477
+ ) -> tuple[Path, dict[str, Any]] | None:
2478
+ ...
2479
+
2480
+ @classmethod
2481
+ @overload
2482
+ def get_snippet_script_path(
2483
+ cls,
2484
+ script_path: str | None,
2485
+ env_spec: Mapping[str, Any] | None = None,
2486
+ *,
2487
+ ret_specifiers: Literal[False] = False,
2488
+ ) -> Path | None:
2489
+ ...
2490
+
2491
+ @classmethod
2492
+ def get_snippet_script_path(
2493
+ cls,
2494
+ script_path: str | None,
2495
+ env_spec: Mapping[str, Any] | None = None,
2496
+ *,
2497
+ ret_specifiers: bool = False,
2498
+ ) -> Path | tuple[Path, dict[str, Any]] | None:
2499
+ """Return the specified snippet `script` path, or None if there is no snippet.
2500
+
2501
+ Parameters
2502
+ ----------
2503
+ ret_specifiers
2504
+ If True, also return a list of environment specifiers as a dict whose keys are
2505
+ specifier keys found in the `script` path and whose values are the
2506
+ corresponding values extracted from `env_spec`.
2507
+
1935
2508
  """
1936
2509
  if not cls.is_snippet_script(script_path):
1937
- return False
2510
+ return None
2511
+
2512
+ assert script_path is not None
2513
+ path_ = cls.get_snippet_script_str(
2514
+ script_path, env_spec, ret_specifiers=ret_specifiers
2515
+ )
2516
+ if ret_specifiers:
2517
+ assert isinstance(path_, tuple)
2518
+ path_str, specifiers = path_
2519
+ else:
2520
+ assert isinstance(path_, str)
2521
+ path_str = path_
1938
2522
 
1939
- path = cls.get_snippet_script_str(script_path, env_spec)
1940
- if path in cls.app.scripts:
1941
- path = cls.app.scripts.get(path)
2523
+ path = Path(cls._app.scripts.get(path_str, path_str))
1942
2524
 
1943
- return Path(path)
2525
+ if ret_specifiers:
2526
+ return path, specifiers
2527
+ else:
2528
+ return path
1944
2529
 
1945
2530
  @staticmethod
1946
- def __get_param_dump_file_stem(js_idx: int, js_act_idx: int):
1947
- return RunDirAppFiles.get_run_param_dump_file_prefix(js_idx, js_act_idx)
2531
+ def __get_param_dump_file_stem(block_act_key: BlockActionKey) -> str:
2532
+ return RunDirAppFiles.get_run_param_dump_file_prefix(block_act_key)
1948
2533
 
1949
2534
  @staticmethod
1950
- def __get_param_load_file_stem(js_idx: int, js_act_idx: int):
1951
- return RunDirAppFiles.get_run_param_load_file_prefix(js_idx, js_act_idx)
2535
+ def __get_param_load_file_stem(block_act_key: BlockActionKey) -> str:
2536
+ return RunDirAppFiles.get_run_param_load_file_prefix(block_act_key)
1952
2537
 
1953
- def get_param_dump_file_path_JSON(self, js_idx: int, js_act_idx: int):
2538
+ def get_param_dump_file_path_JSON(
2539
+ self, block_act_key: BlockActionKey, directory: Path | None = None
2540
+ ) -> Path:
1954
2541
  """
1955
2542
  Get the path of the JSON dump file.
1956
2543
  """
1957
- return Path(self.__get_param_dump_file_stem(js_idx, js_act_idx) + ".json")
2544
+ directory = directory or Path()
2545
+ return directory.joinpath(
2546
+ self.__get_param_dump_file_stem(block_act_key) + ".json"
2547
+ )
1958
2548
 
1959
- def get_param_dump_file_path_HDF5(self, js_idx: int, js_act_idx: int):
2549
+ def get_param_dump_file_path_HDF5(
2550
+ self, block_act_key: BlockActionKey, directory: Path | None = None
2551
+ ) -> Path:
1960
2552
  """
1961
- Get the path of the HDF56 dump file.
2553
+ Get the path of the HDF5 dump file.
1962
2554
  """
1963
- return Path(self.__get_param_dump_file_stem(js_idx, js_act_idx) + ".h5")
2555
+ directory = directory or Path()
2556
+ return directory.joinpath(self.__get_param_dump_file_stem(block_act_key) + ".h5")
1964
2557
 
1965
- def get_param_load_file_path_JSON(self, js_idx: int, js_act_idx: int):
2558
+ def get_param_load_file_path_JSON(
2559
+ self, block_act_key: BlockActionKey, directory: Path | None = None
2560
+ ) -> Path:
1966
2561
  """
1967
2562
  Get the path of the JSON load file.
1968
2563
  """
1969
- return Path(self.__get_param_load_file_stem(js_idx, js_act_idx) + ".json")
2564
+ directory = directory or Path()
2565
+ return directory.joinpath(
2566
+ self.__get_param_load_file_stem(block_act_key) + ".json"
2567
+ )
1970
2568
 
1971
- def get_param_load_file_path_HDF5(self, js_idx: int, js_act_idx: int):
2569
+ def get_param_load_file_path_HDF5(
2570
+ self, block_act_key: BlockActionKey, directory: Path | None = None
2571
+ ) -> Path:
1972
2572
  """
1973
2573
  Get the path of the HDF5 load file.
1974
2574
  """
1975
- return Path(self.__get_param_load_file_stem(js_idx, js_act_idx) + ".h5")
2575
+ directory = directory or Path()
2576
+ return directory.joinpath(self.__get_param_load_file_stem(block_act_key) + ".h5")
1976
2577
 
1977
- def expand(self):
2578
+ def expand(self) -> Sequence[Action]:
1978
2579
  """
1979
2580
  Expand this action into a list of actions if necessary.
1980
2581
  This converts input file generators and output file parsers into their own actions.
@@ -1983,154 +2584,123 @@ class Action(JSONLike):
1983
2584
  # already expanded
1984
2585
  return [self]
1985
2586
 
1986
- else:
1987
- # run main if:
1988
- # - one or more output files are not passed
1989
- # run IFG if:
1990
- # - one or more output files are not passed
1991
- # - AND input file is not passed
1992
- # always run OPs, for now
1993
-
1994
- main_rules = self.rules + [
1995
- self.app.ActionRule.check_missing(f"output_files.{i.label}")
1996
- for i in self.output_files
1997
- ]
2587
+ # run main if:
2588
+ # - one or more output files are not passed
2589
+ # run IFG if:
2590
+ # - one or more output files are not passed
2591
+ # - AND input file is not passed
2592
+ # always run OPs, for now
1998
2593
 
1999
- # note we keep the IFG/OPs in the new actions, so we can check the parameters
2000
- # used/produced.
2594
+ main_rules = self.rules + [
2595
+ self._app.ActionRule.check_missing(f"output_files.{of.label}")
2596
+ for of in self.output_files
2597
+ ]
2001
2598
 
2002
- inp_files = []
2003
- inp_acts = []
2004
- for ifg in self.input_file_generators:
2005
- exe = "<<executable:python_script>>"
2006
- args = [
2007
- '"$WK_PATH"',
2008
- "$EAR_ID",
2009
- ] # WK_PATH could have a space in it
2010
- if ifg.script:
2011
- script_name = self.get_script_name(ifg.script)
2012
- variables = {
2013
- "script_name": script_name,
2014
- "script_name_no_ext": str(Path(script_name).stem),
2015
- }
2016
- else:
2017
- variables = {}
2018
- act_i = self.app.Action(
2019
- commands=[
2020
- app.Command(executable=exe, arguments=args, variables=variables)
2021
- ],
2022
- input_file_generators=[ifg],
2023
- environments=[self.get_input_file_generator_action_env(ifg)],
2024
- rules=main_rules + ifg.get_action_rules(),
2025
- script_pass_env_spec=ifg.script_pass_env_spec,
2026
- abortable=ifg.abortable,
2027
- # TODO: add script_data_in etc? and to OFP?
2028
- )
2029
- act_i._task_schema = self.task_schema
2030
- if ifg.input_file not in inp_files:
2031
- inp_files.append(ifg.input_file)
2032
- act_i._from_expand = True
2033
- inp_acts.append(act_i)
2034
-
2035
- out_files = []
2036
- out_acts = []
2037
- for ofp in self.output_file_parsers:
2038
- exe = "<<executable:python_script>>"
2039
- args = [
2040
- '"$WK_PATH"',
2041
- "$EAR_ID",
2042
- ] # WK_PATH could have a space in it
2043
- if ofp.script:
2044
- script_name = self.get_script_name(ofp.script)
2045
- variables = {
2046
- "script_name": script_name,
2047
- "script_name_no_ext": str(Path(script_name).stem),
2048
- }
2049
- else:
2050
- variables = {}
2051
- act_i = self.app.Action(
2052
- commands=[
2053
- app.Command(executable=exe, arguments=args, variables=variables)
2054
- ],
2055
- output_file_parsers=[ofp],
2056
- environments=[self.get_output_file_parser_action_env(ofp)],
2057
- rules=list(self.rules) + ofp.get_action_rules(),
2058
- script_pass_env_spec=ofp.script_pass_env_spec,
2059
- abortable=ofp.abortable,
2060
- )
2061
- act_i._task_schema = self.task_schema
2062
- for j in ofp.output_files:
2063
- if j not in out_files:
2064
- out_files.append(j)
2065
- act_i._from_expand = True
2066
- out_acts.append(act_i)
2067
-
2068
- commands = self.commands
2069
- if self.script:
2070
- exe = f"<<executable:{self.script_exe}>>"
2071
- args = []
2072
- if self.script:
2073
- script_name = self.get_script_name(self.script)
2074
- variables = {
2075
- "script_name": script_name,
2076
- "script_name_no_ext": str(Path(script_name).stem),
2077
- }
2078
- else:
2079
- variables = {}
2080
- if self.script_data_in_has_direct or self.script_data_out_has_direct:
2081
- # WK_PATH could have a space in it:
2082
- args.extend(["--wk-path", '"$WK_PATH"', "--run-id", "$EAR_ID"])
2083
-
2084
- fn_args = {"js_idx": r"${JS_IDX}", "js_act_idx": r"${JS_act_idx}"}
2085
-
2086
- for fmt in self.script_data_in_grouped:
2087
- if fmt == "json":
2088
- if self.script_data_files_use_opt:
2089
- args.append("--inputs-json")
2090
- args.append(str(self.get_param_dump_file_path_JSON(**fn_args)))
2091
- elif fmt == "hdf5":
2092
- if self.script_data_files_use_opt:
2093
- args.append("--inputs-hdf5")
2094
- args.append(str(self.get_param_dump_file_path_HDF5(**fn_args)))
2095
-
2096
- for fmt in self.script_data_out_grouped:
2097
- if fmt == "json":
2098
- if self.script_data_files_use_opt:
2099
- args.append("--outputs-json")
2100
- args.append(str(self.get_param_load_file_path_JSON(**fn_args)))
2101
- elif fmt == "hdf5":
2102
- if self.script_data_files_use_opt:
2103
- args.append("--outputs-hdf5")
2104
- args.append(str(self.get_param_load_file_path_HDF5(**fn_args)))
2105
-
2106
- commands += [
2107
- self.app.Command(executable=exe, arguments=args, variables=variables)
2108
- ]
2109
-
2110
- # TODO: store script_args? and build command with executable syntax?
2111
- main_act = self.app.Action(
2112
- commands=commands,
2113
- script=self.script,
2114
- script_data_in=self.script_data_in,
2115
- script_data_out=self.script_data_out,
2116
- script_exe=self.script_exe,
2117
- script_pass_env_spec=self.script_pass_env_spec,
2118
- environments=[self.get_commands_action_env()],
2119
- abortable=self.abortable,
2120
- rules=main_rules,
2121
- input_files=inp_files,
2122
- output_files=out_files,
2123
- save_files=self.save_files,
2124
- clean_up=self.clean_up,
2599
+ # note we keep the IFG/OPs in the new actions, so we can check the parameters
2600
+ # used/produced.
2601
+
2602
+ inp_files: list[FileSpec] = []
2603
+ inp_acts: list[Action] = []
2604
+
2605
+ app_caps = self._app.package_name.upper()
2606
+
2607
+ script_cmd_vars = {
2608
+ "script_name": f"${app_caps}_RUN_SCRIPT_NAME",
2609
+ "script_name_no_ext": f"${app_caps}_RUN_SCRIPT_NAME_NO_EXT",
2610
+ "script_dir": f"${app_caps}_RUN_SCRIPT_DIR",
2611
+ "script_path": f"${app_caps}_RUN_SCRIPT_PATH",
2612
+ }
2613
+
2614
+ for ifg in self.input_file_generators:
2615
+ script_exe = "python_script"
2616
+ exe = f"<<executable:{script_exe}>>"
2617
+ variables = script_cmd_vars if ifg.script else {}
2618
+ act_i = self._app.Action(
2619
+ commands=[self._app.Command(executable=exe, variables=variables)],
2620
+ input_file_generators=[ifg],
2621
+ environments=[self.get_input_file_generator_action_env(ifg)],
2622
+ rules=main_rules + ifg.get_action_rules(),
2623
+ script=ifg.script,
2624
+ script_data_in="direct",
2625
+ script_data_out="direct",
2626
+ script_exe=script_exe,
2627
+ script_pass_env_spec=ifg.script_pass_env_spec,
2628
+ abortable=ifg.abortable,
2629
+ requires_dir=ifg.requires_dir,
2630
+ )
2631
+ act_i._task_schema = self.task_schema
2632
+ if ifg.input_file not in inp_files:
2633
+ inp_files.append(ifg.input_file)
2634
+ act_i.process_script_data_formats()
2635
+ act_i._from_expand = True
2636
+ inp_acts.append(act_i)
2637
+
2638
+ out_files: list[FileSpec] = []
2639
+ out_acts: list[Action] = []
2640
+ for ofp in self.output_file_parsers:
2641
+ script_exe = "python_script"
2642
+ exe = f"<<executable:{script_exe}>>"
2643
+ variables = script_cmd_vars if ofp.script else {}
2644
+ act_i = self._app.Action(
2645
+ commands=[self._app.Command(executable=exe, variables=variables)],
2646
+ output_file_parsers=[ofp],
2647
+ environments=[self.get_output_file_parser_action_env(ofp)],
2648
+ rules=list(self.rules) + ofp.get_action_rules(),
2649
+ script=ofp.script,
2650
+ script_data_in="direct",
2651
+ script_data_out="direct",
2652
+ script_exe=script_exe,
2653
+ script_pass_env_spec=ofp.script_pass_env_spec,
2654
+ abortable=ofp.abortable,
2655
+ requires_dir=ofp.requires_dir,
2125
2656
  )
2126
- main_act._task_schema = self.task_schema
2127
- main_act._from_expand = True
2657
+ act_i._task_schema = self.task_schema
2658
+ for j in ofp.output_files:
2659
+ if j not in out_files:
2660
+ out_files.append(j)
2661
+ act_i.process_script_data_formats()
2662
+ act_i._from_expand = True
2663
+ out_acts.append(act_i)
2664
+
2665
+ commands = self.commands
2666
+ if self.script:
2667
+ exe = f"<<executable:{self.script_exe}>>"
2668
+ variables = script_cmd_vars if self.script else {}
2669
+ args = self.get_script_input_output_file_command_args()
2670
+ commands += [
2671
+ self._app.Command(executable=exe, arguments=args, variables=variables)
2672
+ ]
2128
2673
 
2129
- cmd_acts = inp_acts + [main_act] + out_acts
2674
+ # TODO: store script_args? and build command with executable syntax?
2675
+ main_act = self._app.Action(
2676
+ commands=commands,
2677
+ script=self.script,
2678
+ script_data_in=self.script_data_in,
2679
+ script_data_out=self.script_data_out,
2680
+ script_exe=self.script_exe,
2681
+ script_pass_env_spec=self.script_pass_env_spec,
2682
+ environments=[self.get_commands_action_env()],
2683
+ abortable=self.abortable,
2684
+ rules=main_rules,
2685
+ input_files=inp_files,
2686
+ output_files=out_files,
2687
+ save_files=self.save_files,
2688
+ clean_up=self.clean_up,
2689
+ requires_dir=self.requires_dir,
2690
+ )
2691
+ main_act._task_schema = self.task_schema
2692
+ main_act._from_expand = True
2693
+ main_act.process_script_data_formats()
2694
+
2695
+ return [*inp_acts, main_act, *out_acts]
2130
2696
 
2131
- return cmd_acts
2697
# note: we use "parameter" rather than "input", because it could be a schema input
# or schema output.
__PARAMS_RE: ClassVar[Pattern] = re.compile(
    r"\<\<(?:\w+(?:\[(?:.*)\])?\()?parameter:(.*?)\)?\>\>"
)

def get_command_input_types(self, sub_parameters: bool = False) -> tuple[str, ...]:
    """Collect the parameter types referenced by this action's commands.

    Every ``<<parameter:...>>`` reference (optionally wrapped in a call, e.g.
    ``<<func(parameter:...)>>``) found in each command string and in each command
    argument is gathered; duplicates are dropped.

    Parameters
    ----------
    sub_parameters
        If True, dot-delimited sub-parameter references are returned untouched.
        If False (default), only the root parameter type (the part before the
        first dot) is returned.

    Returns
    -------
    tuple[str, ...]
        The distinct parameter types, in no particular order (built from a set).
    """
    found: set[str] = set()
    for cmd in self.commands:
        # the command string (which may be unset) and every argument are scanned
        # in exactly the same way:
        for text in (cmd.command or "", *(cmd.arguments or ())):
            for match in self.__PARAMS_RE.finditer(text):
                name = match[1]
                found.add(name if sub_parameters else name.split(".")[0])
    # TODO: consider stdin?
    return tuple(found)
2726
+
2727
__FILES_RE: ClassVar[Pattern] = re.compile(r"\<\<file:(.*?)\>\>")

def get_command_file_labels(self) -> tuple[str, ...]:
    """Collect the file labels referenced by this action's commands.

    A label is whatever appears after ``file:`` in a ``<<file:LABEL>>`` reference,
    looked for in each command string and in each command argument.

    Returns
    -------
    tuple[str, ...]
        The distinct labels, in no particular order (built from a set).
    """
    labels: set[str] = set()
    for cmd in self.commands:
        # an unset command string is treated as empty; missing arguments as none:
        for text in (cmd.command or "", *(cmd.arguments or ())):
            labels.update(self.__FILES_RE.findall(text))
    # TODO: consider stdin?
    return tuple(labels)
2172
2738
 
2173
def get_command_output_types(self) -> tuple[str, ...]:
    """Collect the parameter types assigned from command stdout and stderr.

    Each command is asked for its output types; any truthy ``"stdout"`` or
    ``"stderr"`` entry is included.

    Returns
    -------
    tuple[str, ...]
        The distinct parameter types, in no particular order (built from a set).
    """
    collected: set[str] = set()
    for cmd in self.commands:
        by_stream = cmd.get_output_types()
        collected.update(
            by_stream[stream] for stream in ("stdout", "stderr") if by_stream[stream]
        )
    return tuple(collected)
2750
def get_command_parameter_types(
    self, sub_parameters: bool = False
) -> tuple[str, ...]:
    """Get all parameter types that appear in the commands of this action.

    Command input types are prefixed with ``inputs.`` and command file labels with
    ``input_files.``; the input entries come first.

    Parameters
    ----------
    sub_parameters
        Forwarded to `get_command_input_types`: if True, dot-delimited input types
        are kept untouched; if False (default), only the root parameter type is
        kept.
    """
    # TODO: not sure if we need `input_files`
    input_keys = [
        f"inputs.{typ}" for typ in self.get_command_input_types(sub_parameters)
    ]
    file_keys = [f"input_files.{label}" for label in self.get_command_file_labels()]
    return (*input_keys, *file_keys)
2766
+
2767
def get_input_types(self, sub_parameters: bool = False) -> tuple[str, ...]:
    """Get the input types that are consumed by commands and input file generators
    of this action.

    For a script action with no input file generators and no output file parsers,
    the task schema's input types are returned. Otherwise the result combines the
    command input types, the input types of every input file generator, and the
    inputs of every output file parser.

    Parameters
    ----------
    sub_parameters
        Forwarded to `get_command_input_types`: if True, dot-delimited input types
        in the commands are kept untouched; if False (default), only the root
        parameter type is kept.

    Returns
    -------
    tuple[str, ...]
        The distinct input types, in no particular order (built from a set).
    """
    if self.script and not (self.input_file_generators or self.output_file_parsers):
        # TODO: refine this according to `script_data_in`, since this can be used
        # to control the inputs/outputs of a script.
        return tuple(set(self.task_schema.input_types))

    consumed = set(self.get_command_input_types(sub_parameters))
    for generator in self.input_file_generators:
        consumed.update(param.typ for param in generator.inputs)
    for parser in self.output_file_parsers:
        # a parser may declare no inputs at all:
        consumed.update(parser.inputs or ())
    return tuple(consumed)
2793
+
2794
def get_output_types(self) -> tuple[str, ...]:
    """Get the output types that are produced by command standard outputs and
    errors, and by output file parsers of this action.

    For a script action with no input file generators and no output file parsers,
    the task schema's output types are returned instead.

    Returns
    -------
    tuple[str, ...]
        The distinct output types, in no particular order (built from a set).
    """
    if self.script and not (self.input_file_generators or self.output_file_parsers):
        # TODO: refine this according to `script_data_out`, since this can be used
        # to control the inputs/outputs of a script.
        return tuple(set(self.task_schema.output_types))

    produced = set(self.get_command_output_types())
    for parser in self.output_file_parsers:
        if parser.output is not None:
            produced.add(parser.output.typ)
        # a parser may declare no additional outputs:
        produced.update(parser.outputs or ())
    return tuple(produced)
2228
2812
 
2229
def get_input_file_labels(self) -> tuple[str, ...]:
    """
    Get the label of each of this action's input files, in their stored order.
    """
    labels = [file_spec.label for file_spec in self.input_files]
    return tuple(labels)
2234
2818
 
2235
def get_output_file_labels(self) -> tuple[str, ...]:
    """
    Get the label of each of this action's output files, in their stored order.
    """
    labels = [file_spec.label for file_spec in self.output_files]
    return tuple(labels)
2240
2824
 
2241
2825
  @TimeIt.decorator
2242
2826
  def generate_data_index(
2243
2827
  self,
2244
- act_idx,
2245
- EAR_ID,
2246
- schema_data_idx,
2247
- all_data_idx,
2248
- workflow,
2249
- param_source,
2250
- ) -> List[int]:
2828
+ act_idx: int,
2829
+ EAR_ID: int,
2830
+ schema_data_idx: DataIndex,
2831
+ all_data_idx: dict[tuple[int, int], DataIndex],
2832
+ workflow: Workflow,
2833
+ param_source: ParamSource,
2834
+ ) -> list[int | list[int]]:
2251
2835
  """Generate the data index for this action of an element iteration whose overall
2252
2836
  data index is passed.
2253
2837
 
2254
2838
  This mutates `all_data_idx`.
2255
-
2256
2839
  """
2257
2840
 
2258
2841
  # output keys must be processed first for this to work, since when processing an
2259
2842
  # output key, we may need to update the index of an output in a previous action's
2260
2843
  # data index, which could affect the data index in an input of this action.
2261
- keys = [f"outputs.{i}" for i in self.get_output_types()]
2262
- keys += [f"inputs.{i}" for i in self.get_input_types()]
2263
- for i in self.input_files:
2264
- keys.append(f"input_files.{i.label}")
2265
- for i in self.output_files:
2266
- keys.append(f"output_files.{i.label}")
2844
+ keys = [f"outputs.{typ}" for typ in self.get_output_types()]
2845
+ keys.extend(f"inputs.{typ}" for typ in self.get_input_types())
2846
+ keys.extend(f"input_files.{file.label}" for file in self.input_files)
2847
+ keys.extend(f"output_files.{file.label}" for file in self.output_files)
2267
2848
 
2268
2849
  # these are consumed by the OFP, so should not be considered to generate new data:
2269
- OFP_outs = [j for i in self.output_file_parsers for j in i.outputs or []]
2850
+ OFP_outs = {j for ofp in self.output_file_parsers for j in ofp.outputs or ()}
2270
2851
 
2271
2852
  # keep all resources and repeats data:
2272
2853
  sub_data_idx = {
@@ -2274,37 +2855,40 @@ class Action(JSONLike):
2274
2855
  for k, v in schema_data_idx.items()
2275
2856
  if ("resources" in k or "repeats" in k)
2276
2857
  }
2277
- param_src_update = []
2858
+ param_src_update: list[int | list[int]] = []
2278
2859
  for key in keys:
2279
- sub_param_idx = {}
2860
+ sub_param_idx: dict[str, int | list[int]] = {}
2280
2861
  if (
2281
2862
  key.startswith("input_files")
2282
2863
  or key.startswith("output_files")
2283
2864
  or key.startswith("inputs")
2284
- or (key.startswith("outputs") and key.split("outputs.")[1] in OFP_outs)
2865
+ or (
2866
+ key.startswith("outputs") and key.removeprefix("outputs.") in OFP_outs
2867
+ )
2285
2868
  ):
2286
2869
  # look for an index in previous data indices (where for inputs we look
2287
2870
  # for *output* parameters of the same name):
2288
- k_idx = None
2871
+ k_idx: int | list[int] | None = None
2289
2872
  for prev_data_idx in all_data_idx.values():
2290
2873
  if key.startswith("inputs"):
2291
- k_param = key.split("inputs.")[1]
2874
+ k_param = key.removeprefix("inputs.")
2292
2875
  k_out = f"outputs.{k_param}"
2293
2876
  if k_out in prev_data_idx:
2294
2877
  k_idx = prev_data_idx[k_out]
2295
-
2296
- else:
2297
- if key in prev_data_idx:
2298
- k_idx = prev_data_idx[key]
2878
+ elif key in prev_data_idx:
2879
+ k_idx = prev_data_idx[key]
2299
2880
 
2300
2881
  if k_idx is None:
2301
2882
  # otherwise take from the schema_data_idx:
2302
2883
  if key in schema_data_idx:
2303
2884
  k_idx = schema_data_idx[key]
2885
+ prefix = f"{key}." # sub-parameter (note dot)
2304
2886
  # add any associated sub-parameters:
2305
- for k, v in schema_data_idx.items():
2306
- if k.startswith(f"{key}."): # sub-parameter (note dot)
2307
- sub_param_idx[k] = v
2887
+ sub_param_idx.update(
2888
+ (k, v)
2889
+ for k, v in schema_data_idx.items()
2890
+ if k.startswith(prefix)
2891
+ )
2308
2892
  else:
2309
2893
  # otherwise we need to allocate a new parameter datum:
2310
2894
  # (for input/output_files keys)
@@ -2313,13 +2897,12 @@ class Action(JSONLike):
2313
2897
  else:
2314
2898
  # outputs
2315
2899
  k_idx = None
2316
- for (act_idx_i, EAR_ID_i), prev_data_idx in all_data_idx.items():
2900
+ for (_, EAR_ID_i), prev_data_idx in all_data_idx.items():
2317
2901
  if key in prev_data_idx:
2318
2902
  k_idx = prev_data_idx[key]
2319
2903
 
2320
2904
  # allocate a new parameter datum for this intermediate output:
2321
- param_source_i = copy.deepcopy(param_source)
2322
- # param_source_i["action_idx"] = act_idx_i
2905
+ param_source_i = copy.copy(param_source)
2323
2906
  param_source_i["EAR_ID"] = EAR_ID_i
2324
2907
  new_k_idx = workflow._add_unset_parameter_data(param_source_i)
2325
2908
 
@@ -2336,36 +2919,48 @@ class Action(JSONLike):
2336
2919
  sub_data_idx[key] = k_idx
2337
2920
  sub_data_idx.update(sub_param_idx)
2338
2921
 
2339
- all_data_idx[(act_idx, EAR_ID)] = sub_data_idx
2922
+ all_data_idx[act_idx, EAR_ID] = sub_data_idx
2340
2923
 
2341
2924
  return param_src_update
2342
2925
 
2343
def get_possible_scopes(self) -> tuple[ActionScope, ...]:
    """Get the action scopes that are inclusive of this action, ordered by
    decreasing specificity.

    The precise scope always comes first and the "any" scope last. Actions with
    input file generators or output file parsers additionally get the matching
    general scope followed by the "processing" scope in between.
    """
    precise = self.get_precise_scope()
    scope_cls = self._app.ActionScope

    if self.input_file_generators:
        general = scope_cls.input_file_generator()
    elif self.output_file_parsers:
        general = scope_cls.output_file_parser()
    else:
        # a main action: nothing sits between the precise scope and "any"
        return (precise, scope_cls.any())

    return (precise, general, scope_cls.processing(), scope_cls.any())
2947
+
2948
+ def _get_possible_scopes_reversed(self) -> Iterator[ActionScope]:
2949
+ """Get the action scopes that are inclusive of this action, ordered by increasing
2950
+ specificity."""
2365
2951
 
2366
- return scopes
2952
+ # Fail early if a failure is possible
2953
+ precise_scope = self.get_precise_scope()
2954
+ yield self._app.ActionScope.any()
2955
+ if self.input_file_generators:
2956
+ yield self._app.ActionScope.processing()
2957
+ yield self._app.ActionScope.input_file_generator()
2958
+ elif self.output_file_parsers:
2959
+ yield self._app.ActionScope.processing()
2960
+ yield self._app.ActionScope.output_file_parser()
2961
+ yield precise_scope
2367
2962
 
2368
- def get_precise_scope(self) -> app.ActionScope:
2963
+ def get_precise_scope(self) -> ActionScope:
2369
2964
  """
2370
2965
  Get the exact scope of this action.
2371
2966
  The action must have been expanded prior to calling this.
@@ -2377,21 +2972,21 @@ class Action(JSONLike):
2377
2972
  )
2378
2973
 
2379
2974
  if self.input_file_generators:
2380
- return self.app.ActionScope.input_file_generator(
2975
+ return self._app.ActionScope.input_file_generator(
2381
2976
  file=self.input_file_generators[0].input_file.label
2382
2977
  )
2383
2978
  elif self.output_file_parsers:
2384
2979
  if self.output_file_parsers[0].output is not None:
2385
- return self.app.ActionScope.output_file_parser(
2980
+ return self._app.ActionScope.output_file_parser(
2386
2981
  output=self.output_file_parsers[0].output
2387
2982
  )
2388
2983
  else:
2389
- return self.app.ActionScope.output_file_parser()
2984
+ return self._app.ActionScope.output_file_parser()
2390
2985
  else:
2391
- return self.app.ActionScope.main()
2986
+ return self._app.ActionScope.main()
2392
2987
 
2393
2988
  def is_input_type_required(
2394
- self, typ: str, provided_files: List[app.FileSpec]
2989
+ self, typ: str, provided_files: Container[FileSpec]
2395
2990
  ) -> bool:
2396
2991
  """
2397
2992
  Determine if the given input type is required by this action.
@@ -2410,38 +3005,30 @@ class Action(JSONLike):
2410
3005
 
2411
3006
  # typ is required if used in any input file generators and input file is not
2412
3007
  # provided:
2413
- for IFG in self.input_file_generators:
2414
- if typ in (i.typ for i in IFG.inputs):
2415
- if IFG.input_file not in provided_files:
2416
- return True
2417
-
2418
- # typ is required if used in any output file parser
2419
- for OFP in self.output_file_parsers:
2420
- if typ in (OFP.inputs or []):
3008
+ for ifg in self.input_file_generators:
3009
+ if typ in (inp.typ for inp in ifg.inputs) and (
3010
+ ifg.input_file not in provided_files
3011
+ ):
2421
3012
  return True
2422
3013
 
2423
- # Appears to be not required
2424
- return False
3014
+ # typ is required if used in any output file parser
3015
+ return any(typ in (ofp.inputs or ()) for ofp in self.output_file_parsers)
2425
3016
 
2426
3017
  @TimeIt.decorator
2427
- def test_rules(self, element_iter) -> Tuple[bool, List[int]]:
3018
+ def test_rules(self, element_iter: ElementIteration) -> tuple[bool, list[int]]:
2428
3019
  """Test all rules against the specified element iteration."""
2429
- rules_valid = [rule.test(element_iteration=element_iter) for rule in self.rules]
2430
- action_valid = all(rules_valid)
2431
- commands_idx = []
2432
- if action_valid:
2433
- for cmd_idx, cmd in enumerate(self.commands):
2434
- if any(not i.test(element_iteration=element_iter) for i in cmd.rules):
2435
- continue
2436
- commands_idx.append(cmd_idx)
2437
- return action_valid, commands_idx
2438
-
2439
- def get_required_executables(self) -> Tuple[str]:
3020
+ if any(not rule.test(element_iteration=element_iter) for rule in self.rules):
3021
+ return False, []
3022
+ return True, [
3023
+ cmd_idx
3024
+ for cmd_idx, cmd in enumerate(self.commands)
3025
+ if all(rule.test(element_iteration=element_iter) for rule in cmd.rules)
3026
+ ]
3027
+
3028
def get_required_executables(self) -> Iterator[str]:
    """Yield the executable labels required by this action.

    Labels are yielded command by command, in the order each command reports
    them; no de-duplication is performed here.
    """
    for cmd in self.commands:
        for label in cmd.get_required_executables():
            yield label
2445
3032
 
2446
3033
  def compose_source(self, snip_path: Path) -> str:
2447
3034
  """Generate the file contents of this source."""
@@ -2450,125 +3037,114 @@ class Action(JSONLike):
2450
3037
  with snip_path.open("rt") as fp:
2451
3038
  script_str = fp.read()
2452
3039
 
2453
- if not self.script_is_python:
3040
+ if not self.script_is_python_snippet:
2454
3041
  return script_str
2455
3042
 
3043
+ if self.is_OFP and self.output_file_parsers[0].output is None:
3044
+ # might be used just for saving files:
3045
+ return ""
3046
+
3047
+ app_caps = self._app.package_name.upper()
2456
3048
  py_imports = dedent(
2457
3049
  """\
2458
- import argparse, sys
3050
+ import argparse
3051
+ import os
2459
3052
  from pathlib import Path
2460
3053
 
2461
- parser = argparse.ArgumentParser()
2462
- parser.add_argument("--wk-path")
2463
- parser.add_argument("--run-id", type=int)
2464
- parser.add_argument("--inputs-json")
2465
- parser.add_argument("--inputs-hdf5")
2466
- parser.add_argument("--outputs-json")
2467
- parser.add_argument("--outputs-hdf5")
2468
- args = parser.parse_args()
2469
-
2470
- """
2471
- )
3054
+ import {app_module} as app
2472
3055
 
2473
- # if any direct inputs/outputs, we must load the workflow (must be python):
2474
- if self.script_data_in_has_direct or self.script_data_out_has_direct:
2475
- py_main_block_workflow_load = dedent(
2476
- """\
2477
- import {app_module} as app
2478
- app.load_config(
2479
- log_file_path=Path("{run_log_file}").resolve(),
2480
- config_dir=r"{cfg_dir}",
2481
- config_key=r"{cfg_invoc_key}",
2482
- )
2483
- wk_path, EAR_ID = args.wk_path, args.run_id
2484
- wk = app.Workflow(wk_path)
2485
- EAR = wk.get_EARs_from_IDs([EAR_ID])[0]
2486
- """
2487
- ).format(
2488
- run_log_file=self.app.RunDirAppFiles.get_log_file_name(),
2489
- app_module=self.app.module,
2490
- cfg_dir=self.app.config.config_directory,
2491
- cfg_invoc_key=self.app.config.config_key,
2492
- )
2493
- else:
2494
- py_main_block_workflow_load = ""
3056
+ std_path = os.getenv("{app_caps}_RUN_STD_PATH")
3057
+ log_path = os.getenv("{app_caps}_RUN_LOG_PATH")
3058
+ run_id = int(os.getenv("{app_caps}_RUN_ID"))
3059
+ wk_path = os.getenv("{app_caps}_WK_PATH")
2495
3060
 
2496
- func_kwargs_lst = []
2497
- if "direct" in self.script_data_in_grouped:
2498
- direct_ins_str = "direct_ins = EAR.get_input_values_direct()"
2499
- direct_ins_arg_str = "**direct_ins"
2500
- func_kwargs_lst.append(direct_ins_arg_str)
2501
- else:
2502
- direct_ins_str = ""
3061
+ with app.redirect_std_to_file(std_path):
2503
3062
 
2504
- if self.script_data_in_has_files:
2505
- # need to pass "_input_files" keyword argument to script main function:
2506
- input_files_str = dedent(
2507
- """\
2508
- inp_files = {}
2509
- if args.inputs_json:
2510
- inp_files["json"] = Path(args.inputs_json)
2511
- if args.inputs_hdf5:
2512
- inp_files["hdf5"] = Path(args.inputs_hdf5)
2513
3063
  """
2514
- )
2515
- input_files_arg_str = "_input_files=inp_files"
2516
- func_kwargs_lst.append(input_files_arg_str)
2517
- else:
2518
- input_files_str = ""
3064
+ ).format(app_module=self._app.module, app_caps=app_caps)
2519
3065
 
2520
- if self.script_data_out_has_files:
2521
- # need to pass "_output_files" keyword argument to script main function:
2522
- output_files_str = dedent(
2523
- """\
2524
- out_files = {}
2525
- if args.outputs_json:
2526
- out_files["json"] = Path(args.outputs_json)
2527
- if args.outputs_hdf5:
2528
- out_files["hdf5"] = Path(args.outputs_hdf5)
3066
+ # we must load the workflow (must be python):
3067
+ # (note: we previously only loaded the workflow if there were any direct inputs
3068
+ # or outputs; now we always load so we can use the method
3069
+ # `get_py_script_func_kwargs`)
3070
+ py_main_block_workflow_load = dedent(
3071
+ """\
3072
+ app.load_config(
3073
+ log_file_path=Path(log_path),
3074
+ config_dir=r"{cfg_dir}",
3075
+ config_key=r"{cfg_invoc_key}",
3076
+ )
3077
+ wk = app.Workflow(wk_path)
3078
+ EAR = wk.get_EARs_from_IDs([run_id])[0]
2529
3079
  """
2530
- )
2531
- output_files_arg_str = "_output_files=out_files"
2532
- func_kwargs_lst.append(output_files_arg_str)
3080
+ ).format(
3081
+ cfg_dir=self._app.config.config_directory,
3082
+ cfg_invoc_key=self._app.config.config_key,
3083
+ app_caps=app_caps,
3084
+ )
2533
3085
 
2534
- else:
2535
- output_files_str = ""
3086
+ tab_indent = " "
3087
+ tab_indent_2 = 2 * tab_indent
3088
+
3089
+ func_kwargs_str = dedent(
3090
+ """\
3091
+ blk_act_key = (
3092
+ os.environ["{app_caps}_JS_IDX"],
3093
+ os.environ["{app_caps}_BLOCK_IDX"],
3094
+ os.environ["{app_caps}_BLOCK_ACT_IDX"],
3095
+ )
3096
+ with EAR.raise_on_failure_threshold() as unset_params:
3097
+ func_kwargs = EAR.get_py_script_func_kwargs(
3098
+ raise_on_unset=False,
3099
+ add_script_files=True,
3100
+ blk_act_key=blk_act_key,
3101
+ )
3102
+ """
3103
+ ).format(app_caps=app_caps)
2536
3104
 
2537
3105
  script_main_func = Path(script_name).stem
2538
- func_invoke_str = f"{script_main_func}({', '.join(func_kwargs_lst)})"
2539
- if "direct" in self.script_data_out_grouped:
3106
+ func_invoke_str = f"{script_main_func}(**func_kwargs)"
3107
+ if not self.is_OFP and "direct" in self.script_data_out_grouped:
2540
3108
  py_main_block_invoke = f"outputs = {func_invoke_str}"
2541
3109
  py_main_block_outputs = dedent(
2542
3110
  """\
2543
- outputs = {"outputs." + k: v for k, v in outputs.items()}
2544
- for name_i, out_i in outputs.items():
2545
- wk.set_parameter_value(param_id=EAR.data_idx[name_i], value=out_i)
3111
+ with app.redirect_std_to_file(std_path):
3112
+ for name_i, out_i in outputs.items():
3113
+ wk.set_parameter_value(param_id=EAR.data_idx[f"outputs.{name_i}"], value=out_i)
2546
3114
  """
2547
3115
  )
3116
+ elif self.is_OFP:
3117
+ py_main_block_invoke = f"output = {func_invoke_str}"
3118
+ assert self.output_file_parsers[0].output
3119
+ py_main_block_outputs = dedent(
3120
+ """\
3121
+ with app.redirect_std_to_file(std_path):
3122
+ wk.save_parameter(name="outputs.{output_typ}", value=output, EAR_ID=run_id)
3123
+ """
3124
+ ).format(output_typ=self.output_file_parsers[0].output.typ)
2548
3125
  else:
2549
3126
  py_main_block_invoke = func_invoke_str
2550
3127
  py_main_block_outputs = ""
2551
3128
 
2552
- tab_indent = " "
3129
+ wk_load = (
3130
+ "\n" + indent(py_main_block_workflow_load, tab_indent_2)
3131
+ if py_main_block_workflow_load
3132
+ else ""
3133
+ )
2553
3134
  py_main_block = dedent(
2554
3135
  """\
2555
3136
  if __name__ == "__main__":
2556
- {py_imports}
2557
- {wk_load}
2558
- {direct_ins}
2559
- {in_files}
2560
- {out_files}
3137
+ {py_imports}{wk_load}
3138
+ {func_kwargs}
2561
3139
  {invoke}
2562
3140
  {outputs}
2563
3141
  """
2564
3142
  ).format(
2565
3143
  py_imports=indent(py_imports, tab_indent),
2566
- wk_load=indent(py_main_block_workflow_load, tab_indent),
2567
- direct_ins=indent(direct_ins_str, tab_indent),
2568
- in_files=indent(input_files_str, tab_indent),
2569
- out_files=indent(output_files_str, tab_indent),
3144
+ wk_load=wk_load,
3145
+ func_kwargs=indent(func_kwargs_str, tab_indent_2),
2570
3146
  invoke=indent(py_main_block_invoke, tab_indent),
2571
- outputs=indent(py_main_block_outputs, tab_indent),
3147
+ outputs=indent(dedent(py_main_block_outputs), tab_indent),
2572
3148
  )
2573
3149
 
2574
3150
  out = dedent(
@@ -2583,7 +3159,7 @@ class Action(JSONLike):
2583
3159
 
2584
3160
  return out
2585
3161
 
2586
def get_parameter_names(self, prefix: str) -> list[str]:
    """Get parameter types associated with a given prefix.

    Parameters
    ----------
    prefix
        One of "inputs", "outputs", "input_files" or "output_files".

    Returns
    -------
    list[str]
        For "inputs", this action's input types, each replaced by its entry in
        the task schema's single-label lookup when it has one. For the other
        prefixes, this action's output types, input file labels or output file
        labels respectively.

    Raises
    ------
    ValueError
        If `prefix` is not one of the four recognised values.
    """
    if prefix == "inputs":
        relabel = self.task_schema._get_single_label_lookup()
        return [relabel.get(typ, typ) for typ in self.get_input_types()]

    # the remaining prefixes each map straight onto one getter:
    simple_getters = (
        ("outputs", self.get_output_types),
        ("input_files", self.get_input_file_labels),
        ("output_files", self.get_output_file_labels),
    )
    for known_prefix, getter in simple_getters:
        if prefix == known_prefix:
            return list(getter())

    raise ValueError(f"unexpected prefix: {prefix}")
3192
+
3193
def get_commands_file_hash(self, data_idx: DataIndex, action_idx: int) -> int:
    """Get a hash that can be used to group together runs that will have the same
    commands file.

    The hash covers the task schema name, the action index, and the subset of
    `data_idx` whose keys are relevant to the commands: parameters and files
    referenced by the commands, plus any paths used by command-level rules.

    This hash is not stable across sessions or machines.

    Parameters
    ----------
    data_idx
        The data index of the run; only its relevant entries are hashed.
    action_idx
        Index of this action, included in the hashed value.
    """

    # filter data index by input parameters that appear in the commands, or are
    # used in rules in conditional commands. A set is used because the paths are
    # only ever tested for membership below.
    relevant_paths: set[str] = set()
    for param_type in self.get_command_parameter_types():
        relevant_paths.update(
            WorkflowTask._get_relevant_paths(data_idx, param_type.split(".")).keys()
        )

    # hash any relevant data index from rule path
    for cmd in self.commands:
        for act_rule in cmd.rules:
            rule_path = act_rule.rule.path
            assert rule_path
            if rule_path.startswith("resources."):
                # include all resource paths for now:
                path_parts = ["resources"]
            else:
                path_parts = rule_path.split(".")
            relevant_paths.update(
                WorkflowTask._get_relevant_paths(data_idx, path_parts).keys()
            )

    # note we don't need to consider action-level rules, since these determine
    # whether a run will be included in a submission or not; this method is only
    # called on runs that are part of a submission, at which point action-level
    # rules are irrelevant.

    relevant_data_idx = {k: v for k, v in data_idx.items() if k in relevant_paths}

    try:
        schema_name = self.task_schema.name
    except AssertionError:
        # allows for testing without making a schema
        schema_name = ""

    return get_hash(
        (
            schema_name,
            action_idx,
            relevant_data_idx,
        )
    )
3255
+
3256
@classmethod
def get_block_act_idx_shell_vars(cls) -> BlockActionKey:
    """Return the jobscript index, block index, and block action index shell
    environment variable names, formatted for shell substitution.

    Each name is the upper-cased app package name followed by ``_JS_IDX``,
    ``_BLOCK_IDX`` or ``_BLOCK_ACT_IDX``, wrapped as ``${NAME}``.

    Notes
    -----
    This seems to be shell-agnostic, at least for those currently supported.

    """
    app_caps = cls._app.package_name.upper()

    def as_shell_var(suffix: str) -> str:
        # doubled braces produce the literal `{`/`}` of the `${...}` syntax
        return f"${{{app_caps}_{suffix}}}"

    return (
        as_shell_var("JS_IDX"),
        as_shell_var("BLOCK_IDX"),
        as_shell_var("BLOCK_ACT_IDX"),
    )
3272
+
3273
def get_script_input_output_file_paths(
    self,
    block_act_key: BlockActionKey,
    directory: Path | None = None,
) -> dict[str, dict[str, Path]]:
    """Get the names (as `Path`s) of script input and output files for this action.

    Parameters
    ----------
    block_act_key
        Passed through to the parameter dump/load file path getters.
    directory
        Passed through to the same getters as the `directory` keyword argument.

    Returns
    -------
    dict[str, dict[str, Path]]
        A dict with keys "inputs" and "outputs", each mapping a format ("json" or
        "hdf5") to a path. Input paths come from the parameter *dump* getters and
        output paths from the parameter *load* getters; formats other than "json"
        and "hdf5" are left out.
    """

    def collect(formats, get_json_path, get_hdf5_path) -> dict[str, Path]:
        # map each file-based format to the path given by the matching getter
        found: dict[str, Path] = {}
        for fmt in formats:
            if fmt == "json":
                getter = get_json_path
            elif fmt == "hdf5":
                getter = get_hdf5_path
            else:
                continue
            found[fmt] = getter(block_act_key, directory=directory)
        return found

    return {
        "inputs": collect(
            self.script_data_in_grouped,
            self.get_param_dump_file_path_JSON,
            self.get_param_dump_file_path_HDF5,
        ),
        "outputs": collect(
            self.script_data_out_grouped,
            self.get_param_load_file_path_JSON,
            self.get_param_load_file_path_HDF5,
        ),
    }
3310
+
3311
def get_script_input_output_file_command_args(self) -> list[str]:
    """Get the script input and output file names as command line arguments.

    Paths are obtained from `get_script_input_output_file_paths`, keyed by the
    shell-variable form of the block action key. Input paths come first, then
    output paths. When `script_data_files_use_opt` is set, each path is preceded
    by an ``--inputs-<fmt>`` or ``--outputs-<fmt>`` option.
    """
    paths_by_direction = self.get_script_input_output_file_paths(
        self.get_block_act_idx_shell_vars()
    )
    cli_args: list[str] = []
    for direction in ("inputs", "outputs"):
        for fmt, path in paths_by_direction[direction].items():
            if self.script_data_files_use_opt:
                cli_args.append(f"--{direction}-{fmt}")
            cli_args.append(str(path))

    return cli_args