hpcflow 0.1.15__py3-none-any.whl → 0.2.0a271__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (275) hide show
  1. hpcflow/__init__.py +2 -11
  2. hpcflow/__pyinstaller/__init__.py +5 -0
  3. hpcflow/__pyinstaller/hook-hpcflow.py +40 -0
  4. hpcflow/_version.py +1 -1
  5. hpcflow/app.py +43 -0
  6. hpcflow/cli.py +2 -461
  7. hpcflow/data/demo_data_manifest/__init__.py +3 -0
  8. hpcflow/data/demo_data_manifest/demo_data_manifest.json +6 -0
  9. hpcflow/data/jinja_templates/test/test_template.txt +8 -0
  10. hpcflow/data/programs/hello_world/README.md +1 -0
  11. hpcflow/data/programs/hello_world/hello_world.c +87 -0
  12. hpcflow/data/programs/hello_world/linux/hello_world +0 -0
  13. hpcflow/data/programs/hello_world/macos/hello_world +0 -0
  14. hpcflow/data/programs/hello_world/win/hello_world.exe +0 -0
  15. hpcflow/data/scripts/__init__.py +1 -0
  16. hpcflow/data/scripts/bad_script.py +2 -0
  17. hpcflow/data/scripts/demo_task_1_generate_t1_infile_1.py +8 -0
  18. hpcflow/data/scripts/demo_task_1_generate_t1_infile_2.py +8 -0
  19. hpcflow/data/scripts/demo_task_1_parse_p3.py +7 -0
  20. hpcflow/data/scripts/do_nothing.py +2 -0
  21. hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
  22. hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
  23. hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
  24. hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
  25. hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
  26. hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
  27. hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
  28. hpcflow/data/scripts/generate_t1_file_01.py +7 -0
  29. hpcflow/data/scripts/import_future_script.py +7 -0
  30. hpcflow/data/scripts/input_file_generator_basic.py +3 -0
  31. hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
  32. hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
  33. hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
  34. hpcflow/data/scripts/main_script_test_direct_in_direct_out.py +6 -0
  35. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
  36. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
  37. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
  38. hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
  39. hpcflow/data/scripts/main_script_test_direct_in_direct_out_all_iters_test.py +15 -0
  40. hpcflow/data/scripts/main_script_test_direct_in_direct_out_env_spec.py +7 -0
  41. hpcflow/data/scripts/main_script_test_direct_in_direct_out_labels.py +8 -0
  42. hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
  43. hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
  44. hpcflow/data/scripts/main_script_test_direct_sub_param_in_direct_out.py +6 -0
  45. hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +12 -0
  46. hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
  47. hpcflow/data/scripts/main_script_test_hdf5_in_obj_group.py +12 -0
  48. hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +11 -0
  49. hpcflow/data/scripts/main_script_test_json_and_direct_in_json_out.py +14 -0
  50. hpcflow/data/scripts/main_script_test_json_in_json_and_direct_out.py +17 -0
  51. hpcflow/data/scripts/main_script_test_json_in_json_out.py +14 -0
  52. hpcflow/data/scripts/main_script_test_json_in_json_out_labels.py +16 -0
  53. hpcflow/data/scripts/main_script_test_json_in_obj.py +12 -0
  54. hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
  55. hpcflow/data/scripts/main_script_test_json_out_obj.py +10 -0
  56. hpcflow/data/scripts/main_script_test_json_sub_param_in_json_out_labels.py +16 -0
  57. hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
  58. hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
  59. hpcflow/data/scripts/output_file_parser_basic.py +3 -0
  60. hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
  61. hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
  62. hpcflow/data/scripts/parse_t1_file_01.py +4 -0
  63. hpcflow/data/scripts/script_exit_test.py +5 -0
  64. hpcflow/data/template_components/__init__.py +1 -0
  65. hpcflow/data/template_components/command_files.yaml +26 -0
  66. hpcflow/data/template_components/environments.yaml +13 -0
  67. hpcflow/data/template_components/parameters.yaml +14 -0
  68. hpcflow/data/template_components/task_schemas.yaml +139 -0
  69. hpcflow/data/workflows/workflow_1.yaml +5 -0
  70. hpcflow/examples.ipynb +1037 -0
  71. hpcflow/sdk/__init__.py +149 -0
  72. hpcflow/sdk/app.py +4266 -0
  73. hpcflow/sdk/cli.py +1479 -0
  74. hpcflow/sdk/cli_common.py +385 -0
  75. hpcflow/sdk/config/__init__.py +5 -0
  76. hpcflow/sdk/config/callbacks.py +246 -0
  77. hpcflow/sdk/config/cli.py +388 -0
  78. hpcflow/sdk/config/config.py +1410 -0
  79. hpcflow/sdk/config/config_file.py +501 -0
  80. hpcflow/sdk/config/errors.py +272 -0
  81. hpcflow/sdk/config/types.py +150 -0
  82. hpcflow/sdk/core/__init__.py +38 -0
  83. hpcflow/sdk/core/actions.py +3857 -0
  84. hpcflow/sdk/core/app_aware.py +25 -0
  85. hpcflow/sdk/core/cache.py +224 -0
  86. hpcflow/sdk/core/command_files.py +814 -0
  87. hpcflow/sdk/core/commands.py +424 -0
  88. hpcflow/sdk/core/element.py +2071 -0
  89. hpcflow/sdk/core/enums.py +221 -0
  90. hpcflow/sdk/core/environment.py +256 -0
  91. hpcflow/sdk/core/errors.py +1043 -0
  92. hpcflow/sdk/core/execute.py +207 -0
  93. hpcflow/sdk/core/json_like.py +809 -0
  94. hpcflow/sdk/core/loop.py +1320 -0
  95. hpcflow/sdk/core/loop_cache.py +282 -0
  96. hpcflow/sdk/core/object_list.py +933 -0
  97. hpcflow/sdk/core/parameters.py +3371 -0
  98. hpcflow/sdk/core/rule.py +196 -0
  99. hpcflow/sdk/core/run_dir_files.py +57 -0
  100. hpcflow/sdk/core/skip_reason.py +7 -0
  101. hpcflow/sdk/core/task.py +3792 -0
  102. hpcflow/sdk/core/task_schema.py +993 -0
  103. hpcflow/sdk/core/test_utils.py +538 -0
  104. hpcflow/sdk/core/types.py +447 -0
  105. hpcflow/sdk/core/utils.py +1207 -0
  106. hpcflow/sdk/core/validation.py +87 -0
  107. hpcflow/sdk/core/values.py +477 -0
  108. hpcflow/sdk/core/workflow.py +4820 -0
  109. hpcflow/sdk/core/zarr_io.py +206 -0
  110. hpcflow/sdk/data/__init__.py +13 -0
  111. hpcflow/sdk/data/config_file_schema.yaml +34 -0
  112. hpcflow/sdk/data/config_schema.yaml +260 -0
  113. hpcflow/sdk/data/environments_spec_schema.yaml +21 -0
  114. hpcflow/sdk/data/files_spec_schema.yaml +5 -0
  115. hpcflow/sdk/data/parameters_spec_schema.yaml +7 -0
  116. hpcflow/sdk/data/task_schema_spec_schema.yaml +3 -0
  117. hpcflow/sdk/data/workflow_spec_schema.yaml +22 -0
  118. hpcflow/sdk/demo/__init__.py +3 -0
  119. hpcflow/sdk/demo/cli.py +242 -0
  120. hpcflow/sdk/helper/__init__.py +3 -0
  121. hpcflow/sdk/helper/cli.py +137 -0
  122. hpcflow/sdk/helper/helper.py +300 -0
  123. hpcflow/sdk/helper/watcher.py +192 -0
  124. hpcflow/sdk/log.py +288 -0
  125. hpcflow/sdk/persistence/__init__.py +18 -0
  126. hpcflow/sdk/persistence/base.py +2817 -0
  127. hpcflow/sdk/persistence/defaults.py +6 -0
  128. hpcflow/sdk/persistence/discovery.py +39 -0
  129. hpcflow/sdk/persistence/json.py +954 -0
  130. hpcflow/sdk/persistence/pending.py +948 -0
  131. hpcflow/sdk/persistence/store_resource.py +203 -0
  132. hpcflow/sdk/persistence/types.py +309 -0
  133. hpcflow/sdk/persistence/utils.py +73 -0
  134. hpcflow/sdk/persistence/zarr.py +2388 -0
  135. hpcflow/sdk/runtime.py +320 -0
  136. hpcflow/sdk/submission/__init__.py +3 -0
  137. hpcflow/sdk/submission/enums.py +70 -0
  138. hpcflow/sdk/submission/jobscript.py +2379 -0
  139. hpcflow/sdk/submission/schedulers/__init__.py +281 -0
  140. hpcflow/sdk/submission/schedulers/direct.py +233 -0
  141. hpcflow/sdk/submission/schedulers/sge.py +376 -0
  142. hpcflow/sdk/submission/schedulers/slurm.py +598 -0
  143. hpcflow/sdk/submission/schedulers/utils.py +25 -0
  144. hpcflow/sdk/submission/shells/__init__.py +52 -0
  145. hpcflow/sdk/submission/shells/base.py +229 -0
  146. hpcflow/sdk/submission/shells/bash.py +504 -0
  147. hpcflow/sdk/submission/shells/os_version.py +115 -0
  148. hpcflow/sdk/submission/shells/powershell.py +352 -0
  149. hpcflow/sdk/submission/submission.py +1402 -0
  150. hpcflow/sdk/submission/types.py +140 -0
  151. hpcflow/sdk/typing.py +194 -0
  152. hpcflow/sdk/utils/arrays.py +69 -0
  153. hpcflow/sdk/utils/deferred_file.py +55 -0
  154. hpcflow/sdk/utils/hashing.py +16 -0
  155. hpcflow/sdk/utils/patches.py +31 -0
  156. hpcflow/sdk/utils/strings.py +69 -0
  157. hpcflow/tests/api/test_api.py +32 -0
  158. hpcflow/tests/conftest.py +123 -0
  159. hpcflow/tests/data/__init__.py +0 -0
  160. hpcflow/tests/data/benchmark_N_elements.yaml +6 -0
  161. hpcflow/tests/data/benchmark_script_runner.yaml +26 -0
  162. hpcflow/tests/data/multi_path_sequences.yaml +29 -0
  163. hpcflow/tests/data/workflow_1.json +10 -0
  164. hpcflow/tests/data/workflow_1.yaml +5 -0
  165. hpcflow/tests/data/workflow_1_slurm.yaml +8 -0
  166. hpcflow/tests/data/workflow_1_wsl.yaml +8 -0
  167. hpcflow/tests/data/workflow_test_run_abort.yaml +42 -0
  168. hpcflow/tests/jinja_templates/test_jinja_templates.py +161 -0
  169. hpcflow/tests/programs/test_programs.py +180 -0
  170. hpcflow/tests/schedulers/direct_linux/test_direct_linux_submission.py +12 -0
  171. hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
  172. hpcflow/tests/schedulers/slurm/test_slurm_submission.py +14 -0
  173. hpcflow/tests/scripts/test_input_file_generators.py +282 -0
  174. hpcflow/tests/scripts/test_main_scripts.py +1361 -0
  175. hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
  176. hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
  177. hpcflow/tests/shells/wsl/test_wsl_submission.py +14 -0
  178. hpcflow/tests/unit/test_action.py +1066 -0
  179. hpcflow/tests/unit/test_action_rule.py +24 -0
  180. hpcflow/tests/unit/test_app.py +132 -0
  181. hpcflow/tests/unit/test_cache.py +46 -0
  182. hpcflow/tests/unit/test_cli.py +172 -0
  183. hpcflow/tests/unit/test_command.py +377 -0
  184. hpcflow/tests/unit/test_config.py +195 -0
  185. hpcflow/tests/unit/test_config_file.py +162 -0
  186. hpcflow/tests/unit/test_element.py +666 -0
  187. hpcflow/tests/unit/test_element_iteration.py +88 -0
  188. hpcflow/tests/unit/test_element_set.py +158 -0
  189. hpcflow/tests/unit/test_group.py +115 -0
  190. hpcflow/tests/unit/test_input_source.py +1479 -0
  191. hpcflow/tests/unit/test_input_value.py +398 -0
  192. hpcflow/tests/unit/test_jobscript_unit.py +757 -0
  193. hpcflow/tests/unit/test_json_like.py +1247 -0
  194. hpcflow/tests/unit/test_loop.py +2674 -0
  195. hpcflow/tests/unit/test_meta_task.py +325 -0
  196. hpcflow/tests/unit/test_multi_path_sequences.py +259 -0
  197. hpcflow/tests/unit/test_object_list.py +116 -0
  198. hpcflow/tests/unit/test_parameter.py +243 -0
  199. hpcflow/tests/unit/test_persistence.py +664 -0
  200. hpcflow/tests/unit/test_resources.py +243 -0
  201. hpcflow/tests/unit/test_run.py +286 -0
  202. hpcflow/tests/unit/test_run_directories.py +29 -0
  203. hpcflow/tests/unit/test_runtime.py +9 -0
  204. hpcflow/tests/unit/test_schema_input.py +372 -0
  205. hpcflow/tests/unit/test_shell.py +129 -0
  206. hpcflow/tests/unit/test_slurm.py +39 -0
  207. hpcflow/tests/unit/test_submission.py +502 -0
  208. hpcflow/tests/unit/test_task.py +2560 -0
  209. hpcflow/tests/unit/test_task_schema.py +182 -0
  210. hpcflow/tests/unit/test_utils.py +616 -0
  211. hpcflow/tests/unit/test_value_sequence.py +549 -0
  212. hpcflow/tests/unit/test_values.py +91 -0
  213. hpcflow/tests/unit/test_workflow.py +827 -0
  214. hpcflow/tests/unit/test_workflow_template.py +186 -0
  215. hpcflow/tests/unit/utils/test_arrays.py +40 -0
  216. hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
  217. hpcflow/tests/unit/utils/test_hashing.py +65 -0
  218. hpcflow/tests/unit/utils/test_patches.py +5 -0
  219. hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
  220. hpcflow/tests/unit/utils/test_strings.py +97 -0
  221. hpcflow/tests/workflows/__init__.py +0 -0
  222. hpcflow/tests/workflows/test_directory_structure.py +31 -0
  223. hpcflow/tests/workflows/test_jobscript.py +355 -0
  224. hpcflow/tests/workflows/test_run_status.py +198 -0
  225. hpcflow/tests/workflows/test_skip_downstream.py +696 -0
  226. hpcflow/tests/workflows/test_submission.py +140 -0
  227. hpcflow/tests/workflows/test_workflows.py +564 -0
  228. hpcflow/tests/workflows/test_zip.py +18 -0
  229. hpcflow/viz_demo.ipynb +6794 -0
  230. hpcflow-0.2.0a271.dist-info/LICENSE +375 -0
  231. hpcflow-0.2.0a271.dist-info/METADATA +65 -0
  232. hpcflow-0.2.0a271.dist-info/RECORD +237 -0
  233. {hpcflow-0.1.15.dist-info → hpcflow-0.2.0a271.dist-info}/WHEEL +4 -5
  234. hpcflow-0.2.0a271.dist-info/entry_points.txt +6 -0
  235. hpcflow/api.py +0 -490
  236. hpcflow/archive/archive.py +0 -307
  237. hpcflow/archive/cloud/cloud.py +0 -45
  238. hpcflow/archive/cloud/errors.py +0 -9
  239. hpcflow/archive/cloud/providers/dropbox.py +0 -427
  240. hpcflow/archive/errors.py +0 -5
  241. hpcflow/base_db.py +0 -4
  242. hpcflow/config.py +0 -233
  243. hpcflow/copytree.py +0 -66
  244. hpcflow/data/examples/_config.yml +0 -14
  245. hpcflow/data/examples/damask/demo/1.run.yml +0 -4
  246. hpcflow/data/examples/damask/demo/2.process.yml +0 -29
  247. hpcflow/data/examples/damask/demo/geom.geom +0 -2052
  248. hpcflow/data/examples/damask/demo/load.load +0 -1
  249. hpcflow/data/examples/damask/demo/material.config +0 -185
  250. hpcflow/data/examples/damask/inputs/geom.geom +0 -2052
  251. hpcflow/data/examples/damask/inputs/load.load +0 -1
  252. hpcflow/data/examples/damask/inputs/material.config +0 -185
  253. hpcflow/data/examples/damask/profiles/_variable_lookup.yml +0 -21
  254. hpcflow/data/examples/damask/profiles/damask.yml +0 -4
  255. hpcflow/data/examples/damask/profiles/damask_process.yml +0 -8
  256. hpcflow/data/examples/damask/profiles/damask_run.yml +0 -5
  257. hpcflow/data/examples/damask/profiles/default.yml +0 -6
  258. hpcflow/data/examples/thinking.yml +0 -177
  259. hpcflow/errors.py +0 -2
  260. hpcflow/init_db.py +0 -37
  261. hpcflow/models.py +0 -2595
  262. hpcflow/nesting.py +0 -9
  263. hpcflow/profiles.py +0 -455
  264. hpcflow/project.py +0 -81
  265. hpcflow/scheduler.py +0 -322
  266. hpcflow/utils.py +0 -103
  267. hpcflow/validation.py +0 -166
  268. hpcflow/variables.py +0 -543
  269. hpcflow-0.1.15.dist-info/METADATA +0 -168
  270. hpcflow-0.1.15.dist-info/RECORD +0 -45
  271. hpcflow-0.1.15.dist-info/entry_points.txt +0 -8
  272. hpcflow-0.1.15.dist-info/top_level.txt +0 -1
  273. /hpcflow/{archive → data/jinja_templates}/__init__.py +0 -0
  274. /hpcflow/{archive/cloud → data/programs}/__init__.py +0 -0
  275. /hpcflow/{archive/cloud/providers → data/workflows}/__init__.py +0 -0
@@ -0,0 +1,3857 @@
1
+ """
2
+ Actions are base components of elements.
3
+ Element action runs (EARs) are the basic components of any enactment;
4
+ they may be grouped together within a jobscript for efficiency.
5
+ """
6
+
7
+ from __future__ import annotations
8
+ from collections.abc import Mapping
9
+ import copy
10
+ from dataclasses import dataclass
11
+ import json
12
+ import contextlib
13
+ from collections import defaultdict
14
+ from pathlib import Path
15
+ import re
16
+ import warnings
17
+ from functools import partial
18
+ from itertools import chain
19
+ from textwrap import indent, dedent
20
+ from typing import cast, final, overload, TYPE_CHECKING
21
+ from typing_extensions import override
22
+
23
+ from watchdog.utils.dirsnapshot import DirectorySnapshotDiff
24
+
25
+ from hpcflow.sdk.core import ABORT_EXIT_CODE
26
+ from hpcflow.sdk.core.app_aware import AppAware
27
+ from hpcflow.sdk.core.enums import ActionScopeType, EARStatus
28
+ from hpcflow.sdk.core.skip_reason import SkipReason
29
+ from hpcflow.sdk.core.task import WorkflowTask
30
+ from hpcflow.sdk.core.errors import (
31
+ ActionEnvironmentMissingNameError,
32
+ MissingCompatibleActionEnvironment,
33
+ OutputFileParserNoOutputError,
34
+ UnknownActionDataKey,
35
+ UnknownActionDataParameter,
36
+ UnsupportedActionDataFormat,
37
+ UnsetParameterDataError,
38
+ UnsetParameterFractionLimitExceededError,
39
+ UnsetParameterNumberLimitExceededError,
40
+ )
41
+ from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
42
+ from hpcflow.sdk.core.parameters import ParameterValue
43
+ from hpcflow.sdk.typing import ParamSource, hydrate
44
+ from hpcflow.sdk.core.utils import (
45
+ JSONLikeDirSnapShot,
46
+ split_param_label,
47
+ swap_nested_dict_keys,
48
+ get_relative_path,
49
+ )
50
+ from hpcflow.sdk.log import TimeIt
51
+ from hpcflow.sdk.core.run_dir_files import RunDirAppFiles
52
+ from hpcflow.sdk.submission.enums import SubmissionStatus
53
+ from hpcflow.sdk.submission.submission import Submission
54
+ from hpcflow.sdk.utils.hashing import get_hash
55
+
56
+ from jinja2 import (
57
+ Environment as JinjaEnvironment,
58
+ FileSystemLoader as JinjaFileSystemLoader,
59
+ Template as JinjaTemplate,
60
+ meta as jinja_meta,
61
+ )
62
+
63
+ if TYPE_CHECKING:
64
+ from collections.abc import Callable, Container, Iterable, Iterator, Sequence
65
+ from datetime import datetime
66
+ from re import Pattern
67
+ from typing import Any, ClassVar, Literal
68
+ from typing_extensions import Self
69
+ from valida.conditions import ConditionLike # type: ignore
70
+
71
+ from ..typing import DataIndex, ParamSource
72
+ from ..submission.shells import Shell
73
+ from ..submission.jobscript import Jobscript
74
+ from .commands import Command
75
+ from .command_files import InputFileGenerator, OutputFileParser, FileSpec
76
+ from .element import (
77
+ Element,
78
+ ElementIteration,
79
+ ElementInputs,
80
+ ElementOutputs,
81
+ ElementResources,
82
+ ElementInputFiles,
83
+ ElementOutputFiles,
84
+ )
85
+ from .environment import Environment
86
+ from .parameters import SchemaParameter, Parameter
87
+ from .rule import Rule
88
+ from .task import WorkflowTask
89
+ from .task_schema import TaskSchema
90
+ from .types import ParameterDependence, ActionData, BlockActionKey
91
+ from .workflow import Workflow
92
+ from .object_list import EnvironmentsList
93
+
94
#: Regex for parsing an action-scope string: group 1 captures a leading word
#: (the scope type name), group 2 optionally captures the contents of a
#: trailing ``[...]`` suffix (the scope's keyword arguments, if any).
ACTION_SCOPE_REGEX = r"(\w*)(?:\[(.*)\])?"
95
+
96
+
97
@dataclass
class UnsetParamTracker:
    """Class to track run IDs that are the sources of unset parameter data for some input
    parameter type.

    Attributes
    ----------
    run_ids
        Set of integer run IDs that have been tracked.
    group_size
        The size of the group, if the associated SchemaInput in question is a group.

    Notes
    -----
    Objects of this class are instantiated within
    `WorkflowTask._get_merged_parameter_data` when we are tracking unset parameters.

    """

    # NOTE: field order is part of the dataclass-generated __init__ signature;
    # do not reorder.
    run_ids: set[int]
    group_size: int
118
+
119
+
120
#: Keyword arguments permitted for particular scopes.
#: Maps an `ActionScopeType` member name to the (frozen) set of keyword
#: argument names that scope accepts; most scopes accept none.
ACTION_SCOPE_ALLOWED_KWARGS: Mapping[str, frozenset[str]] = {
    ActionScopeType.ANY.name: frozenset(),
    ActionScopeType.MAIN.name: frozenset(),
    ActionScopeType.PROCESSING.name: frozenset(),
    ActionScopeType.INPUT_FILE_GENERATOR.name: frozenset({"file"}),
    ActionScopeType.OUTPUT_FILE_PARSER.name: frozenset({"output"}),
}
128
+
129
+
130
+ class ElementActionRun(AppAware):
131
+ """
132
+ The Element Action Run (EAR) is an atomic unit of an enacted workflow, representing
133
+ one unit of work (e.g., particular submitted job to run a program) within that
134
+ overall workflow. With looping over, say, parameter spaces, there may be many EARs
135
+ per element.
136
+
137
+ Parameters
138
+ ----------
139
+ id_: int
140
+ The ID of the EAR.
141
+ is_pending: bool
142
+ Whether this EAR is pending.
143
+ element_action:
144
+ The particular element action that this is a run of.
145
+ index: int:
146
+ The index of the run within the collection of runs.
147
+ data_idx: dict
148
+ Used for looking up input data to the EAR.
149
+ commands_idx: list[int]
150
+ Indices of commands to apply.
151
+ start_time: datetime
152
+ Time of start of run, if the run has ever been started.
153
+ end_time: datetime
154
+ Time of end of run, if the run has ever ended.
155
+ snapshot_start: dict
156
+ Parameters for taking a snapshot of the data directory before the run.
157
+ If unspecified, no snapshot will be taken.
158
+ snapshot_end: dict
159
+ Parameters for taking a snapshot of the data directory after the run.
160
+ If unspecified, no snapshot will be taken.
161
+ submission_idx: int
162
+ What submission was this (if it has been submitted)?
163
+ success: bool
164
+ Whether this EAR succeeded (if it has run).
165
+ skip: bool
166
+ Whether this EAR was skipped.
167
+ exit_code: int
168
+ The exit code, if known.
169
+ metadata: dict
170
+ Metadata about the EAR.
171
+ run_hostname: str
172
+ Where to run the EAR (if not locally).
173
+ """
174
+
175
    def __init__(
        self,
        id_: int,
        is_pending: bool,
        element_action: ElementAction,
        index: int,
        data_idx: DataIndex,
        commands_idx: list[int],
        start_time: datetime | None,
        end_time: datetime | None,
        snapshot_start: dict[str, Any] | None,
        snapshot_end: dict[str, Any] | None,
        submission_idx: int | None,
        commands_file_ID: int | None,
        success: bool | None,
        skip: int,
        exit_code: int | None,
        metadata: dict[str, Any],
        run_hostname: str | None,
        port_number: int | None,
    ) -> None:
        # See the class docstring for the meaning of each parameter; values are
        # stored verbatim on private attributes and exposed via properties.
        self._id = id_
        self._is_pending = is_pending
        self._element_action = element_action
        self._index = index  # local index of this run with the action
        self._data_idx = data_idx
        self._commands_idx = commands_idx
        self._start_time = start_time
        self._end_time = end_time
        self._submission_idx = submission_idx
        self._commands_file_ID = commands_file_ID
        self._success = success
        self._skip = skip
        self._snapshot_start = snapshot_start
        self._snapshot_end = snapshot_end
        self._exit_code = exit_code
        self._metadata = metadata
        self._run_hostname = run_hostname
        self._port_number = port_number

        # assigned on first access of corresponding properties (lazy caches):
        self._inputs: ElementInputs | None = None
        self._outputs: ElementOutputs | None = None
        self._resources: ElementResources | None = None
        self._resources_with_defaults: ElementResources | None = None
        self._input_files: ElementInputFiles | None = None
        self._output_files: ElementOutputFiles | None = None
        self._ss_start_obj: JSONLikeDirSnapShot | None = None
        self._ss_end_obj: JSONLikeDirSnapShot | None = None
        self._ss_diff_obj: DirectorySnapshotDiff | None = None
225
+
226
+ def __repr__(self) -> str:
227
+ return (
228
+ f"{self.__class__.__name__}("
229
+ f"id={self.id_!r}, index={self.index!r}, "
230
+ f"element_action={self.element_action!r})"
231
+ )
232
+
233
    @property
    def id_(self) -> int:
        """
        The ID of the EAR.
        """
        return self._id

    @property
    def is_pending(self) -> bool:
        """
        Whether this EAR is pending (i.e. not yet committed to the store).
        """
        return self._is_pending

    @property
    def element_action(self) -> ElementAction:
        """
        The particular element action that this is a run of.
        """
        return self._element_action

    @property
    def index(self) -> int:
        """Run index, local to the parent element action."""
        return self._index
258
+
259
    @property
    def action(self) -> Action:
        """
        The action this is a run of (via the parent element action).
        """
        return self.element_action.action

    @property
    def element_iteration(self) -> ElementIteration:
        """
        The iteration information of this run.
        """
        return self.element_action.element_iteration

    @property
    def element(self) -> Element:
        """
        The element this is a run of.
        """
        return self.element_iteration.element

    @property
    def workflow(self) -> Workflow:
        """
        The workflow this is a run of.
        """
        return self.element_iteration.workflow
286
+
287
    @property
    def data_idx(self) -> DataIndex:
        """
        Used for looking up input data to the EAR.
        """
        return self._data_idx

    @property
    def commands_idx(self) -> Sequence[int]:
        """
        Indices of commands to apply.
        """
        return self._commands_idx

    @property
    def metadata(self) -> Mapping[str, Any]:
        """
        Metadata about the EAR.
        """
        return self._metadata

    @property
    def run_hostname(self) -> str | None:
        """
        Where to run the EAR, if known/specified.
        """
        return self._run_hostname

    @property
    def port_number(self) -> int | None:
        """
        The port number associated with this run, if any was recorded.
        """
        return self._port_number
318
+
319
    @property
    def start_time(self) -> datetime | None:
        """
        When the EAR started, if it has ever been started.
        """
        return self._start_time

    @property
    def end_time(self) -> datetime | None:
        """
        When the EAR finished, if it has ever ended.
        """
        return self._end_time

    @property
    def submission_idx(self) -> int | None:
        """
        What actual submission index was this?
        """
        return self._submission_idx

    @property
    def commands_file_ID(self) -> int | None:
        """
        ID of the commands file associated with this run, if any.
        """
        return self._commands_file_ID
343
+
344
    @property
    def success(self) -> bool | None:
        """
        Did the EAR succeed? `None` if it has not run.
        """
        return self._success

    @property
    def skip(self) -> int:
        """
        Was the EAR skipped? Stored as an integer `SkipReason` value
        (zero/falsy means not skipped).
        """
        return self._skip

    @property
    def skip_reason(self) -> SkipReason:
        """
        The `skip` integer interpreted as a `SkipReason` enum member.
        """
        return SkipReason(self.skip)
361
+
362
    @property
    def snapshot_start(self) -> JSONLikeDirSnapShot | None:
        """
        The snapshot of the data directory at the start of the run.

        Lazily constructed from the stored snapshot parameters on first access;
        `None` when no start snapshot was recorded.
        """
        if self._ss_start_obj is None and self._snapshot_start:
            self._ss_start_obj = JSONLikeDirSnapShot(
                root_path=".",
                **self._snapshot_start,
            )
        return self._ss_start_obj

    @property
    def snapshot_end(self) -> JSONLikeDirSnapShot | None:
        """
        The snapshot of the data directory at the end of the run.

        Lazily constructed like `snapshot_start`; `None` when no end snapshot
        was recorded.
        """
        if self._ss_end_obj is None and self._snapshot_end:
            self._ss_end_obj = JSONLikeDirSnapShot(root_path=".", **self._snapshot_end)
        return self._ss_end_obj
382
+
383
    @property
    def dir_diff(self) -> DirectorySnapshotDiff | None:
        """
        The changes to the EAR working directory due to the execution of this EAR.

        Computed lazily from the start/end snapshots and cached; `None` while
        either snapshot is unavailable.
        """
        if (
            not self._ss_diff_obj
            and (ss := self.snapshot_start)
            and (se := self.snapshot_end)
        ):
            self._ss_diff_obj = DirectorySnapshotDiff(ss, se)
        return self._ss_diff_obj
395
+
396
    @property
    def exit_code(self) -> int | None:
        """
        The exit code of the underlying program run by the EAR, if known.
        """
        return self._exit_code

    @property
    def task(self) -> WorkflowTask:
        """
        The task that this EAR is part of the implementation of.
        """
        return self.element_action.task
409
+
410
+ @property
411
+ def status(self) -> EARStatus:
412
+ """
413
+ The state of this EAR.
414
+ """
415
+
416
+ if self.skip:
417
+ return EARStatus.skipped
418
+
419
+ elif self.end_time is not None:
420
+ if self.exit_code == 0:
421
+ return EARStatus.success
422
+ elif self.action.abortable and self.exit_code == ABORT_EXIT_CODE:
423
+ return EARStatus.aborted
424
+ else:
425
+ return EARStatus.error
426
+
427
+ elif self.start_time is not None:
428
+ return EARStatus.running
429
+
430
+ elif self.submission_idx is not None:
431
+ wk_sub_stat = self.workflow.submissions[self.submission_idx].status
432
+
433
+ if wk_sub_stat == SubmissionStatus.PENDING:
434
+ return EARStatus.prepared
435
+ elif wk_sub_stat == SubmissionStatus.SUBMITTED:
436
+ return EARStatus.submitted
437
+ else:
438
+ RuntimeError(f"Workflow submission status not understood: {wk_sub_stat}.")
439
+
440
+ return EARStatus.pending
441
+
442
    # Patterns for the `<<resource:NAME>>`, `<<env:NAME>>` and
    # `<<parameter:NAME>>` substitution variables used in action path strings;
    # consumed by `__substitute_vars_in_paths`.
    __RES_RE: ClassVar[Pattern] = re.compile(r"\<\<resource:(\w+)\>\>")
    __ENV_RE: ClassVar[Pattern] = re.compile(
        r"\<\<env:(.*?)\>\>"
    )  # TODO: refactor; also in `Action`
    __PARAM_RE: ClassVar[Pattern] = re.compile(r"\<\<parameter:(\w+)\>\>")
447
+
448
    def __substitute_vars_in_paths(self, path: str) -> str:
        """Substitute resources, environment specifiers, and parameter values in string
        paths.

        Each `<<resource:NAME>>`, `<<env:NAME>>` and `<<parameter:NAME>>`
        occurrence in `path` is replaced, in that order, with the corresponding
        value from this run's resources, environment specification, or
        parameter data.
        """

        def resource_repl(match_obj: re.Match[str], resources: ElementResources) -> str:
            # Resource variables resolve to attributes of the resources object.
            return getattr(resources, match_obj.groups()[0])

        def env_repl(
            match_obj: re.Match[str],
            env_spec: Mapping[str, Any],
        ) -> str:
            # Environment variables resolve to keys of the env-spec mapping.
            return env_spec[match_obj.groups()[0]]

        def param_repl(
            match_obj: re.Match[str],
            run: ElementActionRun,
        ) -> str:
            # Prefer an output parameter of the same name if one exists in the
            # data index; otherwise fall back to the input parameter.
            param = match_obj.groups()[0]
            key = f"outputs.{param}"
            key = key if key in run.get_data_idx() else f"inputs.{param}"
            return str(run.get(key))

        # substitute resources in the path:
        path = self.__RES_RE.sub(
            repl=partial(resource_repl, resources=self.resources_with_defaults),
            string=path,
        )
        # substitute environment specifiers in the path:
        path = self.__ENV_RE.sub(
            repl=partial(env_repl, env_spec=self.env_spec),
            string=path,
        )
        # substitute parameter values in the path:
        return self.__PARAM_RE.sub(
            repl=partial(param_repl, run=self),
            string=path,
        )
485
+
486
    @property
    def program_path_actual(self) -> Path | None:
        """Get the path to the associated action program, if the action includes a program
        specification, with variable substitutions applied.

        Named programs (`action.program`) are looked up in the app's program
        registry; otherwise the substituted string is treated as a filesystem
        path. Returns `None` when the action has no program specification.
        """

        if prog_or_path := self.action.program_or_program_path:
            prog_path_str = self.__substitute_vars_in_paths(prog_or_path)
            return (
                self._app.programs[prog_path_str]
                if self.action.program
                else Path(prog_path_str)
            )
        return None
499
+
500
    @property
    def jinja_template_path_actual(self):
        """
        Get the path to the associated jinja template, if the action includes a template
        specification, with variable substitutions applied.

        Returns `None` when the action has no template specification; otherwise
        delegates resolution of the substituted path to the action.
        """
        if template_or_path := self.action.jinja_template_or_template_path:
            template_path_str = self.__substitute_vars_in_paths(template_or_path)
            return self.action.get_jinja_template_resolved_path(template_path_str)
        return None
510
+
511
    def get_parameter_names(self, prefix: str) -> Sequence[str]:
        """Get parameter types associated with a given prefix.

        Thin delegation to `Action.get_parameter_names`.

        For inputs, labels are ignored. See `Action.get_parameter_names` for more
        information.

        Parameters
        ----------
        prefix
            One of "inputs", "outputs", "input_files", "output_files".
        """
        return self.action.get_parameter_names(prefix)
523
+
524
    def get_data_idx(self, path: str | None = None) -> DataIndex:
        """
        Get the data index of a value in the most recent iteration.

        Delegates to the parent element iteration, scoped to this run's action
        index and run index.

        Parameters
        ----------
        path:
            Path to the parameter.
        """
        return self.element_iteration.get_data_idx(
            path,
            action_idx=self.element_action.action_idx,
            run_idx=self.index,
        )
538
+
539
+ @overload
540
+ def get_parameter_sources(
541
+ self,
542
+ *,
543
+ path: str | None = None,
544
+ typ: str | None = None,
545
+ as_strings: Literal[False] = False,
546
+ use_task_index: bool = False,
547
+ ) -> Mapping[str, ParamSource | list[ParamSource]]: ...
548
+
549
+ @overload
550
+ def get_parameter_sources(
551
+ self,
552
+ *,
553
+ path: str | None = None,
554
+ typ: str | None = None,
555
+ as_strings: Literal[True],
556
+ use_task_index: bool = False,
557
+ ) -> Mapping[str, str]: ...
558
+
559
+ @TimeIt.decorator
560
+ def get_parameter_sources(
561
+ self,
562
+ *,
563
+ path: str | None = None,
564
+ typ: str | None = None,
565
+ as_strings: bool = False,
566
+ use_task_index: bool = False,
567
+ ) -> Mapping[str, str] | Mapping[str, ParamSource | list[ParamSource]]:
568
+ """
569
+ Get the source or sources of a parameter in the most recent iteration.
570
+
571
+ Parameters
572
+ ----------
573
+ path:
574
+ Path to the parameter.
575
+ typ:
576
+ The parameter type.
577
+ as_strings:
578
+ Whether to return the result as human-readable strings.
579
+ use_task_index:
580
+ Whether to use the task index.
581
+ """
582
+ if as_strings:
583
+ return self.element_iteration.get_parameter_sources(
584
+ path,
585
+ action_idx=self.element_action.action_idx,
586
+ run_idx=self.index,
587
+ typ=typ,
588
+ as_strings=True,
589
+ use_task_index=use_task_index,
590
+ )
591
+ return self.element_iteration.get_parameter_sources(
592
+ path,
593
+ action_idx=self.element_action.action_idx,
594
+ run_idx=self.index,
595
+ typ=typ,
596
+ as_strings=False,
597
+ use_task_index=use_task_index,
598
+ )
599
+
600
+ def get(
601
+ self,
602
+ path: str | None = None,
603
+ default: Any | None = None,
604
+ raise_on_missing: bool = False,
605
+ raise_on_unset: bool = False,
606
+ ) -> Any:
607
+ """
608
+ Get a value (parameter, input, output, etc.) from the most recent iteration.
609
+
610
+ Parameters
611
+ ----------
612
+ path:
613
+ Path to the value.
614
+ default:
615
+ Default value to provide if value absent.
616
+ raise_on_missing:
617
+ Whether to raise an exception on an absent value.
618
+ If not, the default is returned.
619
+ raise_on_unset:
620
+ Whether to raise an exception on an explicitly unset value.
621
+ If not, the default is returned.
622
+ """
623
+ return self.element_iteration.get(
624
+ path=path,
625
+ action_idx=self.element_action.action_idx,
626
+ run_idx=self.index,
627
+ default=default,
628
+ raise_on_missing=raise_on_missing,
629
+ raise_on_unset=raise_on_unset,
630
+ )
631
+
632
+ @overload
633
+ def get_EAR_dependencies(self, as_objects: Literal[False] = False) -> set[int]: ...
634
+
635
+ @overload
636
+ def get_EAR_dependencies(
637
+ self, as_objects: Literal[True]
638
+ ) -> list[ElementActionRun]: ...
639
+
640
+ @TimeIt.decorator
641
+ def get_EAR_dependencies(self, as_objects=False) -> list[ElementActionRun] | set[int]:
642
+ """Get EARs that this EAR depends on, or just their IDs."""
643
+ out: set[int] = set()
644
+ for src in self.get_parameter_sources(typ="EAR_output").values():
645
+ for src_i in src if isinstance(src, list) else [src]:
646
+ EAR_ID_i: int = src_i["EAR_ID"]
647
+ if EAR_ID_i != self.id_:
648
+ # don't record a self dependency!
649
+ out.add(EAR_ID_i)
650
+
651
+ if as_objects:
652
+ return self.workflow.get_EARs_from_IDs(sorted(out))
653
+ return out
654
+
655
+ def get_input_dependencies(self) -> Mapping[str, ParamSource]:
656
+ """Get information about locally defined input, sequence, and schema-default
657
+ values that this EAR depends on. Note this does not get values from this EAR's
658
+ task/schema, because the aim of this method is to help determine which upstream
659
+ tasks this EAR depends on."""
660
+
661
+ wanted_types = ("local_input", "default_input")
662
+ return {
663
+ k: v_i
664
+ for k, v in self.get_parameter_sources().items()
665
+ for v_i in (v if isinstance(v, list) else [v])
666
+ if (
667
+ v_i["type"] in wanted_types
668
+ and v_i["task_insert_ID"] != self.task.insert_ID
669
+ )
670
+ }
671
+
672
+ @overload
673
+ def get_dependent_EARs(self, as_objects: Literal[False] = False) -> set[int]: ...
674
+
675
+ @overload
676
+ def get_dependent_EARs(self, as_objects: Literal[True]) -> list[ElementActionRun]: ...
677
+
678
+ def get_dependent_EARs(
679
+ self, as_objects: bool = False
680
+ ) -> list[ElementActionRun] | set[int]:
681
+ """Get downstream EARs that depend on this EAR."""
682
+ deps = {
683
+ run.id_
684
+ for task in self.workflow.tasks[self.task.index :]
685
+ for elem in task.elements[:]
686
+ for iter_ in elem.iterations
687
+ for run in iter_.action_runs
688
+ # does EAR dependency belong to self?
689
+ if self._id in run.get_EAR_dependencies()
690
+ }
691
+ if as_objects:
692
+ return self.workflow.get_EARs_from_IDs(sorted(deps))
693
+ return deps
694
+
695
+ @property
696
+ def inputs(self) -> ElementInputs:
697
+ """
698
+ The inputs to this EAR.
699
+ """
700
+ if not self._inputs:
701
+ self._inputs = self._app.ElementInputs(element_action_run=self)
702
+ return self._inputs
703
+
704
+ @property
705
+ def outputs(self) -> ElementOutputs:
706
+ """
707
+ The outputs from this EAR.
708
+ """
709
+ if not self._outputs:
710
+ self._outputs = self._app.ElementOutputs(element_action_run=self)
711
+ return self._outputs
712
+
713
+ @property
714
+ @TimeIt.decorator
715
+ def resources(self) -> ElementResources:
716
+ """
717
+ The resources to use with (or used by) this EAR.
718
+ """
719
+ if not self._resources:
720
+ self._resources = self.__get_resources_obj()
721
+ return self._resources
722
+
723
+ @property
724
+ @TimeIt.decorator
725
+ def resources_with_defaults(self) -> ElementResources:
726
+ """
727
+ The resources to use with (or used by) this EAR, with defaults applied.
728
+ """
729
+ if not self._resources_with_defaults:
730
+ self._resources_with_defaults = self.__get_resources_obj(set_defaults=True)
731
+ return self._resources_with_defaults
732
+
733
+ @property
734
+ def input_files(self) -> ElementInputFiles:
735
+ """
736
+ The input files to the controlled program.
737
+ """
738
+ if not self._input_files:
739
+ self._input_files = self._app.ElementInputFiles(element_action_run=self)
740
+ return self._input_files
741
+
742
+ @property
743
+ def output_files(self) -> ElementOutputFiles:
744
+ """
745
+ The output files from the controlled program.
746
+ """
747
+ if not self._output_files:
748
+ self._output_files = self._app.ElementOutputFiles(element_action_run=self)
749
+ return self._output_files
750
+
751
+ @property
752
+ @TimeIt.decorator
753
+ def env_spec(self) -> Mapping[str, Any]:
754
+ """
755
+ Get the specification that defines the environment in which this run will execute.
756
+ This will include at least a `name` key.
757
+ """
758
+ if (envs := self.resources.environments) is None:
759
+ return {}
760
+ return envs[self.action.get_environment_name()]
761
+
762
    @property
    @TimeIt.decorator
    def env_spec_hashable(self) -> tuple:
        """Hashable representation of `env_spec` (e.g. for use as a grouping key)."""
        return self.action.env_spec_to_hashable(self.env_spec)
766
+
767
+ def get_directory(self) -> Path | None:
768
+ """
769
+ Get the working directory, if one is required.
770
+ """
771
+ return self.workflow.get_run_directories(run_ids=[self.id_])[0]
772
+
773
    def get_app_log_path(self) -> Path:
        """Get the path to this run's app log file within the submission directory.

        The run must have been submitted (`submission_idx` set).
        """
        assert self.submission_idx is not None
        return Submission.get_app_log_file_path(
            self.workflow.submissions_path,
            self.submission_idx,
            self.id_,
        )
780
+
781
    def get_app_std_path(self) -> Path:
        """Get the path to this run's app standard-stream file (named by run ID)
        within the submission directory.

        The run must have been submitted (`submission_idx` set).
        """
        assert self.submission_idx is not None
        std_dir = Submission.get_app_std_path(
            self.workflow.submissions_path,
            self.submission_idx,
        )
        return std_dir / f"{self.id_}.txt"  # TODO: refactor
788
+
789
+ @TimeIt.decorator
790
+ def get_resources(self) -> Mapping[str, Any]:
791
+ """Resolve specific resources for this EAR, considering all applicable scopes and
792
+ template-level resources."""
793
+ return self.element_iteration.get_resources(self.action)
794
+
795
+ @TimeIt.decorator
796
+ def __get_resources_obj(self, set_defaults: bool = False) -> ElementResources:
797
+ """Resolve specific resources for this EAR, considering all applicable scopes and
798
+ template-level resources."""
799
+ return self.element_iteration.get_resources_obj(
800
+ self.action, set_defaults=set_defaults
801
+ )
802
+
803
+ def get_environment_spec(self) -> Mapping[str, Any]:
804
+ """
805
+ Get the specification that defines the environment in which this run will execute.
806
+ This will include at least a `name` key.
807
+
808
+ Notes
809
+ -----
810
+ This is an alias for the `env_spec` property.
811
+
812
+ """
813
+ return self.env_spec
814
+
815
+ def get_environment(self) -> Environment:
816
+ """
817
+ Get the environment in which this run will execute.
818
+ """
819
+ return self._app.envs.get(**self.get_environment_spec())
820
+
821
+ def get_all_previous_iteration_runs(
822
+ self, include_self: bool = True
823
+ ) -> list[ElementActionRun]:
824
+ """Get a list of run over all iterations that correspond to this run, optionally
825
+ including this run."""
826
+ self_iter = self.element_iteration
827
+ self_elem = self_iter.element
828
+ self_act_idx = self.element_action.action_idx
829
+ max_idx = self_iter.index + (1 if include_self else 0)
830
+ return [
831
+ iter_i.actions[self_act_idx].runs[-1]
832
+ for iter_i in self_elem.iterations[:max_idx]
833
+ ]
834
+
835
    def get_data_in_values(
        self,
        data_in_keys: Sequence[str] | Mapping[str, Mapping[str, Any]] | None = None,
        label_dict: bool = True,
        raise_on_unset: bool = False,
        include_prefix: bool = False,
    ) -> Mapping[str, Mapping[str, Any]]:
        """Get a dict of (optionally a subset of) parameter values and input/output file
        paths ("data-in" that is passed to a script or program, for example) for this run.

        Parameters
        ----------
        data_in_keys:
            If specified, a list of parameter types and files to include, or a dict whose
            keys are parameter types and files to include. Prefixes should be included,
            which should be, for each key, one of "inputs.", "outputs.", "input_files.",
            or "output_files." For schema inputs that have `multiple=True`, the input
            type should be labelled. If a dict is passed, and the key "all_iterations"
            is present and `True`, the return for that input will be structured to
            include values for all previous iterations.
        label_dict:
            If True, arrange the values of schema inputs with multiple=True as a dict
            whose keys are the labels. If False, labels will be included in the top level
            keys.
        raise_on_unset:
            If True, raise when an explicitly-unset parameter value is retrieved.
        include_prefix:
            If False, strip the prefix ("inputs.", "outputs.", "input_files.", or
            "output_files.") from the keys of in the returned mapping.
        """

        dat_names = self.action.get_prefixed_data_names()
        _PREFIXES = ("inputs.", "outputs.", "input_files.", "output_files.")

        if data_in_keys is None:
            # by default just include input parameters
            data_in_keys = dat_names["inputs"]
        else:
            # validate that each requested key carries an allowed prefix
            for key in data_in_keys:
                if not any(key.startswith(prefix_i) for prefix_i in _PREFIXES):
                    raise ValueError(
                        f"Data-in keys must start with an allowed prefix: {_PREFIXES}, "
                        f"but received {key!r}."
                    )

        out: dict[str, dict[str, Any]] = {}
        for dat_key in data_in_keys:
            if self.__all_iters(data_in_keys, dat_key):
                # collect the value from every iteration's corresponding run,
                # keyed by iteration index
                val_i = {
                    f"iteration_{run_i.element_iteration.index}": {
                        "loop_idx": run_i.element_iteration.loop_idx,
                        "value": run_i.get(dat_key, raise_on_unset=raise_on_unset),
                    }
                    for run_i in self.get_all_previous_iteration_runs(include_self=True)
                }
            else:
                val_i = self.get(dat_key, raise_on_unset=raise_on_unset)

            if dat_key.startswith("inputs."):
                # inputs may be labelled; optionally group labelled values in a dict
                key, label_i = self.__split_input_name(dat_key, label_dict)
                key = key if include_prefix else ".".join(key.split(".")[1:])
                if label_i:
                    out.setdefault(key, {})[label_i] = val_i
                else:
                    out[key] = val_i
            else:
                dat_key = dat_key if include_prefix else ".".join(dat_key.split(".")[1:])
                out[dat_key] = val_i

        if self.action.script_pass_env_spec:
            out["env_spec"] = cast("Any", self.env_spec)

        return out
906
+
907
+ @staticmethod
908
+ def __all_iters(
909
+ inputs: Sequence[str] | Mapping[str, Mapping[str, Any]], inp_name: str
910
+ ) -> bool:
911
+ try:
912
+ return isinstance(inputs, Mapping) and bool(
913
+ inputs[inp_name]["all_iterations"]
914
+ )
915
+ except (TypeError, KeyError):
916
+ return False
917
+
918
+ @staticmethod
919
+ def __split_input_name(inp_name: str, label_dict: bool) -> tuple[str, str | None]:
920
+ key = inp_name
921
+ path, label = split_param_label(key)
922
+ if label_dict and path:
923
+ key = path # exclude label from key
924
+ # for sub-parameters, take only the final part as the dict key:
925
+ return "inputs." + key.split(".")[-1], (label if label_dict else None)
926
+
927
+ def get_data_in_values_direct(
928
+ self,
929
+ label_dict: bool = True,
930
+ raise_on_unset: bool = False,
931
+ include_prefix: bool = False,
932
+ ) -> Mapping[str, Mapping[str, Any]]:
933
+ """Get a dict of input values that are to be passed directly to a Python script
934
+ function."""
935
+ return self.get_data_in_values(
936
+ data_in_keys=self.action.script_data_in_grouped.get("direct", {}),
937
+ label_dict=label_dict,
938
+ raise_on_unset=raise_on_unset,
939
+ include_prefix=include_prefix,
940
+ )
941
+
942
+ def get_IFG_input_values(self, raise_on_unset: bool = False) -> Mapping[str, Any]:
943
+ """
944
+ Get a dict of input values that are to be passed via an input file generator.
945
+ """
946
+ if not self.action._from_expand:
947
+ raise RuntimeError(
948
+ "Cannot get input file generator inputs from this EAR because the "
949
+ "associated action is not expanded, meaning multiple IFGs might exists."
950
+ )
951
+ input_types = [i.typ for i in self.action.input_file_generators[0].inputs]
952
+ inputs = {
953
+ typ_i: self.get(f"inputs.{typ_i}", raise_on_unset=raise_on_unset)
954
+ for typ_i in input_types
955
+ }
956
+
957
+ if self.action.script_pass_env_spec:
958
+ inputs["env_spec"] = self.env_spec
959
+
960
+ return inputs
961
+
962
+ def get_OFP_output_files(self) -> Mapping[str, Path | list[Path]]:
963
+ """
964
+ Get a dict of output files that are going to be parsed to generate one or more
965
+ outputs.
966
+ """
967
+ if not self.action._from_expand:
968
+ raise RuntimeError(
969
+ "Cannot get output file parser files from this from EAR because the "
970
+ "associated action is not expanded, meaning multiple OFPs might exist."
971
+ )
972
+ return {
973
+ file_spec.label: (
974
+ [Path(val_i) for val_i in fs_val]
975
+ if isinstance((fs_val := file_spec.name.value()), list)
976
+ else Path(fs_val)
977
+ )
978
+ for file_spec in self.action.output_file_parsers[0].output_files
979
+ }
980
+
981
+ def get_OFP_outputs(
982
+ self, raise_on_unset: bool = False
983
+ ) -> Mapping[str, str | list[str]]:
984
+ """
985
+ Get the outputs that are required to execute an output file parser.
986
+ """
987
+ if not self.action._from_expand:
988
+ raise RuntimeError(
989
+ "Cannot get output file parser outputs from this from EAR because the "
990
+ "associated action is not expanded, meaning multiple OFPs might exist."
991
+ )
992
+ outputs: dict[str, str | list[str]] = {} # not sure this type is correct
993
+ for out_typ in self.action.output_file_parsers[0].outputs or []:
994
+ outputs[out_typ] = self.get(
995
+ f"outputs.{out_typ}", raise_on_unset=raise_on_unset
996
+ )
997
+ return outputs
998
+
999
    def get_py_script_func_kwargs(
        self,
        raise_on_unset: bool = False,
        add_script_files: bool = False,
        blk_act_key: BlockActionKey | None = None,
    ) -> Mapping[str, Any]:
        """Get function arguments to run the Python script associated with this action.

        Parameters
        ----------
        raise_on_unset
            If True, raise if unset parameter data is found when trying to retrieve input
            data.
        add_script_files
            If True, include additional keys "_input_files" and "_output_files" that will
            be dicts mapping file formats to file names for script input and output files.
            If True, `blk_act_key` must be provided.
        blk_act_key
            A three-tuple of integers corresponding to the jobscript index, block index,
            and block-action index.
        """
        kwargs: dict[str, Any] = {}
        if self.action.is_IFG:
            input_file = self.action.input_file_generators[0].input_file
            if (fn_spec := input_file.name).is_regex:
                # pass to the IFG the label rather than name (there is no point searching
                # with the regular expression via `name.value()`; the file(s) won't exist
                # yet!):
                path = input_file.label
            else:
                path_ = fn_spec.value()
                assert isinstance(path_, str)
                path = path_
            kwargs["path"] = Path(path)
            kwargs.update(self.get_IFG_input_values(raise_on_unset=raise_on_unset))

        elif self.action.is_OFP:
            kwargs.update(self.get_OFP_output_files())
            kwargs.update(self.get_data_in_values_direct(raise_on_unset=raise_on_unset))
            kwargs.update(self.get_OFP_outputs(raise_on_unset=raise_on_unset))

        # plain (non-IFG/OFP) scripts receive their "direct" data-in values
        if (
            not any((self.action.is_IFG, self.action.is_OFP))
            and self.action.script_data_in_has_direct
        ):
            kwargs.update(self.get_data_in_values_direct(raise_on_unset=raise_on_unset))

        if add_script_files:
            assert blk_act_key
            in_out_names = self.action.get_input_output_file_paths("script", blk_act_key)
            in_names, out_names = in_out_names["inputs"], in_out_names["outputs"]
            if in_names:
                kwargs["_input_files"] = in_names
            if out_names:
                kwargs["_output_files"] = out_names

        return kwargs
1056
+
1057
+ def write_script_data_in_files(self, block_act_key: BlockActionKey) -> None:
1058
+ """
1059
+ Write values to files in standard formats.
1060
+ """
1061
+ for fmt, ins in self.action.script_data_in_grouped.items():
1062
+ in_vals = self.get_data_in_values(
1063
+ data_in_keys=ins, label_dict=False, raise_on_unset=False
1064
+ )
1065
+ if writer := self.__data_in_writer_map.get(fmt):
1066
+ writer(self, in_vals, block_act_key)
1067
+
1068
+ def write_program_data_in_files(self, block_act_key: BlockActionKey) -> None:
1069
+ """
1070
+ Write values to files in standard formats.
1071
+ """
1072
+ for fmt, ins in self.action.program_data_in_grouped.items():
1073
+ in_vals = self.get_data_in_values(
1074
+ data_in_keys=ins, label_dict=False, raise_on_unset=False
1075
+ )
1076
+ if writer := self.__data_in_writer_map.get(fmt):
1077
+ writer(self, in_vals, block_act_key)
1078
+
1079
+ def __write_json_data_in(
1080
+ self,
1081
+ in_vals: Mapping[str, ParameterValue | list[ParameterValue]],
1082
+ block_act_key: BlockActionKey,
1083
+ ):
1084
+ in_vals_processed: dict[str, Any] = {}
1085
+ for k, v in in_vals.items():
1086
+ try:
1087
+ in_vals_processed[k] = (
1088
+ v.prepare_JSON_dump() if isinstance(v, ParameterValue) else v
1089
+ )
1090
+ except (AttributeError, NotImplementedError):
1091
+ in_vals_processed[k] = v
1092
+
1093
+ with self.action.get_param_dump_file_path_JSON(block_act_key).open("wt") as fp:
1094
+ json.dump(in_vals_processed, fp)
1095
+
1096
+ def __write_hdf5_data_in(
1097
+ self,
1098
+ in_vals: Mapping[str, ParameterValue | list[ParameterValue]],
1099
+ block_act_key: BlockActionKey,
1100
+ ):
1101
+ import h5py # type: ignore
1102
+
1103
+ with h5py.File(
1104
+ self.action.get_param_dump_file_path_HDF5(block_act_key), mode="w"
1105
+ ) as h5file:
1106
+ for k, v in in_vals.items():
1107
+ grp_k = h5file.create_group(k)
1108
+ try:
1109
+ assert isinstance(v, ParameterValue)
1110
+ v.dump_to_HDF5_group(grp_k)
1111
+ except AssertionError:
1112
+ # probably an element group (i.e. v is a list of `ParameterValue`
1113
+ # objects):
1114
+ assert isinstance(v, list)
1115
+ v[0].dump_element_group_to_HDF5_group(v, grp_k)
1116
+
1117
    # Dispatch table mapping a "data-in" file format name to the (unbound) writer
    # method used by `write_script_data_in_files`/`write_program_data_in_files`.
    __data_in_writer_map: ClassVar[dict[str, Callable[..., None]]] = {
        "json": __write_json_data_in,
        "hdf5": __write_hdf5_data_in,
    }
1121
+
1122
+ def __output_index(self, param_name: str) -> int:
1123
+ return cast("int", self.data_idx[f"outputs.{param_name}"])
1124
+
1125
    def _param_save(
        self,
        type: Literal["script", "program"],
        block_act_key: BlockActionKey,
        run_dir: Path | None = None,
    ):
        """Save script- or program-generated parameters that are stored within the
        supported data output formats (HDF5, JSON, etc).

        Parameters
        ----------
        type:
            Whether the output files were produced by a "script" or a "program".
        block_act_key:
            Jobscript/block/block-action indices used to resolve the file paths.
        run_dir:
            Directory containing the output files, passed through to
            `Action.get_input_output_file_paths`.
        """
        in_out_names = self.action.get_input_output_file_paths(
            type, block_act_key, directory=run_dir
        )

        import h5py  # type: ignore

        parameters = self._app.parameters
        for fmt, load_path in in_out_names["outputs"].items():
            if fmt == "json":
                with load_path.open(mode="rt") as f:
                    file_data: dict[str, Any] = json.load(f)
                    for param_name, param_dat in file_data.items():
                        param_id = self.__output_index(param_name)
                        # prefer a parameter-class-specific deserializer, if any:
                        if param_cls := parameters.get(param_name)._force_value_class():
                            try:
                                param_cls.save_from_JSON(
                                    param_dat, param_id, self.workflow
                                )
                                continue
                            except NotImplementedError:
                                pass
                        # try to save as a primitive:
                        self.workflow.set_parameter_value(
                            param_id=param_id, value=param_dat
                        )

            elif fmt == "hdf5":
                with h5py.File(load_path, mode="r") as h5file:
                    for param_name, h5_grp in h5file.items():
                        param_id = self.__output_index(param_name)
                        # HDF5 data can only be saved via a parameter-class method:
                        if param_cls := parameters.get(param_name)._force_value_class():
                            try:
                                param_cls.save_from_HDF5_group(
                                    h5_grp, param_id, self.workflow
                                )
                                continue
                            except NotImplementedError:
                                pass
                        # Unlike with JSON, we've no fallback so we warn
                        self._app.logger.warning(
                            "parameter %s could not be saved; serializer not found",
                            param_name,
                        )
1176
+
1177
+ @property
1178
+ def is_snippet_script(self) -> bool:
1179
+ """Returns True if the action script string represents a script snippets that is
1180
+ to be modified before execution (e.g. to receive and provide parameter data)."""
1181
+ try:
1182
+ return self.action.is_snippet_script(self.action.script)
1183
+ except AttributeError:
1184
+ return False
1185
+
1186
+ def get_script_artifact_name(self) -> str:
1187
+ """Return the script name that is used when writing the script to the artifacts
1188
+ directory within the workflow.
1189
+
1190
+ Like `Action.get_script_name`, this is only applicable for snippet scripts.
1191
+
1192
+ """
1193
+ art_name, snip_path = self.action.get_script_artifact_name(
1194
+ env_spec=self.env_spec,
1195
+ act_idx=self.element_action.action_idx,
1196
+ include_suffix=True,
1197
+ specs_suffix_delim=".",
1198
+ )
1199
+ return art_name
1200
+
1201
    def compose_commands(
        self, environments: EnvironmentsList, shell: Shell
    ) -> tuple[str, Mapping[int, Sequence[tuple[str, ...]]]]:
        """
        Write the EAR's enactment to disk in preparation for submission.

        Parameters
        ----------
        environments:
            The environments definitions from which setup commands are taken.
        shell:
            The shell for which command lines are formatted.

        Returns
        -------
        commands:
            List of argument words for the command that enacts the EAR.
            Converted to a string.
        shell_vars:
            Dict whose keys are command indices, and whose values are lists of tuples,
            where each tuple contains: (parameter name, shell variable name,
            "stdout"/"stderr").
        """
        self._app.persistence_logger.debug("EAR.compose_commands")
        env_spec = self.env_spec

        for ofp in self.action.output_file_parsers:
            # TODO: there should only be one at this stage if expanded?
            if ofp.output is None:
                raise OutputFileParserNoOutputError()

        command_lns: list[str] = []
        # environment setup lines come first, if the environment defines any:
        if (env := environments.get(**env_spec)).setup:
            command_lns.extend(env.setup)

        shell_vars: dict[int, list[tuple[str, ...]]] = {}
        for cmd_idx, command in enumerate(self.action.commands):
            if cmd_idx in self.commands_idx:
                # only execute commands that have no rules, or all valid rules:
                cmd_str, shell_vars[cmd_idx] = command.get_command_line(
                    EAR=self, shell=shell, env=env
                )
                command_lns.append(cmd_str)

        return ("\n".join(command_lns) + "\n"), shell_vars
1239
+
1240
+ @TimeIt.decorator
1241
+ def get_commands_file_hash(self) -> int:
1242
+ """Get a hash that can be used to group together runs that will have the same
1243
+ commands file.
1244
+
1245
+ This hash is not stable across sessions or machines.
1246
+
1247
+ """
1248
+ return self.action.get_commands_file_hash(
1249
+ data_idx=self.get_data_idx(),
1250
+ action_idx=self.element_action.action_idx,
1251
+ env_spec_hashable=self.env_spec_hashable,
1252
+ )
1253
+
1254
    @overload
    def try_write_commands(
        self,
        jobscript: Jobscript,
        environments: EnvironmentsList,
        raise_on_unset: Literal[True],
    ) -> Path: ...

    @overload
    def try_write_commands(
        self,
        jobscript: Jobscript,
        environments: EnvironmentsList,
        raise_on_unset: Literal[False] = False,
    ) -> Path | None: ...

    def try_write_commands(
        self,
        jobscript: Jobscript,
        environments: EnvironmentsList,
        raise_on_unset: bool = False,
    ) -> Path | None:
        """Attempt to write the commands file for this run.

        Parameters
        ----------
        jobscript:
            The jobscript whose shell/submission determine formatting and destination.
        environments:
            Environment definitions used to compose the commands.
        raise_on_unset:
            If True, propagate `UnsetParameterDataError`; if False, return None when
            the commands cannot yet be composed due to unset parameters.

        Returns
        -------
        The path of the written commands file, or None if it could not yet be written
        (only when `raise_on_unset` is False).
        """
        app_name = self._app.package_name
        try:
            commands, shell_vars = self.compose_commands(
                environments=environments,
                shell=jobscript.shell,
            )
        except UnsetParameterDataError:
            if raise_on_unset:
                raise
            self._app.submission_logger.debug(
                f"cannot yet write commands file for run ID {self.id_}; unset parameters"
            )
            return None

        # append shell snippets that save output-capturing shell variables back
        # into the app as parameters:
        for cmd_idx, var_dat in shell_vars.items():
            for param_name, shell_var_name, st_typ in var_dat:
                commands += jobscript.shell.format_save_parameter(
                    workflow_app_alias=jobscript.workflow_app_alias,
                    param_name=param_name,
                    shell_var_name=shell_var_name,
                    cmd_idx=cmd_idx,
                    stderr=(st_typ == "stderr"),
                    app_name=app_name,
                )

        commands_fmt = jobscript.shell.format_commands_file(app_name, commands)

        # combined-script jobscripts share one commands file per jobscript:
        if jobscript.resources.combine_scripts:
            stem = f"js_{jobscript.index}"  # TODO: refactor
        else:
            stem = str(self.id_)

        cmd_file_name = f"{stem}{jobscript.shell.JS_EXT}"
        cmd_file_path: Path = jobscript.submission.commands_path / cmd_file_name
        with cmd_file_path.open("wt", newline="\n") as fp:
            fp.write(commands_fmt)

        return cmd_file_path
1315
+
1316
+ @contextlib.contextmanager
1317
+ def raise_on_failure_threshold(self) -> Iterator[dict[str, UnsetParamTracker]]:
1318
+ """Context manager to track parameter types and associated run IDs for which those
1319
+ parameters were found to be unset when accessed via
1320
+ `WorkflowTask._get_merged_parameter_data`.
1321
+
1322
+ """
1323
+ self.workflow._is_tracking_unset = True
1324
+ self.workflow._tracked_unset = defaultdict(
1325
+ lambda: UnsetParamTracker(run_ids=set(), group_size=-1)
1326
+ )
1327
+ try:
1328
+ yield dict(self.workflow._tracked_unset)
1329
+ except:
1330
+ raise
1331
+ else:
1332
+ try:
1333
+ for schema_inp in self.task.template.schema.inputs:
1334
+ inp_path = f"inputs.{schema_inp.typ}"
1335
+ if inp_path in self.workflow._tracked_unset:
1336
+ unset_tracker = self.workflow._tracked_unset[inp_path]
1337
+ unset_num = len(unset_tracker.run_ids)
1338
+ unset_fraction = unset_num / unset_tracker.group_size
1339
+ if isinstance(schema_inp.allow_failed_dependencies, float):
1340
+ # `True` is converted to 1.0 on SchemaInput init
1341
+ if unset_fraction > schema_inp.allow_failed_dependencies:
1342
+ raise UnsetParameterFractionLimitExceededError(
1343
+ schema_inp,
1344
+ self.task,
1345
+ unset_fraction,
1346
+ log=self._app.submission_logger,
1347
+ )
1348
+ elif isinstance(schema_inp.allow_failed_dependencies, int):
1349
+ if unset_num > schema_inp.allow_failed_dependencies:
1350
+ raise UnsetParameterNumberLimitExceededError(
1351
+ schema_inp,
1352
+ self.task,
1353
+ unset_num,
1354
+ log=self._app.submission_logger,
1355
+ )
1356
+ finally:
1357
+ self.workflow._is_tracking_unset = False
1358
+ self.workflow._tracked_unset = None
1359
+ finally:
1360
+ self.workflow._is_tracking_unset = False
1361
+ self.workflow._tracked_unset = None
1362
+
1363
+ def render_jinja_template(self) -> str:
1364
+ """
1365
+ Render the associated Jinja template as a string.
1366
+ """
1367
+ if not self.action.has_jinja_template:
1368
+ raise ValueError("This action is not associated with a Jinja template.")
1369
+ inputs = self.action.get_jinja_template_inputs(
1370
+ path=self.jinja_template_path_actual,
1371
+ include_prefix=True,
1372
+ )
1373
+ assert inputs
1374
+ return self.action.render_jinja_template(
1375
+ self.get_data_in_values(tuple(inputs), include_prefix=False),
1376
+ path=self.jinja_template_path_actual,
1377
+ )
1378
+
1379
+ def write_jinja_template(self):
1380
+ """
1381
+ Render the Jinja template and write to disk in the current working directory.
1382
+ """
1383
+ template_str = self.render_jinja_template()
1384
+ if self.action.input_file_generators:
1385
+ # use the name of the input file:
1386
+ name = self.action.input_file_generators[0].input_file.name.name
1387
+ else:
1388
+ # use the existing template name
1389
+ name = Path(self.action.jinja_template).name
1390
+ with Path(name).open("wt") as fh:
1391
+ fh.write(template_str)
1392
+
1393
+
1394
+ class ElementAction(AppAware):
1395
+ """
1396
+ An abstract representation of an element's action at a particular iteration and
1397
+ the runs that enact that element iteration.
1398
+
1399
+ Parameters
1400
+ ----------
1401
+ element_iteration:
1402
+ The iteration
1403
+ action_idx:
1404
+ The action index.
1405
+ runs:
1406
+ The list of run indices.
1407
+ """
1408
+
1409
    def __init__(
        self,
        element_iteration: ElementIteration,
        action_idx: int,
        runs: dict[Mapping[str, Any], Any],
    ):
        """Set up an element-action for the given iteration and its run data."""
        # the parent iteration this action belongs to
        self._element_iteration = element_iteration
        # index of this action within the task schema
        self._action_idx = action_idx
        # raw run data used to lazily construct `ElementActionRun` objects
        self._runs = runs

        # assigned on first access of corresponding properties:
        self._run_objs: list[ElementActionRun] | None = None
        self._inputs: ElementInputs | None = None
        self._outputs: ElementOutputs | None = None
        self._resources: ElementResources | None = None
        self._input_files: ElementInputFiles | None = None
        self._output_files: ElementOutputFiles | None = None
1426
+
1427
+ def __repr__(self):
1428
+ return (
1429
+ f"{self.__class__.__name__}("
1430
+ f"iter_ID={self.element_iteration.id_}, "
1431
+ f"scope={self.action.get_precise_scope().to_string()!r}, "
1432
+ f"action_idx={self.action_idx}, num_runs={self.num_runs}"
1433
+ f")"
1434
+ )
1435
+
1436
+ @property
1437
+ def element_iteration(self) -> ElementIteration:
1438
+ """
1439
+ The iteration for this action.
1440
+ """
1441
+ return self._element_iteration
1442
+
1443
+ @property
1444
+ def element(self) -> Element:
1445
+ """
1446
+ The element for this action.
1447
+ """
1448
+ return self.element_iteration.element
1449
+
1450
+ @property
1451
+ def num_runs(self) -> int:
1452
+ """
1453
+ The number of runs associated with this action.
1454
+ """
1455
+ return len(self._runs)
1456
+
1457
+ @property
1458
+ def runs(self) -> list[ElementActionRun]:
1459
+ """
1460
+ The EARs that this action is enacted by.
1461
+ """
1462
+ if self._run_objs is None:
1463
+ self._run_objs = [
1464
+ self._app.ElementActionRun(
1465
+ element_action=self,
1466
+ index=idx,
1467
+ **{
1468
+ k: v
1469
+ for k, v in run_info.items()
1470
+ if k not in ("elem_iter_ID", "action_idx")
1471
+ },
1472
+ )
1473
+ for idx, run_info in enumerate(self._runs)
1474
+ ]
1475
+ return self._run_objs
1476
+
1477
+ @property
1478
+ def task(self) -> WorkflowTask:
1479
+ """
1480
+ The task that this action is an instance of.
1481
+ """
1482
+ return self.element_iteration.task
1483
+
1484
+ @property
1485
+ def action_idx(self) -> int:
1486
+ """
1487
+ The index of the action.
1488
+ """
1489
+ return self._action_idx
1490
+
1491
+ @property
1492
+ def action(self) -> Action:
1493
+ """
1494
+ The abstract task that this is a concrete model of.
1495
+ """
1496
+ return self.task.template.get_schema_action(self.action_idx)
1497
+
1498
+ @property
1499
+ def inputs(self) -> ElementInputs:
1500
+ """
1501
+ The inputs to this action.
1502
+ """
1503
+ if not self._inputs:
1504
+ self._inputs = self._app.ElementInputs(element_action=self)
1505
+ return self._inputs
1506
+
1507
+ @property
1508
+ def outputs(self) -> ElementOutputs:
1509
+ """
1510
+ The outputs from this action.
1511
+ """
1512
+ if not self._outputs:
1513
+ self._outputs = self._app.ElementOutputs(element_action=self)
1514
+ return self._outputs
1515
+
1516
+ @property
1517
+ def input_files(self) -> ElementInputFiles:
1518
+ """
1519
+ The input files to this action.
1520
+ """
1521
+ if not self._input_files:
1522
+ self._input_files = self._app.ElementInputFiles(element_action=self)
1523
+ return self._input_files
1524
+
1525
+ @property
1526
+ def output_files(self) -> ElementOutputFiles:
1527
+ """
1528
+ The output files from this action.
1529
+ """
1530
+ if not self._output_files:
1531
+ self._output_files = self._app.ElementOutputFiles(element_action=self)
1532
+ return self._output_files
1533
+
1534
+ def get_data_idx(self, path: str | None = None, run_idx: int = -1) -> DataIndex:
1535
+ """
1536
+ Get the data index for some path/run.
1537
+ """
1538
+ return self.element_iteration.get_data_idx(
1539
+ path,
1540
+ action_idx=self.action_idx,
1541
+ run_idx=run_idx,
1542
+ )
1543
+
1544
+ @overload
1545
+ def get_parameter_sources(
1546
+ self,
1547
+ path: str | None = None,
1548
+ *,
1549
+ run_idx: int = -1,
1550
+ typ: str | None = None,
1551
+ as_strings: Literal[False] = False,
1552
+ use_task_index: bool = False,
1553
+ ) -> Mapping[str, ParamSource | list[ParamSource]]: ...
1554
+
1555
+ @overload
1556
+ def get_parameter_sources(
1557
+ self,
1558
+ path: str | None = None,
1559
+ *,
1560
+ run_idx: int = -1,
1561
+ typ: str | None = None,
1562
+ as_strings: Literal[True],
1563
+ use_task_index: bool = False,
1564
+ ) -> Mapping[str, str]: ...
1565
+
1566
+ def get_parameter_sources(
1567
+ self,
1568
+ path: str | None = None,
1569
+ *,
1570
+ run_idx: int = -1,
1571
+ typ: str | None = None,
1572
+ as_strings: bool = False,
1573
+ use_task_index: bool = False,
1574
+ ) -> Mapping[str, str] | Mapping[str, ParamSource | list[ParamSource]]:
1575
+ """
1576
+ Get information about where parameters originated.
1577
+ """
1578
+ if as_strings:
1579
+ return self.element_iteration.get_parameter_sources(
1580
+ path,
1581
+ action_idx=self.action_idx,
1582
+ run_idx=run_idx,
1583
+ typ=typ,
1584
+ as_strings=True,
1585
+ use_task_index=use_task_index,
1586
+ )
1587
+ return self.element_iteration.get_parameter_sources(
1588
+ path,
1589
+ action_idx=self.action_idx,
1590
+ run_idx=run_idx,
1591
+ typ=typ,
1592
+ as_strings=False,
1593
+ use_task_index=use_task_index,
1594
+ )
1595
+
1596
+ def get(
1597
+ self,
1598
+ path: str | None = None,
1599
+ run_idx: int = -1,
1600
+ default: Any | None = None,
1601
+ raise_on_missing: bool = False,
1602
+ raise_on_unset: bool = False,
1603
+ ) -> Any:
1604
+ """
1605
+ Get the value of a parameter.
1606
+ """
1607
+ return self.element_iteration.get(
1608
+ path=path,
1609
+ action_idx=self.action_idx,
1610
+ run_idx=run_idx,
1611
+ default=default,
1612
+ raise_on_missing=raise_on_missing,
1613
+ raise_on_unset=raise_on_unset,
1614
+ )
1615
+
1616
+ def get_parameter_names(self, prefix: str) -> list[str]:
1617
+ """Get parameter types associated with a given prefix.
1618
+
1619
+ For inputs, labels are ignored.
1620
+ See :py:meth:`.Action.get_parameter_names` for more information.
1621
+
1622
+ Parameters
1623
+ ----------
1624
+ prefix
1625
+ One of "inputs", "outputs", "input_files", "output_files".
1626
+
1627
+ """
1628
+ return self.action.get_parameter_names(prefix)
1629
+
1630
+
1631
@final
class ActionScope(JSONLike):
    """Class to represent the identification of a subset of task schema actions by a
    filtering process.
    """

    _child_objects: ClassVar[tuple[ChildObjectSpec, ...]] = (
        ChildObjectSpec(
            name="typ",
            json_like_name="type",
            class_name="ActionScopeType",
            is_enum=True,
        ),
    )

    # Matches "<type_name>" optionally followed by "[k1=v1, k2=v2]".
    __ACTION_SCOPE_RE: ClassVar[Pattern] = re.compile(r"(\w*)(?:\[(.*)\])?")

    def __init__(self, typ: ActionScopeType | str, **kwargs):
        if isinstance(typ, str):
            #: Action scope type.
            self.typ = self._app.ActionScopeType[typ.upper()]
        else:
            self.typ = typ

        #: Any provided extra keyword arguments.
        self.kwargs = {k: v for k, v in kwargs.items() if v is not None}

        if bad_keys := set(kwargs) - ACTION_SCOPE_ALLOWED_KWARGS[self.typ.name]:
            raise TypeError(
                f"The following keyword arguments are unknown for ActionScopeType "
                f"{self.typ.name}: {bad_keys}."
            )

    def __repr__(self) -> str:
        kwargs_str = ""
        if self.kwargs:
            kwargs_str = ", ".join(f"{k}={v!r}" for k, v in self.kwargs.items())
        return f"{self.__class__.__name__}.{self.typ.name.lower()}({kwargs_str})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, self.__class__):
            return False
        return self.typ is other.typ and self.kwargs == other.kwargs

    def __hash__(self) -> int:
        # Defining __eq__ alone would implicitly set __hash__ to None, making
        # instances unhashable; provide a hash consistent with equality.
        return hash((self.typ, tuple(sorted(self.kwargs.items()))))

    class __customdict(dict):
        # Marker subclass so _from_json_like can tell that a mapping was
        # produced by _parse_from_string (and so is already normalised).
        pass

    @classmethod
    def _parse_from_string(cls, string: str) -> dict[str, str]:
        """Parse a scope string like ``"input_file_generator[file=x]"``.

        Raises
        ------
        TypeError
            If the string is not a valid scope specification.
        """
        # BUG FIX: previously used `.search`, which always matched because
        # `\w*` can match the empty string, so invalid strings were never
        # rejected and trailing garbage was silently ignored. `fullmatch`
        # requires the whole string to conform.
        if not (match := cls.__ACTION_SCOPE_RE.fullmatch(string)):
            raise TypeError(f"unparseable ActionScope: '{string}'")
        typ_str, kwargs_str = match.groups()
        # The declared group types are overly loose; the first group is
        # guaranteed to be a str here.
        kwargs: dict[str, str] = cls.__customdict({"type": cast("str", typ_str)})
        if kwargs_str:
            for pair_str in kwargs_str.split(","):
                name, val = pair_str.split("=")
                kwargs[name.strip()] = val.strip()
        return kwargs

    def to_string(self) -> str:
        """
        Render this action scope as a string.
        """
        kwargs_str = ""
        if self.kwargs:
            kwargs_str = "[" + ", ".join(f"{k}={v}" for k, v in self.kwargs.items()) + "]"
        return f"{self.typ.name.lower()}{kwargs_str}"

    @classmethod
    def _from_json_like(
        cls,
        json_like: Mapping[str, Any] | Sequence[Mapping[str, Any]],
        shared_data: Mapping[str, Any],
    ) -> Self:
        if not isinstance(json_like, Mapping):
            raise TypeError("only mappings are supported for becoming an ActionScope")
        if not isinstance(json_like, cls.__customdict):
            # Wasn't processed by _parse_from_string() already
            json_like = {"type": json_like["type"], **json_like.get("kwargs", {})}
        return super()._from_json_like(json_like, shared_data)

    @classmethod
    def any(cls) -> ActionScope:
        """
        Any scope.
        """
        return cls(typ=ActionScopeType.ANY)

    @classmethod
    def main(cls) -> ActionScope:
        """
        The main scope.
        """
        return cls(typ=ActionScopeType.MAIN)

    @classmethod
    def processing(cls) -> ActionScope:
        """
        The processing scope.
        """
        return cls(typ=ActionScopeType.PROCESSING)

    @classmethod
    def input_file_generator(cls, file: str | None = None) -> ActionScope:
        """
        The scope of an input file generator.
        """
        return cls(typ=ActionScopeType.INPUT_FILE_GENERATOR, file=file)

    @classmethod
    def output_file_parser(cls, output: Parameter | str | None = None) -> ActionScope:
        """
        The scope of an output file parser.
        """
        return cls(typ=ActionScopeType.OUTPUT_FILE_PARSER, output=output)
1748
+
1749
+
1750
@dataclass()
@hydrate
class ActionEnvironment(JSONLike):
    """
    The environment that an action is enacted within.
    """

    _child_objects: ClassVar[tuple[ChildObjectSpec, ...]] = (
        ChildObjectSpec(
            name="scope",
            class_name="ActionScope",
        ),
    )

    #: The environment document.
    environment: Mapping[str, Any]
    #: The scope.
    scope: ActionScope

    def __init__(
        self, environment: str | dict[str, Any], scope: ActionScope | None = None
    ):
        # Fall back to the catch-all "any" scope when none is provided.
        self.scope = scope if scope is not None else self._app.ActionScope.any()

        if isinstance(environment, str):
            # A bare name becomes a minimal environment document.
            self.environment = {"name": environment}
        elif "name" in environment:
            # Deep-copy so later mutation of the caller's dict cannot leak in.
            self.environment = copy.deepcopy(environment)
        else:
            raise ActionEnvironmentMissingNameError(environment)
1783
+
1784
+
1785
class ActionRule(JSONLike):
    """
    Class to represent a rule/condition that must be True if an action is to be
    included.

    Parameters
    ----------
    rule: ~hpcflow.app.Rule
        The rule to apply.
    check_exists: str
        A special rule that is enabled if this named attribute is present.
    check_missing: str
        A special rule that is enabled if this named attribute is absent.
    path: str
        Where to find the attribute to check.
    condition: dict | ConditionLike
        A more complex condition to apply.
    cast: str
        The name of a class to cast the attribute to before checking.
    doc: str
        Documentation for this rule, if any.
    default: bool
        Optional default value to return when testing the rule if the path is not valid.
    """

    _child_objects: ClassVar[tuple[ChildObjectSpec, ...]] = (
        ChildObjectSpec(name="rule", class_name="Rule"),
    )

    def __init__(
        self,
        rule: Rule | None = None,
        check_exists: str | None = None,
        check_missing: str | None = None,
        path: str | None = None,
        condition: dict[str, Any] | ConditionLike | None = None,
        cast: str | None = None,
        doc: str | None = None,
        default: bool | None = None,
    ):
        # Either a prebuilt `rule` is given, or the rule is constructed from the
        # remaining keyword arguments; supplying both is an error.
        if rule is None:
            #: The rule to apply.
            self.rule = self._app.Rule(
                check_exists=check_exists,
                check_missing=check_missing,
                path=path,
                condition=condition,
                cast=cast,
                doc=doc,
                default=default,
            )
        elif any(
            arg is not None
            for arg in (check_exists, check_missing, path, condition, cast, doc, default)
        ):
            raise TypeError(
                f"{self.__class__.__name__} `rule` specified in addition to rule "
                f"constructor arguments."
            )
        else:
            self.rule = rule

        #: The action that contains this rule.
        self.action: Action | None = None  # assigned by parent action
        #: The command that is guarded by this rule.
        self.command: Command | None = None  # assigned by parent command

    # NOTE(review): defining __eq__ without __hash__ makes instances
    # unhashable (sets/dict keys) — confirm whether that is intended.
    def __eq__(self, other: Any) -> bool:
        if type(other) is not self.__class__:
            return False
        return self.rule == other.rule

    @property
    def __parent_action(self) -> Action:
        # Resolve the owning Action: either directly assigned, or reached via
        # the owning Command.
        if self.action:
            return self.action
        else:
            assert self.command
            act = self.command.action
            assert act
            return act

    @TimeIt.decorator
    def test(self, element_iteration: ElementIteration) -> bool:
        """
        Test if this rule holds for a particular iteration.

        Parameters
        ----------
        element_iteration:
            The iteration to apply this rule to.
        """

        return self.rule.test(
            element_like=element_iteration,
            action=self.__parent_action,
        )

    @classmethod
    def check_exists(cls, check_exists: str) -> ActionRule:
        """
        Make an action rule that checks if a named attribute is present.

        Parameters
        ----------
        check_exists:
            The path to the attribute to check for.
        """
        return cls(rule=cls._app.Rule(check_exists=check_exists))

    @classmethod
    def check_missing(cls, check_missing: str) -> ActionRule:
        """
        Make an action rule that checks if a named attribute is absent.

        Parameters
        ----------
        check_missing:
            The path to the attribute to check for.
        """
        return cls(rule=cls._app.Rule(check_missing=check_missing))
1906
+
1907
+
1908
#: Catch-all symbol usable in script/program data-in specs (as ``inputs.*``) to
#: mean "all input parameters not otherwise listed".
_ALL_OTHER_SYM = "*"
1909
+
1910
+
1911
+ class Action(JSONLike):
1912
+ """
1913
+ An atomic component of a workflow that will be enacted within an iteration
1914
+ structure.
1915
+
1916
+ Parameters
1917
+ ----------
1918
+ environments: list[ActionEnvironment]
1919
+ The environments in which this action can run.
1920
+ commands: list[~hpcflow.app.Command]
1921
+ The commands to be run by this action.
1922
+ script: str
1923
+ The name of the Python script to run.
1924
+ script_data_in: str
1925
+ Information about data input to the script.
1926
+ script_data_out: str
1927
+ Information about data output from the script.
1928
+ data_files_use_opt: bool
1929
+ If True, data input and output file paths will be passed to the script or program
1930
+ execution command line with an option like ``--input-json`` or ``--output-hdf5``
1931
+ etc. If False, the file paths will be passed on their own. For Python scripts,
1932
+ options are always passed, and this parameter is overwritten to be True,
1933
+ regardless of its initial value.
1934
+ script_data_files_use_opt: bool
1935
+ Deprecated; please use `data_files_use_opt` instead, which has the same meaning.
1936
+ script_exe: str
1937
+ The executable to use to run the script.
1938
+ script_pass_env_spec: bool
1939
+ Whether to pass the environment details to the script.
1940
+ jinja_template: str
1941
+ Path to a built-in Jinja template file to generate as part of this action.
1942
+ jinja_template_path: str
1943
+ Path to an external Jinja template file to generate as part of this action.
1944
+ program: str
1945
+ Path to a built-in program to run.
1946
+ program_path: str
1947
+ Path to an external program to run.
1948
+ program_exe: str
1949
+ Executable instance label associated with the program to run
1950
+ program_data_in: str
1951
+ Information about data input to the program.
1952
+ program_data_out: str
1953
+ Information about data output from the program.
1954
+ abortable: bool
1955
+ Whether this action can be aborted.
1956
+ input_file_generators: list[~hpcflow.app.InputFileGenerator]
1957
+ Any applicable input file generators.
1958
+ output_file_parsers: list[~hpcflow.app.OutputFileParser]
1959
+ Any applicable output file parsers.
1960
+ input_files: list[~hpcflow.app.FileSpec]
1961
+ The input files to the action's commands.
1962
+ output_files: list[~hpcflow.app.FileSpec]
1963
+ The output files from the action's commands.
1964
+ rules: list[ActionRule]
1965
+ How to determine whether to run the action.
1966
+ save_files: list[str]
1967
+ The names of files to be explicitly saved after each step.
1968
+ clean_up: list[str]
1969
+ The names of files to be deleted after each step.
1970
+ """
1971
+
1972
+ _child_objects: ClassVar[tuple[ChildObjectSpec, ...]] = (
1973
+ ChildObjectSpec(
1974
+ name="commands",
1975
+ class_name="Command",
1976
+ is_multiple=True,
1977
+ parent_ref="action",
1978
+ ),
1979
+ ChildObjectSpec(
1980
+ name="input_file_generators",
1981
+ is_multiple=True,
1982
+ class_name="InputFileGenerator",
1983
+ dict_key_attr="input_file",
1984
+ ),
1985
+ ChildObjectSpec(
1986
+ name="output_file_parsers",
1987
+ is_multiple=True,
1988
+ class_name="OutputFileParser",
1989
+ dict_key_attr="output",
1990
+ ),
1991
+ ChildObjectSpec(
1992
+ name="input_files",
1993
+ is_multiple=True,
1994
+ class_name="FileSpec",
1995
+ shared_data_name="command_files",
1996
+ ),
1997
+ ChildObjectSpec(
1998
+ name="output_files",
1999
+ is_multiple=True,
2000
+ class_name="FileSpec",
2001
+ shared_data_name="command_files",
2002
+ ),
2003
+ ChildObjectSpec(
2004
+ name="environments",
2005
+ class_name="ActionEnvironment",
2006
+ is_multiple=True,
2007
+ dict_key_attr="scope",
2008
+ dict_val_attr="environment",
2009
+ ),
2010
+ ChildObjectSpec(
2011
+ name="rules",
2012
+ class_name="ActionRule",
2013
+ is_multiple=True,
2014
+ parent_ref="action",
2015
+ ),
2016
+ ChildObjectSpec(
2017
+ name="save_files",
2018
+ class_name="FileSpec",
2019
+ is_multiple=True,
2020
+ shared_data_primary_key="label",
2021
+ shared_data_name="command_files",
2022
+ ),
2023
+ ChildObjectSpec(
2024
+ name="clean_up",
2025
+ class_name="FileSpec",
2026
+ is_multiple=True,
2027
+ shared_data_primary_key="label",
2028
+ shared_data_name="command_files",
2029
+ ),
2030
+ )
2031
+ _data_formats: ClassVar[Mapping[str, tuple[str, ...]]] = {
2032
+ "script": ("direct", "json", "hdf5"),
2033
+ "program": ("json", "hdf5"),
2034
+ }
2035
+
2036
+ def __init__(
2037
+ self,
2038
+ environments: list[ActionEnvironment] | None = None,
2039
+ commands: list[Command] | None = None,
2040
+ script: str | None = None,
2041
+ script_data_in: str | Mapping[str, str | ActionData] | None = None,
2042
+ script_data_out: str | Mapping[str, str | ActionData] | None = None,
2043
+ script_data_files_use_opt: bool = False,
2044
+ data_files_use_opt: bool = False,
2045
+ script_exe: str | None = None,
2046
+ script_pass_env_spec: bool = False,
2047
+ jinja_template: str | None = None,
2048
+ jinja_template_path: str | None = None,
2049
+ program: str | None = None,
2050
+ program_path: str | None = None,
2051
+ program_exe: str | None = None,
2052
+ program_data_in: str | Mapping[str, str | ActionData] | None = None,
2053
+ program_data_out: str | Mapping[str, str | ActionData] | None = None,
2054
+ abortable: bool = False,
2055
+ input_file_generators: list[InputFileGenerator] | None = None,
2056
+ output_file_parsers: list[OutputFileParser] | None = None,
2057
+ input_files: list[FileSpec] | None = None,
2058
+ output_files: list[FileSpec] | None = None,
2059
+ rules: list[ActionRule] | None = None,
2060
+ save_files: list[FileSpec] | None = None,
2061
+ clean_up: list[str] | None = None,
2062
+ requires_dir: bool | None = None,
2063
+ ):
2064
+
2065
+ if script_data_files_use_opt:
2066
+ warnings.warn(
2067
+ f"{self.__class__.__name__!r}: Please use `data_files_use_opt` instead "
2068
+ f"of `script_data_files_use_opt`, which will be removed in a future "
2069
+ f"release.",
2070
+ DeprecationWarning,
2071
+ stacklevel=2,
2072
+ )
2073
+ data_files_use_opt = script_data_files_use_opt
2074
+
2075
+ #: The commands to be run by this action.
2076
+ self.commands = commands or []
2077
+ #: The name of the Python script to run.
2078
+ self.script = script
2079
+ #: Information about data input to the script.
2080
+ self.script_data_in: dict[str, ActionData] | None = None
2081
+ self._script_data_in = script_data_in
2082
+ #: Information about data output from the script.
2083
+ self.script_data_out: dict[str, ActionData] | None = None
2084
+ self._script_data_out = script_data_out
2085
+ #: If True, data input and output file paths will be passed to the script or
2086
+ #: program execution command line with an option like `--input-json` or
2087
+ #: `--output-hdf5` etc. If False, the file paths will be passed on their own. For
2088
+ #: Python scripts, options are always passed, and this parameter is overwritten
2089
+ #: to be True, regardless of its initial value.
2090
+ self.data_files_use_opt = (
2091
+ data_files_use_opt if not self.script_is_python_snippet else True
2092
+ )
2093
+ #: The executable to use to run the script.
2094
+ self.script_exe = script_exe.lower() if script_exe else None
2095
+ #: Whether to pass the environment details to the script.
2096
+ self.script_pass_env_spec = script_pass_env_spec
2097
+ #: Path to a built-in program to run.
2098
+ self.program = program
2099
+ #: Path to an external program to run
2100
+ self.program_path = program_path
2101
+ #: Executable instance label associated with the program to run
2102
+ self.program_exe = program_exe
2103
+ #: Information about data input to the program.
2104
+ self.program_data_in: dict[str, ActionData] | None = None
2105
+ self._program_data_in = program_data_in
2106
+ #: Information about data output from the program
2107
+ self.program_data_out: dict[str, ActionData] | None = None
2108
+ self._program_data_out = program_data_out
2109
+ #: The environments in which this action can run.
2110
+ self.environments = environments or [
2111
+ self._app.ActionEnvironment(environment="null_env")
2112
+ ]
2113
+ #: The path to a builtin Jinja template to render.
2114
+ self.jinja_template = jinja_template
2115
+ #: The path to an external Jinja template to render.
2116
+ self.jinja_template_path = jinja_template_path
2117
+ #: Whether this action can be aborted.
2118
+ self.abortable = abortable
2119
+ #: Any applicable input file generators.
2120
+ self.input_file_generators = input_file_generators or []
2121
+ #: Any applicable output file parsers.
2122
+ self.output_file_parsers = output_file_parsers or []
2123
+ #: The input files to the action's commands.
2124
+ self.input_files = self.__resolve_input_files(input_files or [])
2125
+ #: The output files from the action's commands.
2126
+ self.output_files = self.__resolve_output_files(output_files or [])
2127
+ #: How to determine whether to run the action.
2128
+ self.rules = rules or []
2129
+ #: The names of files to be explicitly saved after each step.
2130
+ self.save_files = save_files or []
2131
+ #: The names of files to be deleted after each step.
2132
+ self.clean_up = clean_up or []
2133
+
2134
+ if requires_dir is None:
2135
+ # TODO: once Jinja templates are written to the shared subs dir, we can omit
2136
+ # from here:
2137
+ requires_dir = (
2138
+ True
2139
+ if self.input_file_generators
2140
+ or self.output_file_parsers
2141
+ or self.jinja_template
2142
+ else False
2143
+ )
2144
+ self.requires_dir = requires_dir
2145
+
2146
+ self._task_schema: TaskSchema | None = None # assigned by parent TaskSchema
2147
+ self._from_expand = False # assigned on creation of new Action by `expand`
2148
+
2149
+ self._set_parent_refs()
2150
+
2151
+ def process_action_data_formats(self) -> None:
2152
+ """
2153
+ Convert all script/program data in/out information into standard form.
2154
+ """
2155
+ self.script_data_in = self.__process_action_data(
2156
+ "script", self._script_data_in, "in"
2157
+ )
2158
+ self.script_data_out = self.__process_action_data(
2159
+ "script", self._script_data_out, "out"
2160
+ )
2161
+ self.program_data_in = self.__process_action_data(
2162
+ "program", self._program_data_in, "in"
2163
+ )
2164
+ self.program_data_out = self.__process_action_data(
2165
+ "program", self._program_data_out, "out"
2166
+ )
2167
+
2168
+ def __process_action_data_str(
2169
+ self, data_fmt: str, direction: Literal["in", "out"], param_names: Iterable[str]
2170
+ ) -> dict[str, ActionData]:
2171
+ """Process script/program data in/out, when the user specified a single format for
2172
+ all data-in/out keys; we assume only input parameters are to be included."""
2173
+ data_fmt = data_fmt.lower()
2174
+ return {f"{direction}puts.{k}": {"format": data_fmt} for k in param_names}
2175
+
2176
    def __process_action_data_dict(
        self,
        data_fmt: Mapping[str, str | ActionData],
        direction: Literal["in", "out"],
        param_names: Iterable[str],
    ) -> dict[str, ActionData]:
        """Normalise a per-key script/program data in/out mapping.

        Keys without an explicit prefix are treated as "{direction}puts.*"; string
        values become ``{"format": ...}`` dicts; unlabelled multi-inputs are expanded
        to their labelled forms; and the catch-all ``inputs.*`` key is replaced by
        entries for all remaining input parameters.
        """
        all_params: dict[str, ActionData] = {}
        _PREFIXES = ("inputs.", "outputs.", "input_files.", "output_files.")
        for nm, v in data_fmt.items():
            # by default, assume keys are in/output parameters, unless explicitly prefixed:
            if not any(nm.startswith(prefix) for prefix in _PREFIXES):
                nm = f"{direction}puts.{nm}"

            # values might be strings, or dicts with "format" and potentially other
            # kwargs:
            if isinstance(v, dict):
                # Make sure format is first key
                v2: ActionData = {"format": v["format"]}
                all_params[nm] = v2
                v2.update(v)
            else:
                all_params[nm] = {"format": v.lower()}

        if direction == "in":
            # expand unlabelled-multiple inputs to multiple labelled inputs:
            multi_types = set(self.task_schema.multi_input_types)
            multis: dict[str, ActionData] = {}
            for nm in tuple(all_params):
                if not nm.startswith("inputs."):
                    continue
                if nm[len("inputs.") :] in multi_types:
                    # Replace the unlabelled entry with one per labelled parameter,
                    # each getting an independent copy of the format spec.
                    k_fmt = all_params.pop(nm)
                    for name in param_names:
                        # NOTE(review): prefix match — could also catch a distinct
                        # parameter whose name merely starts with this one (e.g.
                        # "p" vs "p2"); confirm labelled names cannot collide so.
                        if f"inputs.{name}".startswith(nm):
                            multis[f"inputs.{name}"] = copy.deepcopy(k_fmt)

            if multis:
                # Expanded labelled entries go first, preserving any explicit ones.
                all_params = {
                    **multis,
                    **all_params,
                }

        all_param_inp_keys = [
            key[len("inputs.") :] for key in all_params if key.startswith("inputs.")
        ]

        if (all_other_inputs := f"inputs.{_ALL_OTHER_SYM}") in all_params:
            # replace catch-all with all other input/output names:
            other_fmt = all_params[all_other_inputs]
            all_params = {k: v for k, v in all_params.items() if k != all_other_inputs}
            for name in set(param_names).difference(all_param_inp_keys):
                all_params[f"inputs.{name}"] = copy.deepcopy(other_fmt)
        return all_params
2229
+
2230
    def __process_action_data(
        self,
        type: Literal["script", "program"],
        data_fmt: str | Mapping[str, str | ActionData] | None,
        direction: Literal["in", "out"],
    ) -> dict[str, ActionData]:
        """Process specific action script/program data_in/out into a standard form.

        Parameters
        ----------
        data_fmt:
            The format as specified in the action for how to pass data to and from the
            script/program. This will be normalised into a standard form.
        direction:
            This refers to whether the data is being passed into the script/program
            (`in`), or being retrieved from the script/program (`out`). Note that the data
            that is passed into the script/program may include more than just task schema
            inputs, but could also include input file paths (those generated by input file
            generators or passed by the user in the workflow template).

        Raises
        ------
        UnknownActionDataParameter
            If a key does not name a known parameter or file of this action.
        UnsupportedActionDataFormat
            If a format is not supported for this `type` (see `_data_formats`).
        UnknownActionDataKey
            If a spec dict contains keys other than the allowed ones.
        """

        if not data_fmt:
            return {}

        param_names = self.get_parameter_names(f"{direction}puts")
        if isinstance(data_fmt, str):
            all_params = self.__process_action_data_str(data_fmt, direction, param_names)
        else:
            all_params = self.__process_action_data_dict(data_fmt, direction, param_names)

        all_dat_names = self.get_prefixed_data_names_flat()

        # validation:
        allowed_keys = ("format", "all_iterations")
        for k, v in all_params.items():
            # validate parameter name (sub-parameters are allowed):
            if ".".join(k.split(".")[:2]) not in all_dat_names:
                raise UnknownActionDataParameter(type, k, direction, all_dat_names)
            # validate format:
            if v["format"] not in self._data_formats[type]:
                raise UnsupportedActionDataFormat(
                    type,
                    v,
                    cast('Literal["input", "output"]', f"{direction}put"),
                    k,
                    self._data_formats[type],
                )
            # Walrus captures the offending key; relies on key names being
            # truthy (non-empty) strings, which holds for all prefixed keys.
            if any((bad_key := k2) for k2 in v if k2 not in allowed_keys):
                raise UnknownActionDataKey(type, bad_key, allowed_keys)

        return all_params
2282
+
2283
+ @property
2284
+ def has_program(self) -> bool:
2285
+ return bool(self.program_or_program_path)
2286
+
2287
+ @property
2288
+ def program_or_program_path(self) -> str | None:
2289
+ return self.program or self.program_path
2290
+
2291
+ @property
2292
+ def has_jinja_template(self) -> bool:
2293
+ return bool(self.jinja_template_or_template_path)
2294
+
2295
+ @property
2296
+ def jinja_template_or_template_path(self) -> str | None:
2297
+ return self.jinja_template or self.jinja_template_path
2298
+
2299
+ @property
2300
+ def script_data_in_grouped(self) -> Mapping[str, Mapping[str, Mapping[str, str]]]:
2301
+ """Get input parameter types by script data-in format."""
2302
+ if self.script_data_in is None:
2303
+ self.process_action_data_formats()
2304
+ assert self.script_data_in is not None
2305
+ return swap_nested_dict_keys(
2306
+ dct=cast("dict", self.script_data_in), inner_key="format"
2307
+ )
2308
+
2309
+ @property
2310
+ def script_data_out_grouped(self) -> Mapping[str, Mapping[str, Mapping[str, str]]]:
2311
+ """Get output parameter types by script data-out format."""
2312
+ if self.script_data_out is None:
2313
+ self.process_action_data_formats()
2314
+ assert self.script_data_out is not None
2315
+ return swap_nested_dict_keys(
2316
+ dct=cast("dict", self.script_data_out), inner_key="format"
2317
+ )
2318
+
2319
+ @property
2320
+ def program_data_in_grouped(self) -> Mapping[str, Mapping[str, Mapping[str, str]]]:
2321
+ """Get input parameter types by program data-in format."""
2322
+ if self.program_data_in is None:
2323
+ self.process_action_data_formats()
2324
+ assert self.program_data_in is not None
2325
+ return swap_nested_dict_keys(
2326
+ dct=cast("dict", self.program_data_in), inner_key="format"
2327
+ )
2328
+
2329
+ @property
2330
+ def program_data_out_grouped(self) -> Mapping[str, Mapping[str, Mapping[str, str]]]:
2331
+ """Get output parameter types by program data-out format."""
2332
+ if self.program_data_out is None:
2333
+ self.process_action_data_formats()
2334
+ assert self.program_data_out is not None
2335
+ return swap_nested_dict_keys(
2336
+ dct=cast("dict", self.program_data_out), inner_key="format"
2337
+ )
2338
+
2339
+ @property
2340
+ def script_data_in_has_files(self) -> bool:
2341
+ """Return True if the script requires some inputs to be passed via an
2342
+ intermediate file format."""
2343
+ # TODO: should set `requires_dir` to True if this is True? although in future we
2344
+ # may write input data files in a directory that is shared by multiple runs.
2345
+ return bool(set(self.script_data_in_grouped) - {"direct"}) # TODO: test
2346
+
2347
+ @property
2348
+ def script_data_out_has_files(self) -> bool:
2349
+ """Return True if the script produces some outputs via an intermediate file
2350
+ format."""
2351
+ # TODO: should set `requires_dir` to True if this is True?
2352
+ return bool(set(self.script_data_out_grouped) - {"direct"}) # TODO: test
2353
+
2354
+ @property
2355
+ def script_data_in_has_direct(self) -> bool:
2356
+ """Return True if the script requires some inputs to be passed directly from the
2357
+ app."""
2358
+ return "direct" in self.script_data_in_grouped # TODO: test
2359
+
2360
+ @property
2361
+ def script_data_out_has_direct(self) -> bool:
2362
+ """Return True if the script produces some outputs to be passed directly to the
2363
+ app."""
2364
+ return "direct" in self.script_data_out_grouped # TODO: test
2365
+
2366
+ @property
2367
+ def script_is_python_snippet(self) -> bool:
2368
+ """Return True if the script is a Python snippet script (determined by the file
2369
+ extension)"""
2370
+ if self.script and (snip_path := self.get_snippet_script_path(self.script)):
2371
+ return snip_path.suffix == ".py"
2372
+ return False
2373
+
2374
+ @property
2375
+ def program_data_in_has_files(self) -> bool:
2376
+ """Return True if the program requires some inputs to be passed via an
2377
+ intermediate file format."""
2378
+ # TODO: should set `requires_dir` to True if this is True? although in future we
2379
+ # may write input data files in a directory that is shared by multiple runs.
2380
+ return bool(self.program_data_in_grouped) # TODO: test
2381
+
2382
+ @property
2383
+ def program_data_out_has_files(self) -> bool:
2384
+ """Return True if the program produces some outputs via an intermediate file
2385
+ format."""
2386
+ # TODO: should set `requires_dir` to True if this is True?
2387
+ return bool(self.program_data_out_grouped) # TODO: test
2388
+
2389
+ @override
2390
+ def _postprocess_to_dict(self, d: dict[str, Any]) -> dict[str, Any]:
2391
+ d = super()._postprocess_to_dict(d)
2392
+ d["script_data_in"] = d.pop("_script_data_in")
2393
+ d["script_data_out"] = d.pop("_script_data_out")
2394
+ d["program_data_in"] = d.pop("_program_data_in")
2395
+ d["program_data_out"] = d.pop("_program_data_out")
2396
+ return d
2397
+
2398
+ @property
2399
+ def is_IFG(self):
2400
+ return bool(self.input_file_generators)
2401
+
2402
+ @property
2403
+ def is_OFP(self):
2404
+ return bool(self.output_file_parsers)
2405
+
2406
+ def __deepcopy__(self, memo: dict[int, Any]) -> Self:
2407
+ kwargs = self.to_dict()
2408
+ _from_expand = kwargs.pop("_from_expand")
2409
+ _task_schema = kwargs.pop("_task_schema", None)
2410
+ obj = self.__class__(**copy.deepcopy(kwargs, memo))
2411
+ obj._from_expand = _from_expand
2412
+ obj._task_schema = _task_schema
2413
+ return obj
2414
+
2415
    @property
    def task_schema(self) -> TaskSchema:
        """
        The task schema that this action came from.
        """
        # only valid once the schema back-reference has been assigned (e.g. during
        # `expand`); unset access is a programming error:
        assert self._task_schema is not None
        return self._task_schema
2422
+
2423
+ def __resolve_input_files(self, input_files: list[FileSpec]) -> list[FileSpec]:
2424
+ in_files = input_files
2425
+ for ifg in self.input_file_generators:
2426
+ if ifg.input_file not in in_files:
2427
+ in_files.append(ifg.input_file)
2428
+ return in_files
2429
+
2430
+ def __resolve_output_files(self, output_files: list[FileSpec]) -> list[FileSpec]:
2431
+ out_files = output_files
2432
+ for ofp in self.output_file_parsers:
2433
+ for out_file in ofp.output_files:
2434
+ if out_file not in out_files:
2435
+ out_files.append(out_file)
2436
+ return out_files
2437
+
2438
+ def __repr__(self) -> str:
2439
+ # TODO: include program and other script attributes etc
2440
+ IFGs = {
2441
+ ifg.input_file.label: [inp.typ for inp in ifg.inputs]
2442
+ for ifg in self.input_file_generators
2443
+ }
2444
+ OFPs = {
2445
+ ofp.output.typ if ofp.output else f"OFP_{idx}": [
2446
+ out_file.label for out_file in ofp.output_files
2447
+ ]
2448
+ for idx, ofp in enumerate(self.output_file_parsers)
2449
+ }
2450
+
2451
+ out: list[str] = []
2452
+ if self.commands:
2453
+ out.append(f"commands={self.commands!r}")
2454
+ if self.script:
2455
+ out.append(f"script={self.script!r}")
2456
+ if self.jinja_template:
2457
+ out.append(f"jinja_template={self.jinja_template!r}")
2458
+ if self.environments:
2459
+ out.append(f"environments={self.environments!r}")
2460
+ if IFGs:
2461
+ out.append(f"input_file_generators={IFGs!r}")
2462
+ if OFPs:
2463
+ out.append(f"output_file_parsers={OFPs!r}")
2464
+ if self.rules:
2465
+ out.append(f"rules={self.rules!r}")
2466
+
2467
+ return f"{self.__class__.__name__}({', '.join(out)})"
2468
+
2469
+ def __eq__(self, other: Any) -> bool:
2470
+ # TODO: include program and other script attributes etc
2471
+ if not isinstance(other, self.__class__):
2472
+ return False
2473
+ return (
2474
+ self.commands == other.commands
2475
+ and self.script == other.script
2476
+ and self.jinja_template == other.jinja_template
2477
+ and self.environments == other.environments
2478
+ and self.abortable == other.abortable
2479
+ and self.input_file_generators == other.input_file_generators
2480
+ and self.output_file_parsers == other.output_file_parsers
2481
+ and self.rules == other.rules
2482
+ )
2483
+
2484
+ @staticmethod
2485
+ def env_spec_to_hashable(
2486
+ env_spec: Mapping[str, Any],
2487
+ ) -> tuple[tuple[str, ...], tuple[Any, ...]]:
2488
+ keys, values = zip(*env_spec.items()) if env_spec else ((), ())
2489
+ return tuple(keys), tuple(values)
2490
+
2491
+ @staticmethod
2492
+ def env_spec_from_hashable(
2493
+ env_spec_h: tuple[tuple[str, ...], tuple[Any, ...]],
2494
+ ) -> dict[str, Any]:
2495
+ return dict(zip(*env_spec_h))
2496
+
2497
    def get_script_determinants(self) -> tuple:
        """Get the attributes that affect the script."""
        # note: combined with the environment specifiers to form a hash uniquely
        # identifying the generated script; see `get_script_determinant_hash`.
        return (
            self.script,
            self.script_data_in,
            self.script_data_out,
            self.data_files_use_opt,
            self.script_exe,
        )
2506
+
2507
+ def get_script_determinant_hash(self, env_specs: dict | None = None) -> int:
2508
+ """Get a hash of the instance attributes that uniquely determine the script.
2509
+
2510
+ The hash is not stable across sessions or machines.
2511
+
2512
+ """
2513
+ env_specs = env_specs or {}
2514
+ return get_hash(
2515
+ (self.get_script_determinants(), self.env_spec_to_hashable(env_specs))
2516
+ )
2517
+
2518
+ @classmethod
2519
+ def _json_like_constructor(cls, json_like) -> Self:
2520
+ """Invoked by `JSONLike.from_json_like` instead of `__init__`."""
2521
+ _from_expand = json_like.pop("_from_expand", None)
2522
+ obj = cls(**json_like)
2523
+ obj._from_expand = _from_expand
2524
+ return obj
2525
+
2526
+ def get_parameter_dependence(self, parameter: SchemaParameter) -> ParameterDependence:
2527
+ """Find if/where a given parameter is used by the action."""
2528
+ # names of input files whose generation requires this parameter
2529
+ writer_files = [
2530
+ ifg.input_file
2531
+ for ifg in self.input_file_generators
2532
+ if parameter.parameter in ifg.inputs
2533
+ ]
2534
+ # TODO: indices of commands in which this parameter appears
2535
+ commands: list[int] = []
2536
+ return {"input_file_writers": writer_files, "commands": commands}
2537
+
2538
    def __get_resolved_action_env(
        self,
        relevant_scopes: tuple[ActionScopeType, ...],
        input_file_generator: InputFileGenerator | None = None,
        output_file_parser: OutputFileParser | None = None,
        commands: list[Command] | None = None,
    ) -> ActionEnvironment:
        """Select the action environment whose scope matches one of
        `relevant_scopes`, preferring the most specific scope type.

        At most one of `input_file_generator`, `output_file_parser` and `commands`
        should be given; they are used only to phrase the error message when no
        compatible environment exists.

        Raises
        ------
        MissingCompatibleActionEnvironment
            If none of `self.environments` has a scope in `relevant_scopes`.
        """
        possible = [
            env
            for env in self.environments
            if env.scope and env.scope.typ in relevant_scopes
        ]
        if not possible:
            # build a context-specific error message:
            if input_file_generator:
                raise MissingCompatibleActionEnvironment(
                    f"input file generator {input_file_generator.input_file.label!r}"
                )
            elif output_file_parser:
                if output_file_parser.output is not None:
                    ofp_id = output_file_parser.output.typ
                else:
                    ofp_id = "<unnamed>"
                raise MissingCompatibleActionEnvironment(f"output file parser {ofp_id!r}")
            else:
                raise MissingCompatibleActionEnvironment(f"commands {commands!r}")

        # get max by scope type specificity:
        return max(possible, key=lambda i: i.scope.typ.value)
2566
+
2567
+ def get_input_file_generator_action_env(
2568
+ self, input_file_generator: InputFileGenerator
2569
+ ) -> ActionEnvironment:
2570
+ """
2571
+ Get the actual environment to use for an input file generator.
2572
+ """
2573
+ return self.__get_resolved_action_env(
2574
+ relevant_scopes=(
2575
+ ActionScopeType.ANY,
2576
+ ActionScopeType.PROCESSING,
2577
+ ActionScopeType.INPUT_FILE_GENERATOR,
2578
+ ),
2579
+ input_file_generator=input_file_generator,
2580
+ )
2581
+
2582
+ def get_output_file_parser_action_env(
2583
+ self, output_file_parser: OutputFileParser
2584
+ ) -> ActionEnvironment:
2585
+ """
2586
+ Get the actual environment to use for an output file parser.
2587
+ """
2588
+ return self.__get_resolved_action_env(
2589
+ relevant_scopes=(
2590
+ ActionScopeType.ANY,
2591
+ ActionScopeType.PROCESSING,
2592
+ ActionScopeType.OUTPUT_FILE_PARSER,
2593
+ ),
2594
+ output_file_parser=output_file_parser,
2595
+ )
2596
+
2597
+ def get_commands_action_env(self) -> ActionEnvironment:
2598
+ """
2599
+ Get the actual environment to use for the action commands.
2600
+ """
2601
+ return self.__get_resolved_action_env(
2602
+ relevant_scopes=(ActionScopeType.ANY, ActionScopeType.MAIN),
2603
+ commands=self.commands,
2604
+ )
2605
+
2606
+ def get_environment_name(self) -> str:
2607
+ """
2608
+ Get the name of the environment associated with this action.
2609
+ """
2610
+ return self.get_environment_spec()["name"]
2611
+
2612
    def get_environment_spec(self) -> Mapping[str, Any]:
        """
        Get the specification for the environment of this action, assuming it has been
        expanded.

        Raises
        ------
        RuntimeError
            If the action has not been expanded (an unexpanded action may carry
            multiple environments, so a single spec cannot be chosen).
        """
        if not self._from_expand:
            raise RuntimeError(
                "Cannot choose a single environment from this action because it is not "
                "expanded, meaning multiple action environments might exist."
            )
        assert len(self.environments) == 1  # expanded action should have only one
        return self.environments[0].environment
2624
+
2625
    def get_environment(self) -> Environment:
        """
        Get the environment in which this action will run (assuming only one environment
        of the specified name exists).
        """
        # note: this will raise if there are multiple environments defined with the
        # required name. In a workflow, the user is expected to provide specifier
        # key-values to filter the available environments down to one.
        return self._app.envs.get(**self.get_environment_spec())
2634
+
2635
+ @staticmethod
2636
+ def is_snippet_script(script: str | None) -> bool:
2637
+ """Returns True if the provided script string represents a script snippets that is
2638
+ to be modified before execution (e.g. to receive and provide parameter data)."""
2639
+ if script is None:
2640
+ return False
2641
+ return script.startswith("<<script:")
2642
+
2643
+ __SCRIPT_NAME_RE: ClassVar[Pattern] = re.compile(
2644
+ r"\<\<script:(?:.*(?:\/|\\))*(.*)\>\>"
2645
+ )
2646
+
2647
+ @classmethod
2648
+ def get_script_name(cls, script: str) -> str:
2649
+ """Return the script name.
2650
+
2651
+ If `script` is a snippet script path, this method returns the name of the script
2652
+ (i.e. the final component of the path). If `script` is not a snippet script path
2653
+ (does not start with "<<script:"), then `script` is simply returned.
2654
+
2655
+ """
2656
+ if cls.is_snippet_script(script):
2657
+ if not (match_obj := cls.__SCRIPT_NAME_RE.match(script)):
2658
+ raise ValueError("incomplete <<script:>>")
2659
+ return match_obj[1]
2660
+ # a script we can expect in the working directory, which might have been generated
2661
+ # by a previous action:
2662
+ return script
2663
+
2664
    @overload
    def get_script_artifact_name(
        self,
        env_spec: Mapping[str, Any],
        act_idx: int,
        ret_specifiers: Literal[False] = False,
        include_suffix: bool = True,
        specs_suffix_delim: str = ".",
    ) -> tuple[str, Path]: ...

    @overload
    def get_script_artifact_name(
        self,
        env_spec: Mapping[str, Any],
        act_idx: int,
        ret_specifiers: Literal[True],
        include_suffix: bool = True,
        specs_suffix_delim: str = ".",
    ) -> tuple[str, Path, dict]: ...

    def get_script_artifact_name(
        self,
        env_spec: Mapping[str, Any],
        act_idx: int,
        ret_specifiers: bool = False,
        include_suffix: bool = True,
        specs_suffix_delim: str = ".",
    ) -> tuple[str, Path] | tuple[str, Path, dict]:
        """Return the script name that is used when writing the script to the artifacts
        directory within the workflow.

        Like `Action.get_script_name`, this is only applicable for snippet scripts.

        Parameters
        ----------
        env_spec
            Environment specifiers used to resolve `<<env:...>>` placeholders in the
            snippet script path.
        act_idx
            Index of this action; embedded in the artifact name.
        ret_specifiers
            If True, also return the dict of environment specifiers found in the path.
        include_suffix
            If True, append the snippet script's file suffix to the name.
        specs_suffix_delim
            Delimiter between the base name and the specifier suffix.

        Returns
        -------
        tuple
            ``(name, snippet_path)``, plus the specifiers dict when
            `ret_specifiers` is True.
        """
        # resolve the snippet path and any env specifiers it references:
        snip_path_specs = self.get_snippet_script_path(
            self.script,
            env_spec,
            ret_specifiers=True,
        )
        assert snip_path_specs
        snip_path, specifiers = snip_path_specs
        # e.g. {"version": 1} -> "version_1"; multiple specs joined with "__":
        specs_suffix = "__".join(f"{k}_{v}" for k, v in specifiers.items())
        if specs_suffix:
            specs_suffix = f"{specs_suffix_delim}{specs_suffix}"

        name = f"{self.task_schema.name}_act_{act_idx}{specs_suffix}"
        if include_suffix:
            name += snip_path.suffix

        if ret_specifiers:
            return name, snip_path, specifiers
        else:
            return name, snip_path
2717
+
2718
    # captures the payload of a `<<script:...>>` placeholder.
    # NOTE(review): `(.*:?)` looks like a typo for non-greedy `(.*?)` — as written
    # the optional colon is redundant after a greedy `.*`; behaviour is unchanged
    # for well-formed single-placeholder strings. Confirm before changing.
    __SCRIPT_RE: ClassVar[Pattern] = re.compile(r"\<\<script:(.*:?)\>\>")
    # captures the key of a `<<env:...>>` substitution placeholder (non-greedy):
    __ENV_RE: ClassVar[Pattern] = re.compile(r"\<\<env:(.*?)\>\>")
2720
+
2721
    @overload
    @classmethod
    def get_snippet_script_str(
        cls,
        script: str,
        env_spec: Mapping[str, Any] | None = None,
        ret_specifiers: Literal[False] = False,
    ) -> str: ...

    @overload
    @classmethod
    def get_snippet_script_str(
        cls,
        script: str,
        env_spec: Mapping[str, Any] | None = None,
        *,
        ret_specifiers: Literal[True],
    ) -> tuple[str, dict[str, Any]]: ...

    @overload
    @classmethod
    def get_snippet_script_str(
        cls,
        script: str,
        env_spec: Mapping[str, Any] | None = None,
        *,
        ret_specifiers: bool,
    ) -> str | tuple[str, dict[str, Any]]: ...

    @classmethod
    def get_snippet_script_str(
        cls,
        script: str,
        env_spec: Mapping[str, Any] | None = None,
        ret_specifiers: bool = False,
    ) -> str | tuple[str, dict[str, Any]]:
        """Return the specified snippet `script` with variable substitutions completed.

        Parameters
        ----------
        script
            A `<<script:...>>` placeholder string; anything else raises ValueError.
        env_spec
            Mapping used to substitute `<<env:KEY>>` placeholders within the script
            path; if None, no substitution is attempted.
        ret_specifiers
            If True, also return a list of environment specifiers as a dict whose keys are
            specifier keys found in the `script` path and whose values are the
            corresponding values extracted from `env_spec`.

        Raises
        ------
        ValueError
            If `script` is not a snippet-script placeholder, or the placeholder is
            malformed.
        """
        if not cls.is_snippet_script(script):
            raise ValueError(
                f"Must be an app-data script name (e.g. "
                f"<<script:path/to/app/data/script.py>>), but received {script}"
            )
        if not (match_obj := cls.__SCRIPT_RE.match(script)):
            raise ValueError("incomplete <<script:>>")
        out: str = match_obj[1]

        if env_spec is not None:
            # record each substituted specifier so callers can recover them:
            specifiers: dict[str, Any] = {}

            def repl(match_obj):
                spec = match_obj[1]
                specifiers[spec] = env_spec[spec]
                return str(env_spec[spec])

            out = cls.__ENV_RE.sub(
                repl=repl,
                string=out,
            )
            # note: specifiers are only returned when substitution was performed
            # (i.e. `env_spec` was provided):
            if ret_specifiers:
                return (out, specifiers)
        return out
2791
+
2792
    @classmethod
    @overload
    def get_snippet_script_path(
        cls,
        script_path: str | None,
        env_spec: Mapping[str, Any] | None = None,
        *,
        ret_specifiers: Literal[True],
    ) -> tuple[Path, dict[str, Any]] | None: ...

    @classmethod
    @overload
    def get_snippet_script_path(
        cls,
        script_path: str | None,
        env_spec: Mapping[str, Any] | None = None,
        *,
        ret_specifiers: Literal[False] = False,
    ) -> Path | None: ...

    @classmethod
    def get_snippet_script_path(
        cls,
        script_path: str | None,
        env_spec: Mapping[str, Any] | None = None,
        *,
        ret_specifiers: bool = False,
    ) -> Path | tuple[Path, dict[str, Any]] | None:
        """Return the specified snippet `script` path, or None if there is no snippet.

        Parameters
        ----------
        script_path
            A `<<script:...>>` placeholder string; non-snippet values yield None.
        env_spec
            Mapping used to substitute `<<env:KEY>>` placeholders within the path.
        ret_specifiers
            If True, also return a list of environment specifiers as a dict whose keys are
            specifier keys found in the `script` path and whose values are the
            corresponding values extracted from `env_spec`.

        """
        if not cls.is_snippet_script(script_path):
            return None

        assert script_path is not None
        # strip the placeholder and substitute env specifiers:
        path_ = cls.get_snippet_script_str(
            script_path, env_spec, ret_specifiers=ret_specifiers
        )
        if ret_specifiers:
            assert isinstance(path_, tuple)
            path_str, specifiers = path_
        else:
            assert isinstance(path_, str)
            path_str = path_

        # prefer the app's registered script location; fall back to the raw path:
        path = Path(cls._app.scripts.get(path_str, path_str))

        if ret_specifiers:
            return path, specifiers
        else:
            return path
2850
+
2851
    @staticmethod
    def __get_param_dump_file_stem(block_act_key: BlockActionKey) -> str:
        # file-name stem (no extension) for run parameter *dump* files:
        return RunDirAppFiles.get_run_param_dump_file_prefix(block_act_key)
2854
+
2855
    @staticmethod
    def __get_param_load_file_stem(block_act_key: BlockActionKey) -> str:
        # file-name stem (no extension) for run parameter *load* files:
        return RunDirAppFiles.get_run_param_load_file_prefix(block_act_key)
2858
+
2859
+ def get_param_dump_file_path_JSON(
2860
+ self, block_act_key: BlockActionKey, directory: Path | None = None
2861
+ ) -> Path:
2862
+ """
2863
+ Get the path of the JSON dump file.
2864
+ """
2865
+ directory = directory or Path()
2866
+ return directory.joinpath(
2867
+ self.__get_param_dump_file_stem(block_act_key) + ".json"
2868
+ )
2869
+
2870
+ def get_param_dump_file_path_HDF5(
2871
+ self, block_act_key: BlockActionKey, directory: Path | None = None
2872
+ ) -> Path:
2873
+ """
2874
+ Get the path of the HDF5 dump file.
2875
+ """
2876
+ directory = directory or Path()
2877
+ return directory.joinpath(self.__get_param_dump_file_stem(block_act_key) + ".h5")
2878
+
2879
+ def get_param_load_file_path_JSON(
2880
+ self, block_act_key: BlockActionKey, directory: Path | None = None
2881
+ ) -> Path:
2882
+ """
2883
+ Get the path of the JSON load file.
2884
+ """
2885
+ directory = directory or Path()
2886
+ return directory.joinpath(
2887
+ self.__get_param_load_file_stem(block_act_key) + ".json"
2888
+ )
2889
+
2890
+ def get_param_load_file_path_HDF5(
2891
+ self, block_act_key: BlockActionKey, directory: Path | None = None
2892
+ ) -> Path:
2893
+ """
2894
+ Get the path of the HDF5 load file.
2895
+ """
2896
+ directory = directory or Path()
2897
+ return directory.joinpath(self.__get_param_load_file_stem(block_act_key) + ".h5")
2898
+
2899
+ def expand(self) -> Sequence[Action]:
2900
+ """
2901
+ Expand this action into a list of actions if necessary.
2902
+ This converts input file generators and output file parsers into their own actions.
2903
+ """
2904
+ if self._from_expand:
2905
+ # already expanded
2906
+ return [self]
2907
+
2908
+ # run main if:
2909
+ # - one or more output files are not passed
2910
+ # run IFG if:
2911
+ # - one or more output files are not passed
2912
+ # - AND input file is not passed
2913
+ # always run OPs, for now
2914
+
2915
+ main_rules = self.rules + [
2916
+ self._app.ActionRule.check_missing(f"output_files.{of.label}")
2917
+ for of in self.output_files
2918
+ ]
2919
+
2920
+ # note we keep the IFG/OPs in the new actions, so we can check the parameters
2921
+ # used/produced.
2922
+
2923
+ inp_files: list[FileSpec] = []
2924
+ inp_acts: list[Action] = []
2925
+
2926
+ app_caps = self._app.package_name.upper()
2927
+
2928
+ script_cmd_vars = {
2929
+ "script_name": f"${app_caps}_RUN_SCRIPT_NAME",
2930
+ "script_name_no_ext": f"${app_caps}_RUN_SCRIPT_NAME_NO_EXT",
2931
+ "script_dir": f"${app_caps}_RUN_SCRIPT_DIR",
2932
+ "script_path": f"${app_caps}_RUN_SCRIPT_PATH",
2933
+ }
2934
+
2935
+ for ifg in self.input_file_generators:
2936
+ script_exe = "python_script"
2937
+ exe = f"<<executable:{script_exe}>>"
2938
+ variables = script_cmd_vars if ifg.script else {}
2939
+ act_i = self._app.Action(
2940
+ commands=(
2941
+ [self._app.Command(executable=exe, variables=variables)]
2942
+ if ifg.script
2943
+ else None
2944
+ ),
2945
+ input_file_generators=[ifg],
2946
+ environments=[self.get_input_file_generator_action_env(ifg)],
2947
+ rules=main_rules + ifg.get_action_rules(),
2948
+ script=ifg.script,
2949
+ script_data_in=ifg.script_data_in or "direct",
2950
+ script_data_out=ifg.script_data_out or "direct",
2951
+ script_exe=script_exe,
2952
+ script_pass_env_spec=ifg.script_pass_env_spec,
2953
+ jinja_template=ifg.jinja_template,
2954
+ jinja_template_path=ifg.jinja_template_path,
2955
+ abortable=ifg.abortable,
2956
+ requires_dir=ifg.requires_dir,
2957
+ )
2958
+ act_i._task_schema = self.task_schema
2959
+ if ifg.input_file not in inp_files:
2960
+ inp_files.append(ifg.input_file)
2961
+ act_i.process_action_data_formats()
2962
+ act_i._from_expand = True
2963
+ inp_acts.append(act_i)
2964
+
2965
+ out_files: list[FileSpec] = []
2966
+ out_acts: list[Action] = []
2967
+ for ofp in self.output_file_parsers:
2968
+ script_exe = "python_script"
2969
+ exe = f"<<executable:{script_exe}>>"
2970
+ variables = script_cmd_vars if ofp.script else {}
2971
+ act_i = self._app.Action(
2972
+ commands=[self._app.Command(executable=exe, variables=variables)],
2973
+ output_file_parsers=[ofp],
2974
+ environments=[self.get_output_file_parser_action_env(ofp)],
2975
+ rules=list(self.rules) + ofp.get_action_rules(),
2976
+ script=ofp.script,
2977
+ script_data_in="direct",
2978
+ script_data_out="direct",
2979
+ script_exe=script_exe,
2980
+ script_pass_env_spec=ofp.script_pass_env_spec,
2981
+ abortable=ofp.abortable,
2982
+ requires_dir=ofp.requires_dir,
2983
+ )
2984
+ act_i._task_schema = self.task_schema
2985
+ for j in ofp.output_files:
2986
+ if j not in out_files:
2987
+ out_files.append(j)
2988
+ act_i.process_action_data_formats()
2989
+ act_i._from_expand = True
2990
+ out_acts.append(act_i)
2991
+
2992
+ commands = self.commands
2993
+ if self.script:
2994
+ commands += [
2995
+ self._app.Command(
2996
+ executable=f"<<executable:{self.script_exe}>>",
2997
+ arguments=self.get_input_output_file_command_args("script"),
2998
+ variables=script_cmd_vars,
2999
+ )
3000
+ ]
3001
+
3002
+ if self.has_program:
3003
+ variables = {
3004
+ "program_name": f"${app_caps}_RUN_PROGRAM_NAME",
3005
+ "program_name_no_ext": f"${app_caps}_RUN_PROGRAM_NAME_NO_EXT",
3006
+ "program_dir": f"${app_caps}_RUN_PROGRAM_DIR",
3007
+ "program_path": f"${app_caps}_RUN_PROGRAM_PATH",
3008
+ }
3009
+ commands += [
3010
+ self._app.Command(
3011
+ executable=f"<<executable:{self.program_exe}>>",
3012
+ arguments=self.get_input_output_file_command_args("program"),
3013
+ variables=variables,
3014
+ )
3015
+ ]
3016
+
3017
+ # TODO: store script_args? and build command with executable syntax?
3018
+ main_act = self._app.Action(
3019
+ commands=commands,
3020
+ script=self.script,
3021
+ script_data_in=self.script_data_in,
3022
+ script_data_out=self.script_data_out,
3023
+ script_exe=self.script_exe,
3024
+ script_pass_env_spec=self.script_pass_env_spec,
3025
+ jinja_template=self.jinja_template,
3026
+ jinja_template_path=self.jinja_template_path,
3027
+ program=self.program,
3028
+ program_path=self.program_path,
3029
+ program_exe=self.program_exe,
3030
+ program_data_in=self.program_data_in,
3031
+ program_data_out=self.program_data_out,
3032
+ environments=[self.get_commands_action_env()],
3033
+ abortable=self.abortable,
3034
+ rules=main_rules,
3035
+ input_files=inp_files,
3036
+ output_files=out_files,
3037
+ save_files=self.save_files,
3038
+ clean_up=self.clean_up,
3039
+ requires_dir=self.requires_dir,
3040
+ )
3041
+ main_act._task_schema = self.task_schema
3042
+ main_act._from_expand = True
3043
+ main_act.process_action_data_formats()
3044
+
3045
+ return [*inp_acts, main_act, *out_acts]
3046
+
3047
    # note: we use "parameter" rather than "input", because it could be a schema input
    # or schema output.
    # matches `<<parameter:NAME>>`, optionally wrapped in a formatting call such as
    # `<<fmt[...](parameter:NAME)>>`; group 1 captures the parameter name.
    __PARAMS_RE: ClassVar[Pattern] = re.compile(
        r"\<\<(?:\w+(?:\[(?:.*)\])?\()?parameter:(.*?)\)?\>\>"
    )
3052
+
3053
+ def get_command_input_types(self, sub_parameters: bool = False) -> tuple[str, ...]:
3054
+ """Get parameter types from commands.
3055
+
3056
+ Parameters
3057
+ ----------
3058
+ sub_parameters:
3059
+ If True, sub-parameters (i.e. dot-delimited parameter types) will be returned
3060
+ untouched. If False (default), only return the root parameter type and
3061
+ disregard the sub-parameter part.
3062
+ """
3063
+ params: set[str] = set()
3064
+ for command in self.commands:
3065
+ params.update(
3066
+ val[1] if sub_parameters else val[1].split(".")[0]
3067
+ for val in self.__PARAMS_RE.finditer(command.command or "")
3068
+ )
3069
+ for arg in command.arguments or ():
3070
+ params.update(
3071
+ val[1] if sub_parameters else val[1].split(".")[0]
3072
+ for val in self.__PARAMS_RE.finditer(arg)
3073
+ )
3074
+ # TODO: consider stdin?
3075
+ return tuple(params)
3076
+
3077
+ __FILES_RE: ClassVar[Pattern] = re.compile(r"\<\<file:(.*?)\>\>")
3078
+
3079
+ def get_command_file_labels(self) -> tuple[str, ...]:
3080
+ """Get input files types from commands."""
3081
+ files: set[str] = set()
3082
+ for command in self.commands:
3083
+ files.update(self.__FILES_RE.findall(command.command or ""))
3084
+ for arg in command.arguments or ():
3085
+ files.update(self.__FILES_RE.findall(arg))
3086
+ # TODO: consider stdin?
3087
+ return tuple(files)
3088
+
3089
+ def get_command_output_types(self) -> tuple[str, ...]:
3090
+ """Get parameter types from command stdout and stderr arguments."""
3091
+ params: set[str] = set()
3092
+ for command in self.commands:
3093
+ out_params = command.get_output_types()
3094
+ if out_params["stdout"]:
3095
+ params.add(out_params["stdout"])
3096
+ if out_params["stderr"]:
3097
+ params.add(out_params["stderr"])
3098
+ return tuple(params)
3099
+
3100
+ def get_command_parameter_types(
3101
+ self, sub_parameters: bool = False
3102
+ ) -> tuple[str, ...]:
3103
+ """Get all parameter types that appear in the commands of this action.
3104
+
3105
+ Parameters
3106
+ ----------
3107
+ sub_parameters
3108
+ If True, sub-parameter inputs (i.e. dot-delimited input types) will be
3109
+ returned untouched. If False (default), only return the root parameter type
3110
+ and disregard the sub-parameter part.
3111
+ """
3112
+ # TODO: not sure if we need `input_files`
3113
+ return tuple(
3114
+ f"inputs.{i}" for i in self.get_command_input_types(sub_parameters)
3115
+ ) + tuple(f"input_files.{i}" for i in self.get_command_file_labels())
3116
+
3117
+ @property
3118
+ def has_main_script_or_program(self) -> bool:
3119
+ return bool(
3120
+ self.has_program
3121
+ or (
3122
+ self.script
3123
+ if not self._from_expand
3124
+ else self.script
3125
+ and not self.input_file_generators
3126
+ and not self.output_file_parsers
3127
+ )
3128
+ )
3129
+
3130
    def _get_jinja_template_input_types(self) -> set[str]:
        """Return the set of input types consumed by this action's Jinja template.

        Falls back to IFG inputs or all schema input types when the template path
        cannot yet be resolved (e.g. it still contains variable substitutions).
        """
        try:
            path = self.get_jinja_template_resolved_path()
        except ValueError:
            # TODO: also include here any inputs that appear as variable substitutions
            # in the path?
            # path might have as yet unsubstituted variables:
            if ifgs := self.input_file_generators:
                # can use inputs of IFP:
                return set(inp.typ for inp in ifgs[0].inputs)
            else:
                # TODO: could use script_data_in, but should be template->data in:
                # for now assume all schema input types
                return set(self.task_schema.input_types)
        else:
            # path resolved: inspect the template itself for its input variables:
            return self.get_jinja_template_inputs(path, include_prefix=False)
3146
+
3147
    def get_input_types(self, sub_parameters: bool = False) -> tuple[str, ...]:
        """Get the input types that are consumed by commands and input file generators of
        this action.

        Parameters
        ----------
        sub_parameters:
            If True, sub-parameters (i.e. dot-delimited parameter types) in command line
            inputs will be returned untouched. If False (default), only return the root
            parameter type and disregard the sub-parameter part.
        """
        if self.has_main_script_or_program:
            # TODO: refine this according to `script_data_in/program_data_in`, since this
            # can be used to control the inputs/outputs of a script/program.
            params = set(self.task_schema.input_types)
        else:
            # otherwise, gather inputs from commands, IFGs and OFPs, mapping each
            # input type to its labelled variants via the schema:
            in_lab_map = self.task_schema.input_type_labels_map
            params = set(self.get_command_input_types(sub_parameters))
            for ifg in self.input_file_generators:
                params.update(
                    lab_j for inp in ifg.inputs for lab_j in in_lab_map[inp.typ]
                )
            for ofp in self.output_file_parsers:
                params.update(
                    lab_j
                    for inp_typ in (ofp.inputs or ())
                    for lab_j in in_lab_map[inp_typ]
                )

        if self.jinja_template:
            params.update(self._get_jinja_template_input_types())
        return tuple(params)
3179
+
3180
+ def get_output_types(self) -> tuple[str, ...]:
3181
+ """Get the output types that are produced by command standard outputs and errors,
3182
+ and by output file parsers of this action."""
3183
+ if self.has_main_script_or_program:
3184
+ params = set(self.task_schema.output_types)
3185
+ # TODO: refine this according to `script_data_out`, since this can be used
3186
+ # to control the inputs/outputs of a script.
3187
+ else:
3188
+ params = set(self.get_command_output_types())
3189
+ for ofp in self.output_file_parsers:
3190
+ if ofp.output is not None:
3191
+ params.add(ofp.output.typ)
3192
+ params.update(ofp.outputs or ())
3193
+ return tuple(params)
3194
+
3195
+ def get_input_file_labels(self) -> tuple[str, ...]:
3196
+ """
3197
+ Get the labels from the input files.
3198
+ """
3199
+ return tuple(in_f.label for in_f in self.input_files)
3200
+
3201
+ def get_output_file_labels(self) -> tuple[str, ...]:
3202
+ """
3203
+ Get the labels from the output files.
3204
+ """
3205
+ return tuple(out_f.label for out_f in self.output_files)
3206
+
3207
    @TimeIt.decorator
    def generate_data_index(
        self,
        act_idx: int,
        EAR_ID: int,
        schema_data_idx: DataIndex,
        all_data_idx: dict[tuple[int, int], DataIndex],
        workflow: Workflow,
        param_source: ParamSource,
    ) -> list[int | list[int]]:
        """Generate the data index for this action of an element iteration whose overall
        data index is passed.

        This mutates `all_data_idx` (both by adding this action's entry and by
        re-pointing intermediate outputs of previous actions) and may allocate new
        unset parameter data in `workflow`.

        Returns
        -------
        list
            The data indices used by this action, for updating parameter sources.
        """

        # output keys must be processed first for this to work, since when processing an
        # output key, we may need to update the index of an output in a previous action's
        # data index, which could affect the data index in an input of this action.
        keys = [f"outputs.{typ}" for typ in self.get_output_types()]
        keys.extend(f"inputs.{typ}" for typ in self.get_input_types())
        keys.extend(f"input_files.{file.label}" for file in self.input_files)
        keys.extend(f"output_files.{file.label}" for file in self.output_files)

        # these are consumed by the OFP, so should not be considered to generate new data:
        OFP_outs = {j for ofp in self.output_file_parsers for j in ofp.outputs or ()}

        # keep all resources and repeats data:
        sub_data_idx = {
            k: v
            for k, v in schema_data_idx.items()
            if ("resources" in k or "repeats" in k)
        }
        param_src_update: list[int | list[int]] = []
        for key in keys:
            sub_param_idx: dict[str, int | list[int]] = {}
            if (
                key.startswith("input_files")
                or key.startswith("output_files")
                or key.startswith("inputs")
                or (
                    key.startswith("outputs") and key.removeprefix("outputs.") in OFP_outs
                )
            ):
                # look for an index in previous data indices (where for inputs we look
                # for *output* parameters of the same name):
                k_idx: int | list[int] | None = None
                for prev_data_idx in all_data_idx.values():
                    if key.startswith("inputs"):
                        k_param = key.removeprefix("inputs.")
                        k_out = f"outputs.{k_param}"
                        if k_out in prev_data_idx:
                            k_idx = prev_data_idx[k_out]
                    elif key in prev_data_idx:
                        k_idx = prev_data_idx[key]

                if k_idx is None:
                    # otherwise take from the schema_data_idx:
                    if key in schema_data_idx:
                        k_idx = schema_data_idx[key]
                        prefix = f"{key}."  # sub-parameter (note dot)
                        # add any associated sub-parameters:
                        sub_param_idx.update(
                            (k, v)
                            for k, v in schema_data_idx.items()
                            if k.startswith(prefix)
                        )
                    else:
                        # otherwise we need to allocate a new parameter datum:
                        # (for input/output_files keys)
                        k_idx = workflow._add_unset_parameter_data(param_source)

            else:
                # outputs
                k_idx = None
                for (_, EAR_ID_i), prev_data_idx in all_data_idx.items():
                    if key in prev_data_idx:
                        k_idx = prev_data_idx[key]

                        # allocate a new parameter datum for this intermediate output:
                        param_source_i = copy.copy(param_source)
                        param_source_i["EAR_ID"] = EAR_ID_i
                        new_k_idx = workflow._add_unset_parameter_data(param_source_i)

                        # mutate `all_data_idx`:
                        prev_data_idx[key] = new_k_idx

                if k_idx is None:
                    # otherwise take from the schema_data_idx:
                    k_idx = schema_data_idx[key]

            # can now set the EAR/act idx in the associated parameter source
            param_src_update.append(k_idx)

            sub_data_idx[key] = k_idx
            sub_data_idx.update(sub_param_idx)

        all_data_idx[act_idx, EAR_ID] = sub_data_idx

        return param_src_update
3307
+
3308
+ def get_possible_scopes(self) -> tuple[ActionScope, ...]:
3309
+ """Get the action scopes that are inclusive of this action, ordered by decreasing
3310
+ specificity."""
3311
+
3312
+ scope = self.get_precise_scope()
3313
+ if self.input_file_generators:
3314
+ return (
3315
+ scope,
3316
+ self._app.ActionScope.input_file_generator(),
3317
+ self._app.ActionScope.processing(),
3318
+ self._app.ActionScope.any(),
3319
+ )
3320
+ elif self.output_file_parsers:
3321
+ return (
3322
+ scope,
3323
+ self._app.ActionScope.output_file_parser(),
3324
+ self._app.ActionScope.processing(),
3325
+ self._app.ActionScope.any(),
3326
+ )
3327
+ else:
3328
+ return (scope, self._app.ActionScope.any())
3329
+
3330
+ def _get_possible_scopes_reversed(self) -> Iterator[ActionScope]:
3331
+ """Get the action scopes that are inclusive of this action, ordered by increasing
3332
+ specificity."""
3333
+
3334
+ # Fail early if a failure is possible
3335
+ precise_scope = self.get_precise_scope()
3336
+ yield self._app.ActionScope.any()
3337
+ if self.input_file_generators:
3338
+ yield self._app.ActionScope.processing()
3339
+ yield self._app.ActionScope.input_file_generator()
3340
+ elif self.output_file_parsers:
3341
+ yield self._app.ActionScope.processing()
3342
+ yield self._app.ActionScope.output_file_parser()
3343
+ yield precise_scope
3344
+
3345
+ def get_precise_scope(self) -> ActionScope:
3346
+ """
3347
+ Get the exact scope of this action.
3348
+ The action must have been expanded prior to calling this.
3349
+ """
3350
+ if not self._from_expand:
3351
+ raise RuntimeError(
3352
+ "Precise scope cannot be unambiguously defined until the Action has been "
3353
+ "expanded."
3354
+ )
3355
+
3356
+ if self.input_file_generators:
3357
+ return self._app.ActionScope.input_file_generator(
3358
+ file=self.input_file_generators[0].input_file.label
3359
+ )
3360
+ elif self.output_file_parsers:
3361
+ if self.output_file_parsers[0].output is not None:
3362
+ return self._app.ActionScope.output_file_parser(
3363
+ output=self.output_file_parsers[0].output
3364
+ )
3365
+ else:
3366
+ return self._app.ActionScope.output_file_parser()
3367
+ else:
3368
+ return self._app.ActionScope.main()
3369
+
3370
+ def is_input_type_required(
3371
+ self, typ: str, provided_files: Container[FileSpec]
3372
+ ) -> bool:
3373
+ """
3374
+ Determine if the given input type is required by this action.
3375
+ """
3376
+ # TODO: for now assume a script takes all inputs
3377
+ if self.has_main_script_or_program:
3378
+ return True
3379
+
3380
+ # typ is required if is appears in any command:
3381
+ if typ in self.get_command_input_types():
3382
+ return True
3383
+
3384
+ # typ is required if used in any input file generators and input file is not
3385
+ # provided:
3386
+ in_lab_map = self.task_schema.input_type_labels_map
3387
+ for ifg in self.input_file_generators:
3388
+ if typ in (
3389
+ lab_typ for inp in ifg.inputs for lab_typ in in_lab_map[inp.typ]
3390
+ ) and (ifg.input_file not in provided_files):
3391
+ return True
3392
+
3393
+ # typ is required if it is in the set of Jinja template undeclared variables
3394
+ if self.jinja_template:
3395
+ if typ in self._get_jinja_template_input_types():
3396
+ return True
3397
+
3398
+ # typ is required if used in any output file parser
3399
+ return any(
3400
+ typ in in_lab_map[inp_typ]
3401
+ for ofp in self.output_file_parsers
3402
+ for inp_typ in (ofp.inputs or ())
3403
+ )
3404
+
3405
+ @TimeIt.decorator
3406
+ def test_rules(self, element_iter: ElementIteration) -> tuple[bool, list[int]]:
3407
+ """Test all rules against the specified element iteration."""
3408
+ if any(not rule.test(element_iteration=element_iter) for rule in self.rules):
3409
+ return False, []
3410
+ return True, [
3411
+ cmd_idx
3412
+ for cmd_idx, cmd in enumerate(self.commands)
3413
+ if all(rule.test(element_iteration=element_iter) for rule in cmd.rules)
3414
+ ]
3415
+
3416
+ @TimeIt.decorator
3417
+ def get_required_executables(self) -> Iterator[str]:
3418
+ """Return executable labels required by this action."""
3419
+ for command in self.commands:
3420
+ yield from command.get_required_executables()
3421
+
3422
    def compose_source(self, snip_path: Path) -> str:
        """Generate the file contents of this source.

        For Python snippet scripts, the snippet is returned with an appended
        ``if __name__ == "__main__":`` block that loads the app configuration and
        workflow, gathers the snippet function's keyword arguments, invokes the
        function, and saves any outputs. Non-Python scripts are returned verbatim.

        Parameters
        ----------
        snip_path
            Path to the snippet script file; its stem is assumed to be the name of the
            snippet's main function.
        """

        script_name = snip_path.name
        with snip_path.open("rt") as fp:
            script_str = fp.read()

        # non-Python scripts need no generated main block:
        if not self.script_is_python_snippet:
            return script_str

        if self.is_OFP and self.output_file_parsers[0].output is None:
            # might be used just for saving files:
            return ""

        app_caps = self._app.package_name.upper()
        # generated-script preamble: imports plus run metadata read from the
        # `{app_caps}_*` environment variables (set by the jobscript), ending with an
        # opened `with` block that the workflow-load code is indented into below:
        py_imports = dedent(
            """\
            import argparse
            import os
            from pathlib import Path

            import {app_module} as app

            std_path = os.getenv("{app_caps}_RUN_STD_PATH")
            log_path = os.getenv("{app_caps}_RUN_LOG_PATH")
            run_id = int(os.getenv("{app_caps}_RUN_ID"))
            wk_path = os.getenv("{app_caps}_WK_PATH")

            with app.redirect_std_to_file(std_path):

            """
        ).format(app_module=self._app.module, app_caps=app_caps)

        # we must load the workflow (must be python):
        # (note: we previously only loaded the workflow if there were any direct inputs
        # or outputs; now we always load so we can use the method
        # `get_py_script_func_kwargs`)
        py_main_block_workflow_load = dedent(
            """\
            app.load_config(
                log_file_path=Path(log_path),
                config_dir=r"{cfg_dir}",
                config_key=r"{cfg_invoc_key}",
            )
            wk = app.Workflow(wk_path)
            EAR = wk.get_EARs_from_IDs([run_id])[0]
            """
        ).format(
            cfg_dir=self._app.config.config_directory,
            cfg_invoc_key=self._app.config.config_key,
            app_caps=app_caps,
        )

        # indentation units for assembling the generated main block:
        tab_indent = "    "
        tab_indent_2 = 2 * tab_indent

        # generated code that collects the snippet function's kwargs from the run:
        func_kwargs_str = dedent(
            """\
            blk_act_key = (
                os.environ["{app_caps}_JS_IDX"],
                os.environ["{app_caps}_BLOCK_IDX"],
                os.environ["{app_caps}_BLOCK_ACT_IDX"],
            )
            with EAR.raise_on_failure_threshold() as unset_params:
                func_kwargs = EAR.get_py_script_func_kwargs(
                    raise_on_unset=False,
                    add_script_files=True,
                    blk_act_key=blk_act_key,
                )
            """
        ).format(app_caps=app_caps)

        # the snippet's main function is assumed to share the script's stem:
        script_main_func = Path(script_name).stem
        func_invoke_str = f"{script_main_func}(**func_kwargs)"
        if not self.is_OFP and "direct" in self.script_data_out_grouped:
            py_main_block_invoke = f"outputs = {func_invoke_str}"
            # NOTE: `{name_i}` below is deliberately *not* substituted here; it must
            # survive into the generated script as part of an f-string:
            py_main_block_outputs = dedent(
                """\
                with app.redirect_std_to_file(std_path):
                    for name_i, out_i in outputs.items():
                        wk.set_parameter_value(param_id=EAR.data_idx[f"outputs.{name_i}"], value=out_i)
                """
            )
        elif self.is_OFP:
            py_main_block_invoke = f"output = {func_invoke_str}"
            assert self.output_file_parsers[0].output
            py_main_block_outputs = dedent(
                """\
                with app.redirect_std_to_file(std_path):
                    wk.save_parameter(name="outputs.{output_typ}", value=output, EAR_ID=run_id)
                """
            ).format(output_typ=self.output_file_parsers[0].output.typ)
        else:
            # no direct outputs to save; just invoke the function:
            py_main_block_invoke = func_invoke_str
            py_main_block_outputs = ""

        wk_load = (
            "\n" + indent(py_main_block_workflow_load, tab_indent_2)
            if py_main_block_workflow_load
            else ""
        )
        # assemble the full main block from the pieces above:
        py_main_block = dedent(
            """\
            if __name__ == "__main__":
            {py_imports}{wk_load}
            {func_kwargs}
            {invoke}
            {outputs}
            """
        ).format(
            py_imports=indent(py_imports, tab_indent),
            wk_load=wk_load,
            func_kwargs=indent(func_kwargs_str, tab_indent_2),
            invoke=indent(py_main_block_invoke, tab_indent),
            outputs=indent(dedent(py_main_block_outputs), tab_indent),
        )

        # final source: the original snippet followed by the generated main block:
        out = dedent(
            """\
            {script_str}
            {main_block}
            """
        ).format(
            script_str=script_str,
            main_block=py_main_block,
        )

        return out
3550
+
3551
+ def get_parameter_names(self, prefix: str) -> list[str]:
3552
+ """Get parameter types associated with a given prefix.
3553
+
3554
+ For example, with the prefix "inputs", this would return `['p1', 'p2']` for an
3555
+ action that has input types `p1` and `p2`. For inputs, labels are ignored. For
3556
+ example, for an action that accepts two inputs of the same type `p1`, with labels
3557
+ `one` and `two`, this method would return (for the "inputs" prefix):
3558
+ `['p1[one]', 'p1[two]']`.
3559
+
3560
+ This method is distinct from `TaskSchema.get_parameter_names` in that it
3561
+ returns action-level input/output/file types/labels, whereas
3562
+ `TaskSchema.get_parameter_names` returns schema-level inputs/outputs.
3563
+
3564
+ Parameters
3565
+ ----------
3566
+ prefix
3567
+ One of "inputs", "outputs", "input_files", "output_files".
3568
+
3569
+ """
3570
+ if prefix == "inputs":
3571
+ single_lab_lookup = self.task_schema._get_single_label_lookup()
3572
+ return [single_lab_lookup.get(i, i) for i in self.get_input_types()]
3573
+ elif prefix == "outputs":
3574
+ return list(self.get_output_types())
3575
+ elif prefix == "input_files":
3576
+ return list(self.get_input_file_labels())
3577
+ elif prefix == "output_files":
3578
+ return list(self.get_output_file_labels())
3579
+ else:
3580
+ raise ValueError(f"unexpected prefix: {prefix}")
3581
+
3582
+ def get_prefixed_data_names(self) -> Mapping[str, list[str]]:
3583
+ return {
3584
+ "inputs": [f"inputs.{inp}" for inp in self.get_parameter_names("inputs")],
3585
+ "outputs": [f"outputs.{out}" for out in self.get_parameter_names("outputs")],
3586
+ "input_files": [
3587
+ f"input_files.{in_file}"
3588
+ for in_file in self.get_parameter_names("input_files")
3589
+ ],
3590
+ "output_files": [
3591
+ f"output_files.{out_file}"
3592
+ for out_file in self.get_parameter_names("output_files")
3593
+ ],
3594
+ }
3595
+
3596
+ def get_prefixed_data_names_flat(self) -> list[str]:
3597
+ return list(chain.from_iterable(self.get_prefixed_data_names().values()))
3598
+
3599
+ def get_commands_file_hash(
3600
+ self, data_idx: DataIndex, action_idx: int, env_spec_hashable: tuple = ()
3601
+ ) -> int:
3602
+ """Get a hash that can be used to group together runs that will have the same
3603
+ commands file.
3604
+
3605
+ This hash is not stable across sessions or machines.
3606
+
3607
+ """
3608
+ # TODO: support <<resource:RESOURCE_NAME>> in commands, and reflect that here
3609
+ # TODO: support <<parameter:PARAMETER_NAME>> and <<resource:RESOURCE_NAME>> in
3610
+ # environment setup and executable commands, and reflect that here
3611
+
3612
+ # filter data index by input parameters that appear in the commands, or are used in
3613
+ # rules in conditional commands:
3614
+ param_types = self.get_command_parameter_types()
3615
+
3616
+ relevant_paths: list[str] = []
3617
+ for i in param_types:
3618
+ relevant_paths.extend(
3619
+ list(WorkflowTask._get_relevant_paths(data_idx, i.split(".")).keys())
3620
+ )
3621
+
3622
+ # hash any relevant data index from rule path
3623
+ for cmd in self.commands:
3624
+ for act_rule in cmd.rules:
3625
+ rule_path = act_rule.rule.path
3626
+ assert rule_path
3627
+ rule_path_split = rule_path.split(".")
3628
+ if rule_path.startswith("resources."):
3629
+ # include all resource paths for now:
3630
+ relevant_paths.extend(
3631
+ list(
3632
+ WorkflowTask._get_relevant_paths(
3633
+ data_idx, ["resources"]
3634
+ ).keys()
3635
+ )
3636
+ )
3637
+ else:
3638
+ relevant_paths.extend(
3639
+ list(
3640
+ WorkflowTask._get_relevant_paths(
3641
+ data_idx, rule_path_split
3642
+ ).keys()
3643
+ )
3644
+ )
3645
+
3646
+ # note we don't need to consider action-level rules, since these determine
3647
+ # whether a run will be included in a submission or not; this method is only
3648
+ # called on runs that are part of a submission, at which point action-level rules
3649
+ # are irrelevant.
3650
+
3651
+ relevant_data_idx = {k: v for k, v in data_idx.items() if k in relevant_paths}
3652
+
3653
+ try:
3654
+ schema_name = self.task_schema.name
3655
+ except AssertionError:
3656
+ # allows for testing without making a schema
3657
+ schema_name = ""
3658
+
3659
+ return get_hash(
3660
+ (
3661
+ schema_name,
3662
+ action_idx,
3663
+ relevant_data_idx,
3664
+ env_spec_hashable,
3665
+ )
3666
+ )
3667
+
3668
+ @classmethod
3669
+ def get_block_act_idx_shell_vars(cls) -> BlockActionKey:
3670
+ """Return a the jobscript index, block index, and block action idx shell
3671
+ environment variable names formatted for shell substitution.
3672
+
3673
+ Notes
3674
+ -----
3675
+ This seem so be shell-agnostic, at least for those currently supported.
3676
+
3677
+ """
3678
+ app_caps = cls._app.package_name.upper()
3679
+ return (
3680
+ f"${{{app_caps}_JS_IDX}}",
3681
+ f"${{{app_caps}_BLOCK_IDX}}",
3682
+ f"${{{app_caps}_BLOCK_ACT_IDX}}",
3683
+ )
3684
+
3685
+ def get_input_output_file_paths(
3686
+ self,
3687
+ type: Literal["script", "program"],
3688
+ block_act_key: BlockActionKey,
3689
+ directory: Path | None = None,
3690
+ ) -> dict[str, dict[str, Path]]:
3691
+ """Get the names (as `Path`s) of script or program input and output files for this
3692
+ action."""
3693
+ in_out_paths: dict[str, dict[str, Path]] = {
3694
+ "inputs": {},
3695
+ "outputs": {},
3696
+ }
3697
+ dat_in_grp = {
3698
+ "script": self.script_data_in_grouped,
3699
+ "program": self.program_data_in_grouped,
3700
+ }
3701
+ dat_out_grp = {
3702
+ "script": self.script_data_out_grouped,
3703
+ "program": self.program_data_out_grouped,
3704
+ }
3705
+
3706
+ for fmt in dat_in_grp[type]:
3707
+ if fmt == "json":
3708
+ path = self.get_param_dump_file_path_JSON(
3709
+ block_act_key, directory=directory
3710
+ )
3711
+ elif fmt == "hdf5":
3712
+ path = self.get_param_dump_file_path_HDF5(
3713
+ block_act_key, directory=directory
3714
+ )
3715
+ else:
3716
+ continue
3717
+ in_out_paths["inputs"][fmt] = path
3718
+
3719
+ for fmt in dat_out_grp[type]:
3720
+ if fmt == "json":
3721
+ path = self.get_param_load_file_path_JSON(
3722
+ block_act_key, directory=directory
3723
+ )
3724
+ elif fmt == "hdf5":
3725
+ path = self.get_param_load_file_path_HDF5(
3726
+ block_act_key, directory=directory
3727
+ )
3728
+ else:
3729
+ continue
3730
+ in_out_paths["outputs"][fmt] = path
3731
+
3732
+ return in_out_paths
3733
+
3734
+ def get_input_output_file_command_args(
3735
+ self, type: Literal["script", "program"]
3736
+ ) -> list[str]:
3737
+ """Get the script or program input and output file names as command line
3738
+ arguments."""
3739
+ in_out_names = self.get_input_output_file_paths(
3740
+ type, self.get_block_act_idx_shell_vars()
3741
+ )
3742
+ args: list[str] = []
3743
+ for fmt, path in in_out_names["inputs"].items():
3744
+ if self.data_files_use_opt:
3745
+ args.append(f"--inputs-{fmt}")
3746
+ args.append(str(path))
3747
+ for fmt, path in in_out_names["outputs"].items():
3748
+ if self.data_files_use_opt:
3749
+ args.append(f"--outputs-{fmt}")
3750
+ args.append(str(path))
3751
+
3752
+ return args
3753
+
3754
+ def get_jinja_template_resolved_path(self, path: str | None = None) -> Path:
3755
+ """
3756
+ Return the file system path to the associated Jinja template if there is one.
3757
+
3758
+ Parameters
3759
+ ----------
3760
+ path
3761
+ The path might include variable substitutions, in which case the builtin or
3762
+ external path with all substitutions can be provided by this argument.
3763
+
3764
+ Notes
3765
+ -----
3766
+ In the case where there are no variable substitutions in the (builtin key or
3767
+ external) path to the Jinja template file, this method will resolve the real file
3768
+ system path correctly without needing the `path` argument. However, if there are
3769
+ variable substitutions, then the substituted version of the (builtin key or
3770
+ external) path must be provided via the `path` argument.
3771
+
3772
+ """
3773
+ if path := path or self.jinja_template_or_template_path:
3774
+ try:
3775
+ resolved = (
3776
+ self._app.jinja_templates[path] if self.jinja_template else Path(path)
3777
+ )
3778
+ assert resolved.is_file()
3779
+ return resolved
3780
+ except (KeyError, AssertionError):
3781
+ via_msg = "a builtin path" if self.jinja_template else "an external path"
3782
+ raise ValueError(
3783
+ f"Jinja template specified at via {via_msg} ({path!r}) is not a file."
3784
+ )
3785
+ else:
3786
+ raise ValueError("No associated Jinja template.")
3787
+
3788
+ @staticmethod
3789
+ def _get_jinja_env_obj(path: Path) -> JinjaEnvironment:
3790
+ """
3791
+ Load the Jinja environment object using a file system loader for the parent
3792
+ directory of the specified path.
3793
+
3794
+ Parameters
3795
+ ----------
3796
+ path
3797
+ The actual path to the Jinja template file.
3798
+ """
3799
+ return JinjaEnvironment(loader=JinjaFileSystemLoader(path.parent))
3800
+
3801
+ @classmethod
3802
+ def _get_jinja_template_obj(cls, path: Path) -> JinjaTemplate:
3803
+ """
3804
+ Load the Jinja template object for the specified Jinja template.
3805
+
3806
+ Parameters
3807
+ ----------
3808
+ path
3809
+ The actual path to the Jinja template file.
3810
+ """
3811
+ return cls._get_jinja_env_obj(path).get_template(path.name)
3812
+
3813
+ @classmethod
3814
+ def _get_jinja_template_inputs(cls, path: Path) -> set[str]:
3815
+ """
3816
+ Retrieve the set of undeclared inputs in the specified Jinja template.
3817
+
3818
+ Parameters
3819
+ ----------
3820
+ path
3821
+ The actual path to the Jinja template file.
3822
+ """
3823
+ jinja_env = cls._get_jinja_env_obj(path)
3824
+ loader = jinja_env.loader
3825
+ assert loader
3826
+ source = loader.get_source(jinja_env, path.name)[0]
3827
+ parsed = jinja_env.parse(source)
3828
+ return jinja_meta.find_undeclared_variables(parsed)
3829
+
3830
+ def get_jinja_template_inputs(
3831
+ self, path: Path, include_prefix: bool = False
3832
+ ) -> set[str]:
3833
+ """
3834
+ Retrieve the set of undeclared inputs in Jinja template associated with this
3835
+ action, if there is one.
3836
+
3837
+ Parameters
3838
+ ----------
3839
+ path
3840
+ The actual path to the Jinja template file.
3841
+ """
3842
+
3843
+ return set(
3844
+ f"inputs.{inp}" if include_prefix else inp
3845
+ for inp in self._get_jinja_template_inputs(path)
3846
+ )
3847
+
3848
+ def render_jinja_template(self, input_vals: Mapping[str, Any], path: Path) -> str:
3849
+ """
3850
+ Render the Jinja template associated with this action, if there is one.
3851
+
3852
+ Parameters
3853
+ ----------
3854
+ path
3855
+ The actual path to the Jinja template file.
3856
+ """
3857
+ return self._get_jinja_template_obj(path).render(**input_vals)