hpcflow-new2 0.2.0a190__py3-none-any.whl → 0.2.0a199__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. hpcflow/__pyinstaller/hook-hpcflow.py +1 -0
  2. hpcflow/_version.py +1 -1
  3. hpcflow/data/scripts/bad_script.py +2 -0
  4. hpcflow/data/scripts/do_nothing.py +2 -0
  5. hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
  6. hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
  7. hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
  8. hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
  9. hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
  10. hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
  11. hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
  12. hpcflow/data/scripts/input_file_generator_basic.py +3 -0
  13. hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
  14. hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
  15. hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
  16. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
  17. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
  18. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
  19. hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
  20. hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
  21. hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
  22. hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
  23. hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
  24. hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
  25. hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
  26. hpcflow/data/scripts/output_file_parser_basic.py +3 -0
  27. hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
  28. hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
  29. hpcflow/data/scripts/script_exit_test.py +5 -0
  30. hpcflow/data/template_components/environments.yaml +1 -1
  31. hpcflow/sdk/__init__.py +5 -0
  32. hpcflow/sdk/app.py +150 -89
  33. hpcflow/sdk/cli.py +263 -84
  34. hpcflow/sdk/cli_common.py +99 -5
  35. hpcflow/sdk/config/callbacks.py +38 -1
  36. hpcflow/sdk/config/config.py +102 -13
  37. hpcflow/sdk/config/errors.py +19 -5
  38. hpcflow/sdk/config/types.py +3 -0
  39. hpcflow/sdk/core/__init__.py +25 -1
  40. hpcflow/sdk/core/actions.py +914 -262
  41. hpcflow/sdk/core/cache.py +76 -34
  42. hpcflow/sdk/core/command_files.py +14 -128
  43. hpcflow/sdk/core/commands.py +35 -6
  44. hpcflow/sdk/core/element.py +122 -50
  45. hpcflow/sdk/core/errors.py +58 -2
  46. hpcflow/sdk/core/execute.py +207 -0
  47. hpcflow/sdk/core/loop.py +408 -50
  48. hpcflow/sdk/core/loop_cache.py +4 -4
  49. hpcflow/sdk/core/parameters.py +382 -37
  50. hpcflow/sdk/core/run_dir_files.py +13 -40
  51. hpcflow/sdk/core/skip_reason.py +7 -0
  52. hpcflow/sdk/core/task.py +119 -30
  53. hpcflow/sdk/core/task_schema.py +68 -0
  54. hpcflow/sdk/core/test_utils.py +66 -27
  55. hpcflow/sdk/core/types.py +54 -1
  56. hpcflow/sdk/core/utils.py +78 -7
  57. hpcflow/sdk/core/workflow.py +1538 -336
  58. hpcflow/sdk/data/workflow_spec_schema.yaml +2 -0
  59. hpcflow/sdk/demo/cli.py +7 -0
  60. hpcflow/sdk/helper/cli.py +1 -0
  61. hpcflow/sdk/log.py +42 -15
  62. hpcflow/sdk/persistence/base.py +405 -53
  63. hpcflow/sdk/persistence/json.py +177 -52
  64. hpcflow/sdk/persistence/pending.py +237 -69
  65. hpcflow/sdk/persistence/store_resource.py +3 -2
  66. hpcflow/sdk/persistence/types.py +15 -4
  67. hpcflow/sdk/persistence/zarr.py +928 -81
  68. hpcflow/sdk/submission/jobscript.py +1408 -489
  69. hpcflow/sdk/submission/schedulers/__init__.py +40 -5
  70. hpcflow/sdk/submission/schedulers/direct.py +33 -19
  71. hpcflow/sdk/submission/schedulers/sge.py +51 -16
  72. hpcflow/sdk/submission/schedulers/slurm.py +44 -16
  73. hpcflow/sdk/submission/schedulers/utils.py +7 -2
  74. hpcflow/sdk/submission/shells/base.py +68 -20
  75. hpcflow/sdk/submission/shells/bash.py +222 -129
  76. hpcflow/sdk/submission/shells/powershell.py +200 -150
  77. hpcflow/sdk/submission/submission.py +852 -119
  78. hpcflow/sdk/submission/types.py +18 -21
  79. hpcflow/sdk/typing.py +24 -5
  80. hpcflow/sdk/utils/arrays.py +71 -0
  81. hpcflow/sdk/utils/deferred_file.py +55 -0
  82. hpcflow/sdk/utils/hashing.py +16 -0
  83. hpcflow/sdk/utils/patches.py +12 -0
  84. hpcflow/sdk/utils/strings.py +33 -0
  85. hpcflow/tests/api/test_api.py +32 -0
  86. hpcflow/tests/conftest.py +19 -0
  87. hpcflow/tests/data/multi_path_sequences.yaml +29 -0
  88. hpcflow/tests/data/workflow_test_run_abort.yaml +34 -35
  89. hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
  90. hpcflow/tests/scripts/test_input_file_generators.py +282 -0
  91. hpcflow/tests/scripts/test_main_scripts.py +821 -70
  92. hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
  93. hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
  94. hpcflow/tests/shells/wsl/test_wsl_submission.py +6 -0
  95. hpcflow/tests/unit/test_action.py +176 -0
  96. hpcflow/tests/unit/test_app.py +20 -0
  97. hpcflow/tests/unit/test_cache.py +46 -0
  98. hpcflow/tests/unit/test_cli.py +133 -0
  99. hpcflow/tests/unit/test_config.py +122 -1
  100. hpcflow/tests/unit/test_element_iteration.py +47 -0
  101. hpcflow/tests/unit/test_jobscript_unit.py +757 -0
  102. hpcflow/tests/unit/test_loop.py +1332 -27
  103. hpcflow/tests/unit/test_meta_task.py +325 -0
  104. hpcflow/tests/unit/test_multi_path_sequences.py +229 -0
  105. hpcflow/tests/unit/test_parameter.py +13 -0
  106. hpcflow/tests/unit/test_persistence.py +190 -8
  107. hpcflow/tests/unit/test_run.py +109 -3
  108. hpcflow/tests/unit/test_run_directories.py +29 -0
  109. hpcflow/tests/unit/test_shell.py +20 -0
  110. hpcflow/tests/unit/test_submission.py +5 -76
  111. hpcflow/tests/unit/utils/test_arrays.py +40 -0
  112. hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
  113. hpcflow/tests/unit/utils/test_hashing.py +65 -0
  114. hpcflow/tests/unit/utils/test_patches.py +5 -0
  115. hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
  116. hpcflow/tests/workflows/__init__.py +0 -0
  117. hpcflow/tests/workflows/test_directory_structure.py +31 -0
  118. hpcflow/tests/workflows/test_jobscript.py +332 -0
  119. hpcflow/tests/workflows/test_run_status.py +198 -0
  120. hpcflow/tests/workflows/test_skip_downstream.py +696 -0
  121. hpcflow/tests/workflows/test_submission.py +140 -0
  122. hpcflow/tests/workflows/test_workflows.py +142 -2
  123. hpcflow/tests/workflows/test_zip.py +18 -0
  124. hpcflow/viz_demo.ipynb +6587 -3
  125. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/METADATA +7 -4
  126. hpcflow_new2-0.2.0a199.dist-info/RECORD +221 -0
  127. hpcflow_new2-0.2.0a190.dist-info/RECORD +0 -165
  128. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/LICENSE +0 -0
  129. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/WHEEL +0 -0
  130. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/entry_points.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  """
2
2
  Actions are base components of elements.
3
3
  Element action runs (EARs) are the basic components of any enactment;
4
- they may be grouped together within a jobscript for efficiency.
4
+ they may be grouped together within a jobscript for efficiency.
5
5
  """
6
6
 
7
7
  from __future__ import annotations
@@ -9,6 +9,8 @@ from collections.abc import Mapping
9
9
  import copy
10
10
  from dataclasses import dataclass
11
11
  import json
12
+ import contextlib
13
+ from collections import defaultdict
12
14
  from pathlib import Path
13
15
  import re
14
16
  from textwrap import indent, dedent
@@ -20,6 +22,8 @@ from watchdog.utils.dirsnapshot import DirectorySnapshotDiff
20
22
  from hpcflow.sdk.core import ABORT_EXIT_CODE
21
23
  from hpcflow.sdk.core.app_aware import AppAware
22
24
  from hpcflow.sdk.core.enums import ActionScopeType, EARStatus
25
+ from hpcflow.sdk.core.skip_reason import SkipReason
26
+ from hpcflow.sdk.core.task import WorkflowTask
23
27
  from hpcflow.sdk.core.errors import (
24
28
  ActionEnvironmentMissingNameError,
25
29
  MissingCompatibleActionEnvironment,
@@ -27,6 +31,9 @@ from hpcflow.sdk.core.errors import (
27
31
  UnknownScriptDataKey,
28
32
  UnknownScriptDataParameter,
29
33
  UnsupportedScriptDataFormat,
34
+ UnsetParameterDataError,
35
+ UnsetParameterFractionLimitExceededError,
36
+ UnsetParameterNumberLimitExceededError,
30
37
  )
31
38
  from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
32
39
  from hpcflow.sdk.core.parameters import ParameterValue
@@ -35,10 +42,13 @@ from hpcflow.sdk.core.utils import (
35
42
  JSONLikeDirSnapShot,
36
43
  split_param_label,
37
44
  swap_nested_dict_keys,
45
+ get_relative_path,
38
46
  )
39
47
  from hpcflow.sdk.log import TimeIt
40
48
  from hpcflow.sdk.core.run_dir_files import RunDirAppFiles
41
49
  from hpcflow.sdk.submission.enums import SubmissionStatus
50
+ from hpcflow.sdk.submission.submission import Submission
51
+ from hpcflow.sdk.utils.hashing import get_hash
42
52
 
43
53
  if TYPE_CHECKING:
44
54
  from collections.abc import Callable, Container, Iterable, Iterator, Sequence
@@ -49,6 +59,7 @@ if TYPE_CHECKING:
49
59
  from valida.conditions import ConditionLike # type: ignore
50
60
 
51
61
  from ..typing import DataIndex, ParamSource
62
+ from ..submission.shells import Shell
52
63
  from ..submission.jobscript import Jobscript
53
64
  from .commands import Command
54
65
  from .command_files import InputFileGenerator, OutputFileParser, FileSpec
@@ -66,8 +77,34 @@ if TYPE_CHECKING:
66
77
  from .rule import Rule
67
78
  from .task import WorkflowTask
68
79
  from .task_schema import TaskSchema
69
- from .types import ParameterDependence, ScriptData
80
+ from .types import ParameterDependence, ScriptData, BlockActionKey
70
81
  from .workflow import Workflow
82
+ from .object_list import EnvironmentsList
83
+
84
+ ACTION_SCOPE_REGEX = r"(\w*)(?:\[(.*)\])?"
85
+
86
+
87
+ @dataclass
88
+ class UnsetParamTracker:
89
+ """Class to track run IDs that are the sources of unset parameter data for some input
90
+ parameter type.
91
+
92
+ Attributes
93
+ ----------
94
+ run_ids
95
+ Set of integer run IDs that have been tracked.
96
+ group_size
97
+ The size of the group, if the associated SchemaInput in question is a group.
98
+
99
+ Notes
100
+ -----
101
+ Objects of this class are instantiated within
102
+ `WorkflowTask._get_merged_parameter_data` when we are tracking unset parameters.
103
+
104
+ """
105
+
106
+ run_ids: set[int]
107
+ group_size: int
71
108
 
72
109
 
73
110
  #: Keyword arguments permitted for particular scopes.
@@ -138,11 +175,13 @@ class ElementActionRun(AppAware):
138
175
  snapshot_start: dict[str, Any] | None,
139
176
  snapshot_end: dict[str, Any] | None,
140
177
  submission_idx: int | None,
178
+ commands_file_ID: int | None,
141
179
  success: bool | None,
142
- skip: bool,
180
+ skip: int,
143
181
  exit_code: int | None,
144
182
  metadata: dict[str, Any],
145
183
  run_hostname: str | None,
184
+ port_number: int | None,
146
185
  ) -> None:
147
186
  self._id = id_
148
187
  self._is_pending = is_pending
@@ -153,6 +192,7 @@ class ElementActionRun(AppAware):
153
192
  self._start_time = start_time
154
193
  self._end_time = end_time
155
194
  self._submission_idx = submission_idx
195
+ self._commands_file_ID = commands_file_ID
156
196
  self._success = success
157
197
  self._skip = skip
158
198
  self._snapshot_start = snapshot_start
@@ -160,6 +200,7 @@ class ElementActionRun(AppAware):
160
200
  self._exit_code = exit_code
161
201
  self._metadata = metadata
162
202
  self._run_hostname = run_hostname
203
+ self._port_number = port_number
163
204
 
164
205
  # assigned on first access of corresponding properties:
165
206
  self._inputs: ElementInputs | None = None
@@ -260,6 +301,10 @@ class ElementActionRun(AppAware):
260
301
  """
261
302
  return self._run_hostname
262
303
 
304
+ @property
305
+ def port_number(self):
306
+ return self._port_number
307
+
263
308
  @property
264
309
  def start_time(self) -> datetime | None:
265
310
  """
@@ -281,6 +326,10 @@ class ElementActionRun(AppAware):
281
326
  """
282
327
  return self._submission_idx
283
328
 
329
+ @property
330
+ def commands_file_ID(self):
331
+ return self._commands_file_ID
332
+
284
333
  @property
285
334
  def success(self) -> bool | None:
286
335
  """
@@ -289,12 +338,16 @@ class ElementActionRun(AppAware):
289
338
  return self._success
290
339
 
291
340
  @property
292
- def skip(self) -> bool:
341
+ def skip(self) -> int:
293
342
  """
294
343
  Was the EAR skipped?
295
344
  """
296
345
  return self._skip
297
346
 
347
+ @property
348
+ def skip_reason(self):
349
+ return SkipReason(self.skip)
350
+
298
351
  @property
299
352
  def snapshot_start(self) -> JSONLikeDirSnapShot | None:
300
353
  """
@@ -610,6 +663,7 @@ class ElementActionRun(AppAware):
610
663
  return self._output_files
611
664
 
612
665
  @property
666
+ @TimeIt.decorator
613
667
  def env_spec(self) -> Mapping[str, Any]:
614
668
  """
615
669
  Environment details.
@@ -618,6 +672,33 @@ class ElementActionRun(AppAware):
618
672
  return {}
619
673
  return envs[self.action.get_environment_name()]
620
674
 
675
+ @property
676
+ @TimeIt.decorator
677
+ def env_spec_hashable(self) -> tuple:
678
+ return self.action.env_spec_to_hashable(self.env_spec)
679
+
680
+ def get_directory(self) -> Path | None:
681
+ """
682
+ Get the working directory, if one is required.
683
+ """
684
+ return self.workflow.get_run_directories(run_ids=[self.id_])[0]
685
+
686
+ def get_app_log_path(self) -> Path:
687
+ assert self.submission_idx is not None
688
+ return Submission.get_app_log_file_path(
689
+ self.workflow.submissions_path,
690
+ self.submission_idx,
691
+ self.id_,
692
+ )
693
+
694
+ def get_app_std_path(self) -> Path:
695
+ assert self.submission_idx is not None
696
+ std_dir = Submission.get_app_std_path(
697
+ self.workflow.submissions_path,
698
+ self.submission_idx,
699
+ )
700
+ return std_dir / f"{self.id_}.txt" # TODO: refactor
701
+
621
702
  @TimeIt.decorator
622
703
  def get_resources(self) -> Mapping[str, Any]:
623
704
  """Resolve specific resources for this EAR, considering all applicable scopes and
@@ -660,6 +741,7 @@ class ElementActionRun(AppAware):
660
741
  self,
661
742
  inputs: Sequence[str] | Mapping[str, Mapping[str, Any]] | None = None,
662
743
  label_dict: bool = True,
744
+ raise_on_unset: bool = False,
663
745
  ) -> Mapping[str, Mapping[str, Any]]:
664
746
  """Get a dict of (optionally a subset of) inputs values for this run.
665
747
 
@@ -686,12 +768,14 @@ class ElementActionRun(AppAware):
686
768
  val_i = {
687
769
  f"iteration_{run_i.element_iteration.index}": {
688
770
  "loop_idx": run_i.element_iteration.loop_idx,
689
- "value": run_i.get(f"inputs.{inp_name}"),
771
+ "value": run_i.get(
772
+ f"inputs.{inp_name}", raise_on_unset=raise_on_unset
773
+ ),
690
774
  }
691
775
  for run_i in self.get_all_previous_iteration_runs(include_self=True)
692
776
  }
693
777
  else:
694
- val_i = self.get(f"inputs.{inp_name}")
778
+ val_i = self.get(f"inputs.{inp_name}", raise_on_unset=raise_on_unset)
695
779
 
696
780
  key, label_i = self.__split_input_name(inp_name, label_dict)
697
781
  if label_i:
@@ -725,14 +809,16 @@ class ElementActionRun(AppAware):
725
809
  return key.split(".")[-1], (label if label_dict else None)
726
810
 
727
811
  def get_input_values_direct(
728
- self, label_dict: bool = True
812
+ self, label_dict: bool = True, raise_on_unset: bool = False
729
813
  ) -> Mapping[str, Mapping[str, Any]]:
730
814
  """Get a dict of input values that are to be passed directly to a Python script
731
815
  function."""
732
816
  inputs = self.action.script_data_in_grouped.get("direct", {})
733
- return self.get_input_values(inputs=inputs, label_dict=label_dict)
817
+ return self.get_input_values(
818
+ inputs=inputs, label_dict=label_dict, raise_on_unset=raise_on_unset
819
+ )
734
820
 
735
- def get_IFG_input_values(self) -> Mapping[str, Any]:
821
+ def get_IFG_input_values(self, raise_on_unset: bool = False) -> Mapping[str, Any]:
736
822
  """
737
823
  Get a dict of input values that are to be passed via an input file generator.
738
824
  """
@@ -741,12 +827,11 @@ class ElementActionRun(AppAware):
741
827
  "Cannot get input file generator inputs from this EAR because the "
742
828
  "associated action is not expanded, meaning multiple IFGs might exists."
743
829
  )
744
- input_types = {param.typ for param in self.action.input_file_generators[0].inputs}
745
- inputs: dict[str, Any] = {}
746
- for inp in self.inputs:
747
- assert isinstance(inp, self._app.ElementParameter)
748
- if (typ := inp.path[len("inputs.") :]) in input_types:
749
- inputs[typ] = inp.value
830
+ input_types = [i.typ for i in self.action.input_file_generators[0].inputs]
831
+ inputs = {
832
+ typ_i: self.get(f"inputs.{typ_i}", raise_on_unset=raise_on_unset)
833
+ for typ_i in input_types
834
+ }
750
835
 
751
836
  if self.action.script_pass_env_spec:
752
837
  inputs["env_spec"] = self.env_spec
@@ -769,7 +854,9 @@ class ElementActionRun(AppAware):
769
854
  for file_spec in self.action.output_file_parsers[0].output_files
770
855
  }
771
856
 
772
- def get_OFP_inputs(self) -> Mapping[str, str | list[str] | Mapping[str, Any]]:
857
+ def get_OFP_inputs(
858
+ self, raise_on_unset: bool = False
859
+ ) -> Mapping[str, str | list[str] | Mapping[str, Any]]:
773
860
  """
774
861
  Get a dict of input values that are to be passed to output file parsers.
775
862
  """
@@ -778,49 +865,101 @@ class ElementActionRun(AppAware):
778
865
  "Cannot get output file parser inputs from this from EAR because the "
779
866
  "associated action is not expanded, meaning multiple OFPs might exist."
780
867
  )
781
- inputs: dict[str, str | list[str] | Mapping[str, Any]] = {
782
- inp_typ: self.get(f"inputs.{inp_typ}")
783
- for inp_typ in self.action.output_file_parsers[0].inputs or ()
784
- }
868
+ inputs: dict[
869
+ str, str | list[str] | Mapping[str, Any]
870
+ ] = {} # not sure this type is correct
871
+ for inp_typ in self.action.output_file_parsers[0].inputs or []:
872
+ inputs[inp_typ] = self.get(f"inputs.{inp_typ}", raise_on_unset=raise_on_unset)
785
873
 
786
874
  if self.action.script_pass_env_spec:
787
875
  inputs["env_spec"] = self.env_spec
788
876
 
789
877
  return inputs
790
878
 
791
- def get_OFP_outputs(self) -> Mapping[str, str | list[str]]:
879
+ def get_OFP_outputs(
880
+ self, raise_on_unset: bool = False
881
+ ) -> Mapping[str, str | list[str]]:
792
882
  """
793
- Get the outputs obtained by parsing an output file.
883
+ Get the outputs that are required to execute an output file parser.
794
884
  """
795
885
  if not self.action._from_expand:
796
886
  raise RuntimeError(
797
887
  "Cannot get output file parser outputs from this from EAR because the "
798
888
  "associated action is not expanded, meaning multiple OFPs might exist."
799
889
  )
800
- return {
801
- out_typ: self.get(f"outputs.{out_typ}")
802
- for out_typ in self.action.output_file_parsers[0].outputs or ()
803
- }
890
+ outputs: dict[str, str | list[str]] = {} # not sure this type is correct
891
+ for out_typ in self.action.output_file_parsers[0].outputs or []:
892
+ outputs[out_typ] = self.get(
893
+ f"outputs.{out_typ}", raise_on_unset=raise_on_unset
894
+ )
895
+ return outputs
804
896
 
805
- def write_source(self, js_idx: int, js_act_idx: int) -> None:
897
+ def get_py_script_func_kwargs(
898
+ self,
899
+ raise_on_unset: bool = False,
900
+ add_script_files: bool = False,
901
+ blk_act_key: BlockActionKey | None = None,
902
+ ) -> Mapping[str, Any]:
903
+ """Get function arguments to run the Python script associated with this action.
904
+
905
+ Parameters
906
+ ----------
907
+ raise_on_unset
908
+ If True, raise if unset parameter data is found when trying to retrieve input
909
+ data.
910
+ add_script_files
911
+ If True, include additional keys "_input_files" and "_output_files" that will
912
+ be dicts mapping file formats to file names for script input and output files.
913
+ If True, `blk_act_key` must be provided.
914
+ blk_act_key
915
+ A three-tuple of integers corresponding to the jobscript index, block index,
916
+ and block-action index.
917
+ """
918
+ kwargs: dict[str, Any] = {}
919
+ if self.action.is_IFG:
920
+ ifg = self.action.input_file_generators[0]
921
+ path = ifg.input_file.name.value()
922
+ assert isinstance(path, str)
923
+ kwargs["path"] = Path(path)
924
+ kwargs.update(self.get_IFG_input_values(raise_on_unset=raise_on_unset))
925
+
926
+ elif self.action.is_OFP:
927
+ kwargs.update(self.get_OFP_output_files())
928
+ kwargs.update(self.get_OFP_inputs(raise_on_unset=raise_on_unset))
929
+ kwargs.update(self.get_OFP_outputs(raise_on_unset=raise_on_unset))
930
+
931
+ if (
932
+ not any((self.action.is_IFG, self.action.is_OFP))
933
+ and self.action.script_data_in_has_direct
934
+ ):
935
+ kwargs.update(self.get_input_values_direct(raise_on_unset=raise_on_unset))
936
+
937
+ if add_script_files:
938
+ assert blk_act_key
939
+ in_out_names = self.action.get_script_input_output_file_paths(blk_act_key)
940
+ in_names, out_names = in_out_names["inputs"], in_out_names["outputs"]
941
+ if in_names:
942
+ kwargs["_input_files"] = in_names
943
+ if out_names:
944
+ kwargs["_output_files"] = out_names
945
+
946
+ return kwargs
947
+
948
+ def write_script_input_files(self, block_act_key: BlockActionKey) -> None:
806
949
  """
807
950
  Write values to files in standard formats.
808
951
  """
809
952
  for fmt, ins in self.action.script_data_in_grouped.items():
810
- in_vals = self.get_input_values(inputs=ins, label_dict=False)
953
+ in_vals = self.get_input_values(
954
+ inputs=ins, label_dict=False, raise_on_unset=False
955
+ )
811
956
  if writer := self.__source_writer_map.get(fmt):
812
- writer(self, in_vals, js_idx, js_act_idx)
813
-
814
- # write the script if it is specified as a app data script, otherwise we assume
815
- # the script already exists in the working directory:
816
- if snip_path := self.action.get_snippet_script_path(
817
- self.action.script, self.env_spec
818
- ):
819
- with Path(snip_path.name).open("wt", newline="\n") as fp:
820
- fp.write(self.action.compose_source(snip_path))
957
+ writer(self, in_vals, block_act_key)
821
958
 
822
959
  def __write_json_inputs(
823
- self, in_vals: Mapping[str, ParameterValue], js_idx: int, js_act_idx: int
960
+ self,
961
+ in_vals: Mapping[str, ParameterValue | list[ParameterValue]],
962
+ block_act_key: BlockActionKey,
824
963
  ):
825
964
  in_vals_processed: dict[str, Any] = {}
826
965
  for k, v in in_vals.items():
@@ -831,21 +970,29 @@ class ElementActionRun(AppAware):
831
970
  except (AttributeError, NotImplementedError):
832
971
  in_vals_processed[k] = v
833
972
 
834
- with self.action.get_param_dump_file_path_JSON(js_idx, js_act_idx).open(
835
- "wt"
836
- ) as fp:
973
+ with self.action.get_param_dump_file_path_JSON(block_act_key).open("wt") as fp:
837
974
  json.dump(in_vals_processed, fp)
838
975
 
839
976
  def __write_hdf5_inputs(
840
- self, in_vals: Mapping[str, ParameterValue], js_idx: int, js_act_idx: int
977
+ self,
978
+ in_vals: Mapping[str, ParameterValue | list[ParameterValue]],
979
+ block_act_key: BlockActionKey,
841
980
  ):
842
981
  import h5py # type: ignore
843
982
 
844
983
  with h5py.File(
845
- self.action.get_param_dump_file_path_HDF5(js_idx, js_act_idx), mode="w"
984
+ self.action.get_param_dump_file_path_HDF5(block_act_key), mode="w"
846
985
  ) as h5file:
847
986
  for k, v in in_vals.items():
848
- v.dump_to_HDF5_group(h5file.create_group(k))
987
+ grp_k = h5file.create_group(k)
988
+ try:
989
+ assert isinstance(v, ParameterValue)
990
+ v.dump_to_HDF5_group(grp_k)
991
+ except AttributeError:
992
+ # probably an element group (i.e. v is a list of `ParameterValue`
993
+ # objects):
994
+ assert isinstance(v, list)
995
+ v[0].dump_element_group_to_HDF5_group(v, grp_k)
849
996
 
850
997
  __source_writer_map: ClassVar[dict[str, Callable[..., None]]] = {
851
998
  "json": __write_json_inputs,
@@ -855,47 +1002,79 @@ class ElementActionRun(AppAware):
855
1002
  def __output_index(self, param_name: str) -> int:
856
1003
  return cast("int", self.data_idx[f"outputs.{param_name}"])
857
1004
 
858
- def _param_save(self, js_idx: int, js_act_idx: int):
1005
+ def _param_save(self, block_act_key: BlockActionKey, run_dir: Path | None = None):
859
1006
  """Save script-generated parameters that are stored within the supported script
860
1007
  data output formats (HDF5, JSON, etc)."""
1008
+ in_out_names = self.action.get_script_input_output_file_paths(
1009
+ block_act_key, directory=run_dir
1010
+ )
1011
+
861
1012
  import h5py # type: ignore
862
1013
 
863
1014
  parameters = self._app.parameters
864
- for fmt in self.action.script_data_out_grouped:
1015
+ for fmt, load_path in in_out_names["outputs"].items():
865
1016
  if fmt == "json":
866
- with self.action.get_param_load_file_path_JSON(js_idx, js_act_idx).open(
867
- mode="rt"
868
- ) as f:
1017
+ with load_path.open(mode="rt") as f:
869
1018
  file_data: dict[str, Any] = json.load(f)
870
1019
  for param_name, param_dat in file_data.items():
871
1020
  param_id = self.__output_index(param_name)
872
1021
  if param_cls := parameters.get(param_name)._force_value_class():
873
- param_cls.save_from_JSON(param_dat, param_id, self.workflow)
874
- else:
875
- # try to save as a primitive:
876
- self.workflow.set_parameter_value(
877
- param_id=param_id, value=param_dat
878
- )
1022
+ try:
1023
+ param_cls.save_from_JSON(
1024
+ param_dat, param_id, self.workflow
1025
+ )
1026
+ continue
1027
+ except NotImplementedError:
1028
+ pass
1029
+ # try to save as a primitive:
1030
+ self.workflow.set_parameter_value(
1031
+ param_id=param_id, value=param_dat
1032
+ )
879
1033
 
880
1034
  elif fmt == "hdf5":
881
- with h5py.File(
882
- self.action.get_param_load_file_path_HDF5(js_idx, js_act_idx),
883
- mode="r",
884
- ) as h5file:
1035
+ with h5py.File(load_path, mode="r") as h5file:
885
1036
  for param_name, h5_grp in h5file.items():
1037
+ param_id = self.__output_index(param_name)
886
1038
  if param_cls := parameters.get(param_name)._force_value_class():
887
- param_cls.save_from_HDF5_group(
888
- h5_grp, self.__output_index(param_name), self.workflow
889
- )
890
- else:
891
- # Unlike with JSON, we've no fallback so we warn
892
- self._app.logger.warning(
893
- "parameter %s could not be saved; serializer not found",
894
- param_name,
895
- )
1039
+ try:
1040
+ param_cls.save_from_HDF5_group(
1041
+ h5_grp, param_id, self.workflow
1042
+ )
1043
+ continue
1044
+ except NotImplementedError:
1045
+ pass
1046
+ # Unlike with JSON, we've no fallback so we warn
1047
+ self._app.logger.warning(
1048
+ "parameter %s could not be saved; serializer not found",
1049
+ param_name,
1050
+ )
1051
+
1052
+ @property
1053
+ def is_snippet_script(self) -> bool:
1054
+ """Returns True if the action script string represents a script snippet that is
1055
+ to be modified before execution (e.g. to receive and provide parameter data)."""
1056
+ try:
1057
+ return self.action.is_snippet_script(self.action.script)
1058
+ except AttributeError:
1059
+ return False
1060
+
1061
+ def get_script_artifact_name(self) -> str:
1062
+ """Return the script name that is used when writing the script to the artifacts
1063
+ directory within the workflow.
1064
+
1065
+ Like `Action.get_script_name`, this is only applicable for snippet scripts.
1066
+
1067
+ """
1068
+ art_name, snip_path = self.action.get_script_artifact_name(
1069
+ env_spec=self.env_spec,
1070
+ act_idx=self.element_action.action_idx,
1071
+ include_suffix=True,
1072
+ specs_suffix_delim=".",
1073
+ )
1074
+ return art_name
896
1075
 
897
1076
  def compose_commands(
898
- self, jobscript: Jobscript, JS_action_idx: int
1077
+ self, environments: EnvironmentsList, shell: Shell
899
1078
  ) -> tuple[str, Mapping[int, Sequence[tuple[str, ...]]]]:
900
1079
  """
901
1080
  Write the EAR's enactment to disk in preparation for submission.
@@ -913,21 +1092,13 @@ class ElementActionRun(AppAware):
913
1092
  self._app.persistence_logger.debug("EAR.compose_commands")
914
1093
  env_spec = self.env_spec
915
1094
 
916
- for ifg in self.action.input_file_generators:
917
- # TODO: there should only be one at this stage if expanded?
918
- ifg.write_source(self.action, env_spec)
919
-
920
1095
  for ofp in self.action.output_file_parsers:
921
1096
  # TODO: there should only be one at this stage if expanded?
922
1097
  if ofp.output is None:
923
1098
  raise OutputFileParserNoOutputError()
924
- ofp.write_source(self.action, env_spec)
925
-
926
- if self.action.script:
927
- self.write_source(js_idx=jobscript.index, js_act_idx=JS_action_idx)
928
1099
 
929
1100
  command_lns: list[str] = []
930
- if (env := jobscript.submission.environments.get(**env_spec)).setup:
1101
+ if (env := environments.get(**env_spec)).setup:
931
1102
  command_lns.extend(env.setup)
932
1103
 
933
1104
  shell_vars: dict[int, list[tuple[str, ...]]] = {}
@@ -935,12 +1106,135 @@ class ElementActionRun(AppAware):
935
1106
  if cmd_idx in self.commands_idx:
936
1107
  # only execute commands that have no rules, or all valid rules:
937
1108
  cmd_str, shell_vars[cmd_idx] = command.get_command_line(
938
- EAR=self, shell=jobscript.shell, env=env
1109
+ EAR=self, shell=shell, env=env
939
1110
  )
940
1111
  command_lns.append(cmd_str)
941
1112
 
942
1113
  return ("\n".join(command_lns) + "\n"), shell_vars
943
1114
 
1115
+ def get_commands_file_hash(self) -> int:
1116
+ """Get a hash that can be used to group together runs that will have the same
1117
+ commands file.
1118
+
1119
+ This hash is not stable across sessions or machines.
1120
+
1121
+ """
1122
+ return self.action.get_commands_file_hash(
1123
+ data_idx=self.get_data_idx(),
1124
+ action_idx=self.element_action.action_idx,
1125
+ )
1126
+
1127
+ @overload
1128
+ def try_write_commands(
1129
+ self,
1130
+ jobscript: Jobscript,
1131
+ environments: EnvironmentsList,
1132
+ raise_on_unset: Literal[True],
1133
+ ) -> Path:
1134
+ ...
1135
+
1136
+ @overload
1137
+ def try_write_commands(
1138
+ self,
1139
+ jobscript: Jobscript,
1140
+ environments: EnvironmentsList,
1141
+ raise_on_unset: Literal[False] = False,
1142
+ ) -> Path | None:
1143
+ ...
1144
+
1145
+ def try_write_commands(
1146
+ self,
1147
+ jobscript: Jobscript,
1148
+ environments: EnvironmentsList,
1149
+ raise_on_unset: bool = False,
1150
+ ) -> Path | None:
1151
+ """Attempt to write the commands file for this run."""
1152
+ app_name = self._app.package_name
1153
+ try:
1154
+ commands, shell_vars = self.compose_commands(
1155
+ environments=environments,
1156
+ shell=jobscript.shell,
1157
+ )
1158
+ except UnsetParameterDataError:
1159
+ if raise_on_unset:
1160
+ raise
1161
+ self._app.submission_logger.debug(
1162
+ f"cannot yet write commands file for run ID {self.id_}; unset parameters"
1163
+ )
1164
+ return None
1165
+
1166
+ for cmd_idx, var_dat in shell_vars.items():
1167
+ for param_name, shell_var_name, st_typ in var_dat:
1168
+ commands += jobscript.shell.format_save_parameter(
1169
+ workflow_app_alias=jobscript.workflow_app_alias,
1170
+ param_name=param_name,
1171
+ shell_var_name=shell_var_name,
1172
+ cmd_idx=cmd_idx,
1173
+ stderr=(st_typ == "stderr"),
1174
+ app_name=app_name,
1175
+ )
1176
+
1177
+ commands_fmt = jobscript.shell.format_commands_file(app_name, commands)
1178
+
1179
+ if jobscript.resources.combine_scripts:
1180
+ stem = f"js_{jobscript.index}" # TODO: refactor
1181
+ else:
1182
+ stem = str(self.id_)
1183
+
1184
+ cmd_file_name = f"{stem}{jobscript.shell.JS_EXT}"
1185
+ cmd_file_path: Path = jobscript.submission.commands_path / cmd_file_name
1186
+ with cmd_file_path.open("wt", newline="\n") as fp:
1187
+ fp.write(commands_fmt)
1188
+
1189
+ return cmd_file_path
1190
+
1191
+ @contextlib.contextmanager
1192
+ def raise_on_failure_threshold(self) -> Iterator[dict[str, UnsetParamTracker]]:
1193
+ """Context manager to track parameter types and associated run IDs for which those
1194
+ parameters were found to be unset when accessed via
1195
+ `WorkflowTask._get_merged_parameter_data`.
1196
+
1197
+ """
1198
+ self.workflow._is_tracking_unset = True
1199
+ self.workflow._tracked_unset = defaultdict(
1200
+ lambda: UnsetParamTracker(run_ids=set(), group_size=-1)
1201
+ )
1202
+ try:
1203
+ yield dict(self.workflow._tracked_unset)
1204
+ except:
1205
+ raise
1206
+ else:
1207
+ try:
1208
+ for schema_inp in self.task.template.schema.inputs:
1209
+ inp_path = f"inputs.{schema_inp.typ}"
1210
+ if inp_path in self.workflow._tracked_unset:
1211
+ unset_tracker = self.workflow._tracked_unset[inp_path]
1212
+ unset_num = len(unset_tracker.run_ids)
1213
+ unset_fraction = unset_num / unset_tracker.group_size
1214
+ if isinstance(schema_inp.allow_failed_dependencies, float):
1215
+ # `True` is converted to 1.0 on SchemaInput init
1216
+ if unset_fraction > schema_inp.allow_failed_dependencies:
1217
+ raise UnsetParameterFractionLimitExceededError(
1218
+ schema_inp,
1219
+ self.task,
1220
+ unset_fraction,
1221
+ log=self._app.submission_logger,
1222
+ )
1223
+ elif isinstance(schema_inp.allow_failed_dependencies, int):
1224
+ if unset_num > schema_inp.allow_failed_dependencies:
1225
+ raise UnsetParameterNumberLimitExceededError(
1226
+ schema_inp,
1227
+ self.task,
1228
+ unset_num,
1229
+ log=self._app.submission_logger,
1230
+ )
1231
+ finally:
1232
+ self.workflow._is_tracking_unset = False
1233
+ self.workflow._tracked_unset = None
1234
+ finally:
1235
+ self.workflow._is_tracking_unset = False
1236
+ self.workflow._tracked_unset = None
1237
+
944
1238
 
945
1239
  class ElementAction(AppAware):
946
1240
  """
@@ -1403,6 +1697,16 @@ class ActionRule(JSONLike):
1403
1697
  return False
1404
1698
  return self.rule == other.rule
1405
1699
 
1700
@property
def __parent_action(self) -> Action:
    """The action this rule applies to.

    This is the rule's own `action` when that is set (truthy); otherwise it is the
    `action` of the rule's `command`. Both the command and its action are asserted
    to be set in the latter case.
    """
    if not self.action:
        # rule is attached to a command rather than directly to an action
        assert self.command
        owner = self.command.action
        assert owner
        return owner
    return self.action
1709
+
1406
1710
  @TimeIt.decorator
1407
1711
  def test(self, element_iteration: ElementIteration) -> bool:
1408
1712
  """
@@ -1413,7 +1717,11 @@ class ActionRule(JSONLike):
1413
1717
  element_iteration:
1414
1718
  The iteration to apply this rule to.
1415
1719
  """
1416
- return self.rule.test(element_like=element_iteration, action=self.action)
1720
+
1721
+ return self.rule.test(
1722
+ element_like=element_iteration,
1723
+ action=self.__parent_action,
1724
+ )
1417
1725
 
1418
1726
  @classmethod
1419
1727
  def check_exists(cls, check_exists: str) -> ActionRule:
@@ -1493,6 +1801,7 @@ class Action(JSONLike):
1493
1801
  name="commands",
1494
1802
  class_name="Command",
1495
1803
  is_multiple=True,
1804
+ parent_ref="action",
1496
1805
  ),
1497
1806
  ChildObjectSpec(
1498
1807
  name="input_file_generators",
@@ -1564,6 +1873,7 @@ class Action(JSONLike):
1564
1873
  rules: list[ActionRule] | None = None,
1565
1874
  save_files: list[FileSpec] | None = None,
1566
1875
  clean_up: list[str] | None = None,
1876
+ requires_dir: bool | None = None,
1567
1877
  ):
1568
1878
  #: The commands to be run by this action.
1569
1879
  self.commands = commands or []
@@ -1581,7 +1891,7 @@ class Action(JSONLike):
1581
1891
  #: options are always passed, and this parameter is overwritten to be True,
1582
1892
  #: regardless of its initial value.
1583
1893
  self.script_data_files_use_opt = (
1584
- script_data_files_use_opt if not self.script_is_python else True
1894
+ script_data_files_use_opt if not self.script_is_python_snippet else True
1585
1895
  )
1586
1896
  #: The executable to use to run the script.
1587
1897
  self.script_exe = script_exe.lower() if script_exe else None
@@ -1608,6 +1918,12 @@ class Action(JSONLike):
1608
1918
  #: The names of files to be deleted after each step.
1609
1919
  self.clean_up = clean_up or []
1610
1920
 
1921
+ if requires_dir is None:
1922
+ requires_dir = (
1923
+ True if self.input_file_generators or self.output_file_parsers else False
1924
+ )
1925
+ self.requires_dir = requires_dir
1926
+
1611
1927
  self._task_schema: TaskSchema | None = None # assigned by parent TaskSchema
1612
1928
  self._from_expand = False # assigned on creation of new Action by `expand`
1613
1929
 
@@ -1725,12 +2041,14 @@ class Action(JSONLike):
1725
2041
  def script_data_in_has_files(self) -> bool:
1726
2042
  """Return True if the script requires some inputs to be passed via an
1727
2043
  intermediate file format."""
2044
+ # TODO: should set `requires_dir` to True if this is True?
1728
2045
  return bool(set(self.script_data_in_grouped) - {"direct"}) # TODO: test
1729
2046
 
1730
2047
  @property
1731
2048
  def script_data_out_has_files(self) -> bool:
1732
2049
  """Return True if the script produces some outputs via an intermediate file
1733
2050
  format."""
2051
+ # TODO: should set `requires_dir` to True if this is True?
1734
2052
  return bool(set(self.script_data_out_grouped) - {"direct"}) # TODO: test
1735
2053
 
1736
2054
  @property
@@ -1746,8 +2064,8 @@ class Action(JSONLike):
1746
2064
  return "direct" in self.script_data_out_grouped # TODO: test
1747
2065
 
1748
2066
  @property
1749
- def script_is_python(self) -> bool:
1750
- """Return True if the script is a Python script (determined by the file
2067
+ def script_is_python_snippet(self) -> bool:
2068
+ """Return True if the script is a Python snippet script (determined by the file
1751
2069
  extension)"""
1752
2070
  if self.script and (snip_path := self.get_snippet_script_path(self.script)):
1753
2071
  return snip_path.suffix == ".py"
@@ -1760,6 +2078,14 @@ class Action(JSONLike):
1760
2078
  d["script_data_out"] = d.pop("_script_data_out")
1761
2079
  return d
1762
2080
 
2081
@property
def is_IFG(self):
    """True if `input_file_generators` is non-empty (truthy), otherwise False."""
    return True if self.input_file_generators else False
2084
+
2085
@property
def is_OFP(self):
    """True if `output_file_parsers` is non-empty (truthy), otherwise False."""
    return True if self.output_file_parsers else False
2088
+
1763
2089
  def __deepcopy__(self, memo: dict[int, Any]) -> Self:
1764
2090
  kwargs = self.to_dict()
1765
2091
  _from_expand = kwargs.pop("_from_expand")
@@ -1833,6 +2159,40 @@ class Action(JSONLike):
1833
2159
  and self.rules == other.rules
1834
2160
  )
1835
2161
 
2162
@staticmethod
def env_spec_to_hashable(
    env_spec: Mapping[str, Any],
) -> tuple[tuple[str, ...], tuple[Any, ...]]:
    """Convert an environment specifier mapping into a pair of tuples.

    Returns
    -------
    tuple
        A `(keys, values)` pair, each a tuple, in the iteration order of `env_spec`.
        An empty (falsy) `env_spec` gives a pair of empty tuples.
    """
    if not env_spec:
        return (), ()
    spec_keys, spec_values = zip(*env_spec.items())
    return tuple(spec_keys), tuple(spec_values)
2168
+
2169
@staticmethod
def env_spec_from_hashable(
    env_spec_h: tuple[tuple[str, ...], tuple[Any, ...]],
) -> dict[str, Any]:
    """Rebuild an environment specifier dict from a `(keys, values)` pair of tuples,
    pairing the items of the two tuples positionally."""
    return {key: value for key, value in zip(*env_spec_h)}
2174
+
2175
def get_script_determinants(self) -> tuple:
    """Get the attributes that affect the script.

    Returns
    -------
    tuple
        The values of `script`, `script_data_in`, `script_data_out`,
        `script_data_files_use_opt` and `script_exe`, in that order.
    """
    determinant_attrs = (
        "script",
        "script_data_in",
        "script_data_out",
        "script_data_files_use_opt",
        "script_exe",
    )
    return tuple(getattr(self, attr) for attr in determinant_attrs)
2184
+
2185
def get_script_determinant_hash(self, env_specs: dict | None = None) -> int:
    """Get a hash of the instance attributes that uniquely determine the script.

    The hash combines the result of `get_script_determinants` with the hashable form
    of `env_specs` (an empty dict is used when `env_specs` is not given or empty).

    The hash is not stable across sessions or machines.

    """
    if not env_specs:
        env_specs = {}
    determinants = self.get_script_determinants()
    env_specs_h = self.env_spec_to_hashable(env_specs)
    return get_hash((determinants, env_specs_h))
2195
+
1836
2196
  @classmethod
1837
2197
  def _json_like_constructor(cls, json_like) -> Self:
1838
2198
  """Invoked by `JSONLike.from_json_like` instead of `__init__`."""
@@ -1958,23 +2318,128 @@ class Action(JSONLike):
1958
2318
 
1959
2319
@classmethod
def get_script_name(cls, script: str) -> str:
    """Return the script name.

    If `script` is a snippet script path, this method returns the part of it captured
    by the class's script-name pattern. If `script` is not a snippet script path
    (as decided by `is_snippet_script`), then `script` is simply returned.

    Raises
    ------
    ValueError
        If `script` is a snippet script path that the script-name pattern does not
        match.

    """
    if not cls.is_snippet_script(script):
        # a script we can expect in the working directory, which might have been
        # generated by a previous action:
        return script

    match_obj = cls.__SCRIPT_NAME_RE.match(script)
    if not match_obj:
        raise ValueError("incomplete <<script:>>")
    return match_obj[1]
1968
2335
 
2336
@overload
def get_script_artifact_name(
    self,
    env_spec: Mapping[str, Any],
    act_idx: int,
    ret_specifiers: Literal[False] = False,
    include_suffix: bool = True,
    specs_suffix_delim: str = ".",
) -> tuple[str, Path]:
    ...


@overload
def get_script_artifact_name(
    self,
    env_spec: Mapping[str, Any],
    act_idx: int,
    ret_specifiers: Literal[True],
    include_suffix: bool = True,
    specs_suffix_delim: str = ".",
) -> tuple[str, Path, dict]:
    ...


def get_script_artifact_name(
    self,
    env_spec: Mapping[str, Any],
    act_idx: int,
    ret_specifiers: bool = False,
    include_suffix: bool = True,
    specs_suffix_delim: str = ".",
) -> tuple[str, Path] | tuple[str, Path, dict]:
    """Return the script name that is used when writing the script to the artifacts
    directory within the workflow.

    Like `Action.get_script_name`, this is only applicable for snippet scripts.

    The name has the form `<schema name>_act_<act_idx>`, followed (if the script path
    uses any environment specifiers) by `specs_suffix_delim` and the specifiers
    formatted as `<key>_<value>` joined with `__`, followed (if `include_suffix`) by
    the snippet path's file suffix.

    Parameters
    ----------
    env_spec
        Environment specifiers, passed on to `get_snippet_script_path`.
    act_idx
        Action index to embed in the name.
    ret_specifiers
        If True, additionally return the specifiers dict.
    include_suffix
        If True, append the snippet script path's suffix to the name.
    specs_suffix_delim
        Delimiter placed before the specifiers part of the name.

    Returns
    -------
    tuple
        `(name, snippet_path)`, or `(name, snippet_path, specifiers)` if
        `ret_specifiers` is True.

    """
    resolved = self.get_snippet_script_path(
        self.script,
        env_spec,
        ret_specifiers=True,
    )
    assert resolved
    snip_path, specifiers = resolved

    # one "<key>_<value>" token per specifier found in the script path:
    spec_tokens = [f"{key}_{val}" for key, val in specifiers.items()]
    specs_part = "__".join(spec_tokens)
    if specs_part:
        specs_part = f"{specs_suffix_delim}{specs_part}"

    name_parts = [f"{self.task_schema.name}_act_{act_idx}{specs_part}"]
    if include_suffix:
        name_parts.append(snip_path.suffix)
    name = "".join(name_parts)

    if not ret_specifiers:
        return name, snip_path
    return name, snip_path, specifiers
+
1969
2392
  __SCRIPT_RE: ClassVar[Pattern] = re.compile(r"\<\<script:(.*:?)\>\>")
1970
2393
  __ENV_RE: ClassVar[Pattern] = re.compile(r"\<\<env:(.*?)\>\>")
1971
2394
 
2395
+ @overload
1972
2396
  @classmethod
1973
2397
  def get_snippet_script_str(
1974
- cls, script: str, env_spec: Mapping[str, Any] | None = None
2398
+ cls,
2399
+ script: str,
2400
+ env_spec: Mapping[str, Any] | None = None,
2401
+ ret_specifiers: Literal[False] = False,
1975
2402
  ) -> str:
1976
- """
1977
- Get the substituted script snippet path as a string.
2403
+ ...
2404
+
2405
+ @overload
2406
+ @classmethod
2407
+ def get_snippet_script_str(
2408
+ cls,
2409
+ script: str,
2410
+ env_spec: Mapping[str, Any] | None = None,
2411
+ *,
2412
+ ret_specifiers: Literal[True],
2413
+ ) -> tuple[str, dict[str, Any]]:
2414
+ ...
2415
+
2416
+ @overload
2417
+ @classmethod
2418
+ def get_snippet_script_str(
2419
+ cls,
2420
+ script: str,
2421
+ env_spec: Mapping[str, Any] | None = None,
2422
+ *,
2423
+ ret_specifiers: bool,
2424
+ ) -> str | tuple[str, dict[str, Any]]:
2425
+ ...
2426
+
2427
+ @classmethod
2428
+ def get_snippet_script_str(
2429
+ cls,
2430
+ script: str,
2431
+ env_spec: Mapping[str, Any] | None = None,
2432
+ ret_specifiers: bool = False,
2433
+ ) -> str | tuple[str, dict[str, Any]]:
2434
+ """Return the specified snippet `script` with variable substitutions completed.
2435
+
2436
+ Parameters
2437
+ ----------
2438
+ ret_specifiers
2439
+ If True, also return a list of environment specifiers as a dict whose keys are
2440
+ specifier keys found in the `script` path and whose values are the
2441
+ corresponding values extracted from `env_spec`.
2442
+
1978
2443
  """
1979
2444
  if not cls.is_snippet_script(script):
1980
2445
  raise ValueError(
@@ -1985,66 +2450,130 @@ class Action(JSONLike):
1985
2450
  raise ValueError("incomplete <<script:>>")
1986
2451
  out: str = match_obj[1]
1987
2452
 
1988
- if env_spec:
2453
+ if env_spec is not None:
2454
+ specifiers: dict[str, Any] = {}
2455
+
2456
+ def repl(match_obj):
2457
+ spec = match_obj[1]
2458
+ specifiers[spec] = env_spec[spec]
2459
+ return str(env_spec[spec])
2460
+
1989
2461
  out = cls.__ENV_RE.sub(
1990
- repl=lambda match_obj: env_spec[match_obj[1]],
2462
+ repl=repl,
1991
2463
  string=out,
1992
2464
  )
2465
+ if ret_specifiers:
2466
+ return (out, specifiers)
1993
2467
  return out
1994
2468
 
1995
2469
  @classmethod
2470
+ @overload
1996
2471
  def get_snippet_script_path(
1997
- cls, script_path: str | None, env_spec: Mapping[str, Any] | None = None
2472
+ cls,
2473
+ script_path: str | None,
2474
+ env_spec: Mapping[str, Any] | None = None,
2475
+ *,
2476
+ ret_specifiers: Literal[True],
2477
+ ) -> tuple[Path, dict[str, Any]] | None:
2478
+ ...
2479
+
2480
+ @classmethod
2481
+ @overload
2482
+ def get_snippet_script_path(
2483
+ cls,
2484
+ script_path: str | None,
2485
+ env_spec: Mapping[str, Any] | None = None,
2486
+ *,
2487
+ ret_specifiers: Literal[False] = False,
1998
2488
  ) -> Path | None:
1999
- """
2000
- Get the substituted script snippet path, or False if there is no snippet.
2489
+ ...
2490
+
2491
+ @classmethod
2492
+ def get_snippet_script_path(
2493
+ cls,
2494
+ script_path: str | None,
2495
+ env_spec: Mapping[str, Any] | None = None,
2496
+ *,
2497
+ ret_specifiers: bool = False,
2498
+ ) -> Path | tuple[Path, dict[str, Any]] | None:
2499
+ """Return the specified snippet `script` path, or None if there is no snippet.
2500
+
2501
+ Parameters
2502
+ ----------
2503
+ ret_specifiers
2504
+ If True, also return a list of environment specifiers as a dict whose keys are
2505
+ specifier keys found in the `script` path and whose values are the
2506
+ corresponding values extracted from `env_spec`.
2507
+
2001
2508
  """
2002
2509
  if not cls.is_snippet_script(script_path):
2003
2510
  return None
2004
2511
 
2005
2512
  assert script_path is not None
2006
- path = cls.get_snippet_script_str(script_path, env_spec)
2007
- return Path(cls._app.scripts.get(path, path))
2513
+ path_ = cls.get_snippet_script_str(
2514
+ script_path, env_spec, ret_specifiers=ret_specifiers
2515
+ )
2516
+ if ret_specifiers:
2517
+ assert isinstance(path_, tuple)
2518
+ path_str, specifiers = path_
2519
+ else:
2520
+ assert isinstance(path_, str)
2521
+ path_str = path_
2522
+
2523
+ path = Path(cls._app.scripts.get(path_str, path_str))
2524
+
2525
+ if ret_specifiers:
2526
+ return path, specifiers
2527
+ else:
2528
+ return path
2008
2529
 
2009
2530
@staticmethod
def __get_param_dump_file_stem(block_act_key: BlockActionKey) -> str:
    """Return the run parameter dump-file prefix that `RunDirAppFiles` gives for
    `block_act_key`; file extensions are added by the callers."""
    stem = RunDirAppFiles.get_run_param_dump_file_prefix(block_act_key)
    return stem
2012
2533
 
2013
2534
@staticmethod
def __get_param_load_file_stem(block_act_key: BlockActionKey) -> str:
    """Return the run parameter load-file prefix that `RunDirAppFiles` gives for
    `block_act_key`; file extensions are added by the callers."""
    stem = RunDirAppFiles.get_run_param_load_file_prefix(block_act_key)
    return stem
2016
2537
 
2017
2538
def get_param_dump_file_path_JSON(
    self, block_act_key: BlockActionKey, directory: Path | None = None
) -> Path:
    """
    Get the path of the JSON dump file.

    Parameters
    ----------
    block_act_key
        Key passed to the dump-file stem helper to build the file name.
    directory
        Directory to place the file name under; if not given, an empty relative
        `Path()` is used.
    """
    base_dir = directory if directory else Path()
    file_name = self.__get_param_dump_file_stem(block_act_key) + ".json"
    return base_dir / file_name
2024
2548
 
2025
2549
def get_param_dump_file_path_HDF5(
    self, block_act_key: BlockActionKey, directory: Path | None = None
) -> Path:
    """
    Get the path of the HDF5 dump file.

    Parameters
    ----------
    block_act_key
        Key passed to the dump-file stem helper to build the file name.
    directory
        Directory to place the file name under; if not given, an empty relative
        `Path()` is used.
    """
    base_dir = directory if directory else Path()
    file_name = self.__get_param_dump_file_stem(block_act_key) + ".h5"
    return base_dir / file_name
2032
2557
 
2033
2558
def get_param_load_file_path_JSON(
    self, block_act_key: BlockActionKey, directory: Path | None = None
) -> Path:
    """
    Get the path of the JSON load file.

    Parameters
    ----------
    block_act_key
        Key passed to the load-file stem helper to build the file name.
    directory
        Directory to place the file name under; if not given, an empty relative
        `Path()` is used.
    """
    base_dir = directory if directory else Path()
    file_name = self.__get_param_load_file_stem(block_act_key) + ".json"
    return base_dir / file_name
2040
2568
 
2041
2569
def get_param_load_file_path_HDF5(
    self, block_act_key: BlockActionKey, directory: Path | None = None
) -> Path:
    """
    Get the path of the HDF5 load file.

    Parameters
    ----------
    block_act_key
        Key passed to the load-file stem helper to build the file name.
    directory
        Directory to place the file name under; if not given, an empty relative
        `Path()` is used.
    """
    base_dir = directory if directory else Path()
    file_name = self.__get_param_load_file_stem(block_act_key) + ".h5"
    return base_dir / file_name
2048
2577
 
2049
2578
  def expand(self) -> Sequence[Action]:
2050
2579
  """
@@ -2070,114 +2599,77 @@ class Action(JSONLike):
2070
2599
  # note we keep the IFG/OPs in the new actions, so we can check the parameters
2071
2600
  # used/produced.
2072
2601
 
2073
- args: list[str]
2074
- inp_files = []
2602
+ inp_files: list[FileSpec] = []
2075
2603
  inp_acts: list[Action] = []
2604
+
2605
+ app_caps = self._app.package_name.upper()
2606
+
2607
+ script_cmd_vars = {
2608
+ "script_name": f"${app_caps}_RUN_SCRIPT_NAME",
2609
+ "script_name_no_ext": f"${app_caps}_RUN_SCRIPT_NAME_NO_EXT",
2610
+ "script_dir": f"${app_caps}_RUN_SCRIPT_DIR",
2611
+ "script_path": f"${app_caps}_RUN_SCRIPT_PATH",
2612
+ }
2613
+
2076
2614
  for ifg in self.input_file_generators:
2077
- exe = "<<executable:python_script>>"
2078
- args = [
2079
- '"$WK_PATH"',
2080
- "$EAR_ID",
2081
- ] # WK_PATH could have a space in it
2082
- if ifg.script:
2083
- script_name = self.get_script_name(ifg.script)
2084
- variables = {
2085
- "script_name": script_name,
2086
- "script_name_no_ext": str(Path(script_name).stem),
2087
- }
2088
- else:
2089
- variables = {}
2615
+ script_exe = "python_script"
2616
+ exe = f"<<executable:{script_exe}>>"
2617
+ variables = script_cmd_vars if ifg.script else {}
2090
2618
  act_i = self._app.Action(
2091
- commands=[
2092
- self._app.Command(executable=exe, arguments=args, variables=variables)
2093
- ],
2619
+ commands=[self._app.Command(executable=exe, variables=variables)],
2094
2620
  input_file_generators=[ifg],
2095
2621
  environments=[self.get_input_file_generator_action_env(ifg)],
2096
2622
  rules=main_rules + ifg.get_action_rules(),
2623
+ script=ifg.script,
2624
+ script_data_in="direct",
2625
+ script_data_out="direct",
2626
+ script_exe=script_exe,
2097
2627
  script_pass_env_spec=ifg.script_pass_env_spec,
2098
2628
  abortable=ifg.abortable,
2099
- # TODO: add script_data_in etc? and to OFP?
2629
+ requires_dir=ifg.requires_dir,
2100
2630
  )
2101
2631
  act_i._task_schema = self.task_schema
2102
2632
  if ifg.input_file not in inp_files:
2103
2633
  inp_files.append(ifg.input_file)
2634
+ act_i.process_script_data_formats()
2104
2635
  act_i._from_expand = True
2105
2636
  inp_acts.append(act_i)
2106
2637
 
2107
2638
  out_files: list[FileSpec] = []
2108
2639
  out_acts: list[Action] = []
2109
2640
  for ofp in self.output_file_parsers:
2110
- exe = "<<executable:python_script>>"
2111
- args = [
2112
- '"$WK_PATH"',
2113
- "$EAR_ID",
2114
- ] # WK_PATH could have a space in it
2115
- if ofp.script:
2116
- script_name = self.get_script_name(ofp.script)
2117
- variables = {
2118
- "script_name": script_name,
2119
- "script_name_no_ext": str(Path(script_name).stem),
2120
- }
2121
- else:
2122
- variables = {}
2641
+ script_exe = "python_script"
2642
+ exe = f"<<executable:{script_exe}>>"
2643
+ variables = script_cmd_vars if ofp.script else {}
2123
2644
  act_i = self._app.Action(
2124
- commands=[
2125
- self._app.Command(executable=exe, arguments=args, variables=variables)
2126
- ],
2645
+ commands=[self._app.Command(executable=exe, variables=variables)],
2127
2646
  output_file_parsers=[ofp],
2128
2647
  environments=[self.get_output_file_parser_action_env(ofp)],
2129
- rules=[*self.rules, *ofp.get_action_rules()],
2648
+ rules=list(self.rules) + ofp.get_action_rules(),
2649
+ script=ofp.script,
2650
+ script_data_in="direct",
2651
+ script_data_out="direct",
2652
+ script_exe=script_exe,
2130
2653
  script_pass_env_spec=ofp.script_pass_env_spec,
2131
2654
  abortable=ofp.abortable,
2655
+ requires_dir=ofp.requires_dir,
2132
2656
  )
2133
2657
  act_i._task_schema = self.task_schema
2134
- for out_f in ofp.output_files:
2135
- if out_f not in out_files:
2136
- out_files.append(out_f)
2658
+ for j in ofp.output_files:
2659
+ if j not in out_files:
2660
+ out_files.append(j)
2661
+ act_i.process_script_data_formats()
2137
2662
  act_i._from_expand = True
2138
2663
  out_acts.append(act_i)
2139
2664
 
2140
2665
  commands = self.commands
2141
2666
  if self.script:
2142
2667
  exe = f"<<executable:{self.script_exe}>>"
2143
- args = []
2144
- if self.script:
2145
- script_name = self.get_script_name(self.script)
2146
- variables = {
2147
- "script_name": script_name,
2148
- "script_name_no_ext": str(Path(script_name).stem),
2149
- }
2150
- else:
2151
- variables = {}
2152
- if self.script_data_in_has_direct or self.script_data_out_has_direct:
2153
- # WK_PATH could have a space in it:
2154
- args.extend(("--wk-path", '"$WK_PATH"', "--run-id", "$EAR_ID"))
2155
-
2156
- fn_args = {"js_idx": "${JS_IDX}", "js_act_idx": "${JS_act_idx}"}
2157
-
2158
- for fmt in self.script_data_in_grouped:
2159
- if fmt == "json":
2160
- if self.script_data_files_use_opt:
2161
- args.append("--inputs-json")
2162
- args.append(str(self.get_param_dump_file_path_JSON(**fn_args)))
2163
- elif fmt == "hdf5":
2164
- if self.script_data_files_use_opt:
2165
- args.append("--inputs-hdf5")
2166
- args.append(str(self.get_param_dump_file_path_HDF5(**fn_args)))
2167
-
2168
- for fmt in self.script_data_out_grouped:
2169
- if fmt == "json":
2170
- if self.script_data_files_use_opt:
2171
- args.append("--outputs-json")
2172
- args.append(str(self.get_param_load_file_path_JSON(**fn_args)))
2173
- elif fmt == "hdf5":
2174
- if self.script_data_files_use_opt:
2175
- args.append("--outputs-hdf5")
2176
- args.append(str(self.get_param_load_file_path_HDF5(**fn_args)))
2177
-
2178
- commands.append(
2668
+ variables = script_cmd_vars if self.script else {}
2669
+ args = self.get_script_input_output_file_command_args()
2670
+ commands += [
2179
2671
  self._app.Command(executable=exe, arguments=args, variables=variables)
2180
- )
2672
+ ]
2181
2673
 
2182
2674
  # TODO: store script_args? and build command with executable syntax?
2183
2675
  main_act = self._app.Action(
@@ -2194,6 +2686,7 @@ class Action(JSONLike):
2194
2686
  output_files=out_files,
2195
2687
  save_files=self.save_files,
2196
2688
  clean_up=self.clean_up,
2689
+ requires_dir=self.requires_dir,
2197
2690
  )
2198
2691
  main_act._task_schema = self.task_schema
2199
2692
  main_act._from_expand = True
@@ -2233,7 +2726,7 @@ class Action(JSONLike):
2233
2726
 
2234
2727
  __FILES_RE: ClassVar[Pattern] = re.compile(r"\<\<file:(.*?)\>\>")
2235
2728
 
2236
- def get_command_input_file_labels(self) -> tuple[str, ...]:
2729
+ def get_command_file_labels(self) -> tuple[str, ...]:
2237
2730
  """Get input files types from commands."""
2238
2731
  files: set[str] = set()
2239
2732
  for command in self.commands:
@@ -2254,6 +2747,23 @@ class Action(JSONLike):
2254
2747
  params.add(out_params["stderr"])
2255
2748
  return tuple(params)
2256
2749
 
2750
def get_command_parameter_types(
    self, sub_parameters: bool = False
) -> tuple[str, ...]:
    """Get all parameter types that appear in the commands of this action.

    Input types are returned prefixed with `inputs.`, followed by command file
    labels prefixed with `input_files.`.

    Parameters
    ----------
    sub_parameters
        If True, sub-parameter inputs (i.e. dot-delimited input types) will be
        returned untouched. If False (default), only return the root parameter type
        and disregard the sub-parameter part.
    """
    # TODO: not sure if we need `input_files`
    input_paths = [
        f"inputs.{typ}" for typ in self.get_command_input_types(sub_parameters)
    ]
    file_paths = [f"input_files.{label}" for label in self.get_command_file_labels()]
    return (*input_paths, *file_paths)
2766
+
2257
2767
  def get_input_types(self, sub_parameters: bool = False) -> tuple[str, ...]:
2258
2768
  """Get the input types that are consumed by commands and input file generators of
2259
2769
  this action.
@@ -2270,6 +2780,8 @@ class Action(JSONLike):
2270
2780
  and not self.input_file_generators
2271
2781
  and not self.output_file_parsers
2272
2782
  ):
2783
+ # TODO: refine this according to `script_data_in`, since this can be used
2784
+ # to control the inputs/outputs of a script.
2273
2785
  params = set(self.task_schema.input_types)
2274
2786
  else:
2275
2787
  params = set(self.get_command_input_types(sub_parameters))
@@ -2288,6 +2800,8 @@ class Action(JSONLike):
2288
2800
  and not self.output_file_parsers
2289
2801
  ):
2290
2802
  params = set(self.task_schema.output_types)
2803
+ # TODO: refine this according to `script_data_out`, since this can be used
2804
+ # to control the inputs/outputs of a script.
2291
2805
  else:
2292
2806
  params = set(self.get_command_output_types())
2293
2807
  for ofp in self.output_file_parsers:
@@ -2523,109 +3037,112 @@ class Action(JSONLike):
2523
3037
  with snip_path.open("rt") as fp:
2524
3038
  script_str = fp.read()
2525
3039
 
2526
- if not self.script_is_python:
3040
+ if not self.script_is_python_snippet:
2527
3041
  return script_str
2528
3042
 
2529
- py_imports = """
2530
- import argparse, sys
3043
+ if self.is_OFP and self.output_file_parsers[0].output is None:
3044
+ # might be used just for saving files:
3045
+ return ""
3046
+
3047
+ app_caps = self._app.package_name.upper()
3048
+ py_imports = dedent(
3049
+ """\
3050
+ import argparse
3051
+ import os
2531
3052
  from pathlib import Path
2532
3053
 
2533
- parser = argparse.ArgumentParser()
2534
- parser.add_argument("--wk-path")
2535
- parser.add_argument("--run-id", type=int)
2536
- parser.add_argument("--inputs-json")
2537
- parser.add_argument("--inputs-hdf5")
2538
- parser.add_argument("--outputs-json")
2539
- parser.add_argument("--outputs-hdf5")
2540
- args = parser.parse_args()
2541
- """
3054
+ import {app_module} as app
3055
+
3056
+ std_path = os.getenv("{app_caps}_RUN_STD_PATH")
3057
+ log_path = os.getenv("{app_caps}_RUN_LOG_PATH")
3058
+ run_id = int(os.getenv("{app_caps}_RUN_ID"))
3059
+ wk_path = os.getenv("{app_caps}_WK_PATH")
2542
3060
 
2543
- # if any direct inputs/outputs, we must load the workflow (must be python):
2544
- if self.script_data_in_has_direct or self.script_data_out_has_direct:
2545
- py_main_block_workflow_load = """
2546
- import {app_module} as app
3061
+ with app.redirect_std_to_file(std_path):
3062
+
3063
+ """
3064
+ ).format(app_module=self._app.module, app_caps=app_caps)
3065
+
3066
+ # we must load the workflow (must be python):
3067
+ # (note: we previously only loaded the workflow if there were any direct inputs
3068
+ # or outputs; now we always load so we can use the method
3069
+ # `get_py_script_func_kwargs`)
3070
+ py_main_block_workflow_load = dedent(
3071
+ """\
2547
3072
  app.load_config(
2548
- log_file_path=Path("{run_log_file}").resolve(),
3073
+ log_file_path=Path(log_path),
2549
3074
  config_dir=r"{cfg_dir}",
2550
3075
  config_key=r"{cfg_invoc_key}",
2551
3076
  )
2552
- wk_path, EAR_ID = args.wk_path, args.run_id
2553
3077
  wk = app.Workflow(wk_path)
2554
- EAR = wk.get_EARs_from_IDs([EAR_ID])[0]
2555
- """.format(
2556
- run_log_file=self._app.RunDirAppFiles.get_log_file_name(),
2557
- app_module=self._app.module,
2558
- cfg_dir=self._app.config.config_directory,
2559
- cfg_invoc_key=self._app.config.config_key,
2560
- )
2561
- else:
2562
- py_main_block_workflow_load = ""
2563
-
2564
- func_kwargs_lst = []
2565
- if "direct" in self.script_data_in_grouped:
2566
- direct_ins_str = "direct_ins = EAR.get_input_values_direct()"
2567
- func_kwargs_lst.append("**direct_ins")
2568
- else:
2569
- direct_ins_str = ""
2570
-
2571
- if self.script_data_in_has_files:
2572
- # need to pass "_input_files" keyword argument to script main function:
2573
- input_files_str = """
2574
- inp_files = {}
2575
- if args.inputs_json:
2576
- inp_files["json"] = Path(args.inputs_json)
2577
- if args.inputs_hdf5:
2578
- inp_files["hdf5"] = Path(args.inputs_hdf5)
2579
- """
2580
- func_kwargs_lst.append("_input_files=inp_files")
2581
- else:
2582
- input_files_str = ""
2583
-
2584
- if self.script_data_out_has_files:
2585
- # need to pass "_output_files" keyword argument to script main function:
2586
- output_files_str = """
2587
- out_files = {}
2588
- if args.outputs_json:
2589
- out_files["json"] = Path(args.outputs_json)
2590
- if args.outputs_hdf5:
2591
- out_files["hdf5"] = Path(args.outputs_hdf5)
3078
+ EAR = wk.get_EARs_from_IDs([run_id])[0]
2592
3079
  """
2593
- func_kwargs_lst.append("_output_files=out_files")
3080
+ ).format(
3081
+ cfg_dir=self._app.config.config_directory,
3082
+ cfg_invoc_key=self._app.config.config_key,
3083
+ app_caps=app_caps,
3084
+ )
2594
3085
 
2595
- else:
2596
- output_files_str = ""
3086
+ tab_indent = " "
3087
+ tab_indent_2 = 2 * tab_indent
3088
+
3089
+ func_kwargs_str = dedent(
3090
+ """\
3091
+ blk_act_key = (
3092
+ os.environ["{app_caps}_JS_IDX"],
3093
+ os.environ["{app_caps}_BLOCK_IDX"],
3094
+ os.environ["{app_caps}_BLOCK_ACT_IDX"],
3095
+ )
3096
+ with EAR.raise_on_failure_threshold() as unset_params:
3097
+ func_kwargs = EAR.get_py_script_func_kwargs(
3098
+ raise_on_unset=False,
3099
+ add_script_files=True,
3100
+ blk_act_key=blk_act_key,
3101
+ )
3102
+ """
3103
+ ).format(app_caps=app_caps)
2597
3104
 
2598
3105
  script_main_func = Path(script_name).stem
2599
- func_invoke_str = f"{script_main_func}({', '.join(func_kwargs_lst)})"
2600
- if "direct" in self.script_data_out_grouped:
3106
+ func_invoke_str = f"{script_main_func}(**func_kwargs)"
3107
+ if not self.is_OFP and "direct" in self.script_data_out_grouped:
2601
3108
  py_main_block_invoke = f"outputs = {func_invoke_str}"
2602
- py_main_block_outputs = """
2603
- outputs = {"outputs." + k: v for k, v in outputs.items()}
2604
- for name_i, out_i in outputs.items():
2605
- wk.set_parameter_value(param_id=EAR.data_idx[name_i], value=out_i)
2606
- """
3109
+ py_main_block_outputs = dedent(
3110
+ """\
3111
+ with app.redirect_std_to_file(std_path):
3112
+ for name_i, out_i in outputs.items():
3113
+ wk.set_parameter_value(param_id=EAR.data_idx[f"outputs.{name_i}"], value=out_i)
3114
+ """
3115
+ )
3116
+ elif self.is_OFP:
3117
+ py_main_block_invoke = f"output = {func_invoke_str}"
3118
+ assert self.output_file_parsers[0].output
3119
+ py_main_block_outputs = dedent(
3120
+ """\
3121
+ with app.redirect_std_to_file(std_path):
3122
+ wk.save_parameter(name="outputs.{output_typ}", value=output, EAR_ID=run_id)
3123
+ """
3124
+ ).format(output_typ=self.output_file_parsers[0].output.typ)
2607
3125
  else:
2608
3126
  py_main_block_invoke = func_invoke_str
2609
3127
  py_main_block_outputs = ""
2610
3128
 
2611
- tab_indent = " "
3129
+ wk_load = (
3130
+ "\n" + indent(py_main_block_workflow_load, tab_indent_2)
3131
+ if py_main_block_workflow_load
3132
+ else ""
3133
+ )
2612
3134
  py_main_block = dedent(
2613
3135
  """\
2614
3136
  if __name__ == "__main__":
2615
- {py_imports}
2616
- {wk_load}
2617
- {direct_ins}
2618
- {in_files}
2619
- {out_files}
3137
+ {py_imports}{wk_load}
3138
+ {func_kwargs}
2620
3139
  {invoke}
2621
3140
  {outputs}
2622
3141
  """
2623
3142
  ).format(
2624
- py_imports=indent(dedent(py_imports), tab_indent),
2625
- wk_load=indent(dedent(py_main_block_workflow_load), tab_indent),
2626
- direct_ins=indent(direct_ins_str, tab_indent),
2627
- in_files=indent(dedent(input_files_str), tab_indent),
2628
- out_files=indent(dedent(output_files_str), tab_indent),
3143
+ py_imports=indent(py_imports, tab_indent),
3144
+ wk_load=wk_load,
3145
+ func_kwargs=indent(func_kwargs_str, tab_indent_2),
2629
3146
  invoke=indent(py_main_block_invoke, tab_indent),
2630
3147
  outputs=indent(dedent(py_main_block_outputs), tab_indent),
2631
3148
  )
@@ -2672,3 +3189,138 @@ class Action(JSONLike):
2672
3189
  return list(self.get_output_file_labels())
2673
3190
  else:
2674
3191
  raise ValueError(f"unexpected prefix: {prefix}")
3192
+
3193
def get_commands_file_hash(self, data_idx: DataIndex, action_idx: int) -> int:
    """Get a hash that can be used to group together runs that will have the same
    commands file.

    This hash is not stable across sessions or machines.

    Parameters
    ----------
    data_idx
        Data index of the run; only entries whose keys are relevant to the
        commands (see below) contribute to the hash.
    action_idx
        Index of this action; included verbatim in the hashed tuple.

    Returns
    -------
    int
        Result of `get_hash` applied to the tuple
        `(schema name, action_idx, filtered data index)`.

    """

    # Collect the data-index paths that can influence the commands file. A set is
    # used because the paths are only needed for membership testing below, and the
    # same path may be contributed more than once (e.g. by several rules).
    relevant_paths: set[str] = set()

    # input parameters that appear in the commands, or are used in rules in
    # conditional commands:
    for param_type in self.get_command_parameter_types():
        relevant_paths.update(
            WorkflowTask._get_relevant_paths(data_idx, param_type.split(".")).keys()
        )

    # any relevant data index from command-level rule paths:
    for cmd in self.commands:
        for act_rule in cmd.rules:
            rule_path = act_rule.rule.path
            assert rule_path
            if rule_path.startswith("resources."):
                # include all resource paths for now:
                path_parts = ["resources"]
            else:
                path_parts = rule_path.split(".")
            relevant_paths.update(
                WorkflowTask._get_relevant_paths(data_idx, path_parts).keys()
            )

    # note we don't need to consider action-level rules, since these determine
    # whether a run will be included in a submission or not; this method is only
    # called on runs that are part of a submission, at which point action-level rules
    # are irrelevant.

    # iterate `data_idx` (not the set) so the filtered dict keeps its original order:
    relevant_data_idx = {k: v for k, v in data_idx.items() if k in relevant_paths}

    try:
        schema_name = self.task_schema.name
    except AssertionError:
        # allows for testing without making a schema
        schema_name = ""

    return get_hash(
        (
            schema_name,
            action_idx,
            relevant_data_idx,
        )
    )
3255
+
3256
@classmethod
def get_block_act_idx_shell_vars(cls) -> BlockActionKey:
    """Return the jobscript index, block index, and block action index shell
    environment variable names, formatted for shell substitution.

    Each name is prefixed with the upper-cased app package name and wrapped as
    `${NAME}`.

    Notes
    -----
    This seems to be shell-agnostic, at least for those shells currently supported.

    """
    prefix = cls._app.package_name.upper()
    js_idx_var = f"${{{prefix}_JS_IDX}}"
    block_idx_var = f"${{{prefix}_BLOCK_IDX}}"
    block_act_idx_var = f"${{{prefix}_BLOCK_ACT_IDX}}"
    return (js_idx_var, block_idx_var, block_act_idx_var)
3272
+
3273
def get_script_input_output_file_paths(
    self,
    block_act_key: BlockActionKey,
    directory: Path | None = None,
) -> dict[str, dict[str, Path]]:
    """Get the names (as `Path`s) of script input and output files for this action.

    Parameters
    ----------
    block_act_key
        Key forwarded to the parameter dump/load file path getters.
    directory
        Optional directory, forwarded unchanged to the same getters.

    Returns
    -------
    dict
        A dict with keys "inputs" and "outputs", each mapping a data format
        ("json" or "hdf5") to the corresponding file path. Formats other than
        these two are skipped.

    """
    # format -> getter for files dumped as script inputs
    dump_path_getters = {
        "json": self.get_param_dump_file_path_JSON,
        "hdf5": self.get_param_dump_file_path_HDF5,
    }
    # format -> getter for files loaded as script outputs
    load_path_getters = {
        "json": self.get_param_load_file_path_JSON,
        "hdf5": self.get_param_load_file_path_HDF5,
    }

    input_paths: dict[str, Path] = {
        data_fmt: dump_path_getters[data_fmt](block_act_key, directory=directory)
        for data_fmt in self.script_data_in_grouped
        if data_fmt in dump_path_getters
    }
    output_paths: dict[str, Path] = {
        data_fmt: load_path_getters[data_fmt](block_act_key, directory=directory)
        for data_fmt in self.script_data_out_grouped
        if data_fmt in load_path_getters
    }
    return {"inputs": input_paths, "outputs": output_paths}
3310
+
3311
def get_script_input_output_file_command_args(self) -> list[str]:
    """Get the script input and output file names as command line arguments.

    The file paths are generated using the block-action-index shell variable
    names as the key. Input paths are listed first, then output paths. When
    `script_data_files_use_opt` is truthy, each path is preceded by an option of
    the form `--inputs-<fmt>` or `--outputs-<fmt>`.

    """
    paths_by_kind = self.get_script_input_output_file_paths(
        self.get_block_act_idx_shell_vars()
    )
    cmd_args: list[str] = []
    for kind in ("inputs", "outputs"):
        for data_fmt, file_path in paths_by_kind[kind].items():
            if self.script_data_files_use_opt:
                cmd_args.append(f"--{kind}-{data_fmt}")
            cmd_args.append(str(file_path))

    return cmd_args