hpcflow-new2 0.2.0a190__py3-none-any.whl → 0.2.0a200__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- hpcflow/__pyinstaller/hook-hpcflow.py +1 -0
- hpcflow/_version.py +1 -1
- hpcflow/data/scripts/bad_script.py +2 -0
- hpcflow/data/scripts/do_nothing.py +2 -0
- hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
- hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/input_file_generator_basic.py +3 -0
- hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
- hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
- hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
- hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
- hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
- hpcflow/data/scripts/output_file_parser_basic.py +3 -0
- hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
- hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/script_exit_test.py +5 -0
- hpcflow/data/template_components/environments.yaml +1 -1
- hpcflow/sdk/__init__.py +5 -0
- hpcflow/sdk/app.py +166 -92
- hpcflow/sdk/cli.py +263 -84
- hpcflow/sdk/cli_common.py +99 -5
- hpcflow/sdk/config/callbacks.py +38 -1
- hpcflow/sdk/config/config.py +102 -13
- hpcflow/sdk/config/errors.py +19 -5
- hpcflow/sdk/config/types.py +3 -0
- hpcflow/sdk/core/__init__.py +25 -1
- hpcflow/sdk/core/actions.py +914 -262
- hpcflow/sdk/core/cache.py +76 -34
- hpcflow/sdk/core/command_files.py +14 -128
- hpcflow/sdk/core/commands.py +35 -6
- hpcflow/sdk/core/element.py +122 -50
- hpcflow/sdk/core/errors.py +58 -2
- hpcflow/sdk/core/execute.py +207 -0
- hpcflow/sdk/core/loop.py +408 -50
- hpcflow/sdk/core/loop_cache.py +4 -4
- hpcflow/sdk/core/parameters.py +382 -37
- hpcflow/sdk/core/run_dir_files.py +13 -40
- hpcflow/sdk/core/skip_reason.py +7 -0
- hpcflow/sdk/core/task.py +119 -30
- hpcflow/sdk/core/task_schema.py +68 -0
- hpcflow/sdk/core/test_utils.py +66 -27
- hpcflow/sdk/core/types.py +54 -1
- hpcflow/sdk/core/utils.py +136 -19
- hpcflow/sdk/core/workflow.py +1587 -356
- hpcflow/sdk/data/workflow_spec_schema.yaml +2 -0
- hpcflow/sdk/demo/cli.py +7 -0
- hpcflow/sdk/helper/cli.py +1 -0
- hpcflow/sdk/log.py +42 -15
- hpcflow/sdk/persistence/base.py +405 -53
- hpcflow/sdk/persistence/json.py +177 -52
- hpcflow/sdk/persistence/pending.py +237 -69
- hpcflow/sdk/persistence/store_resource.py +3 -2
- hpcflow/sdk/persistence/types.py +15 -4
- hpcflow/sdk/persistence/zarr.py +928 -81
- hpcflow/sdk/submission/jobscript.py +1408 -489
- hpcflow/sdk/submission/schedulers/__init__.py +40 -5
- hpcflow/sdk/submission/schedulers/direct.py +33 -19
- hpcflow/sdk/submission/schedulers/sge.py +51 -16
- hpcflow/sdk/submission/schedulers/slurm.py +44 -16
- hpcflow/sdk/submission/schedulers/utils.py +7 -2
- hpcflow/sdk/submission/shells/base.py +68 -20
- hpcflow/sdk/submission/shells/bash.py +222 -129
- hpcflow/sdk/submission/shells/powershell.py +200 -150
- hpcflow/sdk/submission/submission.py +852 -119
- hpcflow/sdk/submission/types.py +18 -21
- hpcflow/sdk/typing.py +24 -5
- hpcflow/sdk/utils/arrays.py +71 -0
- hpcflow/sdk/utils/deferred_file.py +55 -0
- hpcflow/sdk/utils/hashing.py +16 -0
- hpcflow/sdk/utils/patches.py +12 -0
- hpcflow/sdk/utils/strings.py +33 -0
- hpcflow/tests/api/test_api.py +32 -0
- hpcflow/tests/conftest.py +19 -0
- hpcflow/tests/data/benchmark_script_runner.yaml +26 -0
- hpcflow/tests/data/multi_path_sequences.yaml +29 -0
- hpcflow/tests/data/workflow_test_run_abort.yaml +34 -35
- hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
- hpcflow/tests/scripts/test_input_file_generators.py +282 -0
- hpcflow/tests/scripts/test_main_scripts.py +821 -70
- hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
- hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
- hpcflow/tests/shells/wsl/test_wsl_submission.py +6 -0
- hpcflow/tests/unit/test_action.py +176 -0
- hpcflow/tests/unit/test_app.py +20 -0
- hpcflow/tests/unit/test_cache.py +46 -0
- hpcflow/tests/unit/test_cli.py +133 -0
- hpcflow/tests/unit/test_config.py +122 -1
- hpcflow/tests/unit/test_element_iteration.py +47 -0
- hpcflow/tests/unit/test_jobscript_unit.py +757 -0
- hpcflow/tests/unit/test_loop.py +1332 -27
- hpcflow/tests/unit/test_meta_task.py +325 -0
- hpcflow/tests/unit/test_multi_path_sequences.py +229 -0
- hpcflow/tests/unit/test_parameter.py +13 -0
- hpcflow/tests/unit/test_persistence.py +190 -8
- hpcflow/tests/unit/test_run.py +109 -3
- hpcflow/tests/unit/test_run_directories.py +29 -0
- hpcflow/tests/unit/test_shell.py +20 -0
- hpcflow/tests/unit/test_submission.py +5 -76
- hpcflow/tests/unit/test_workflow_template.py +31 -0
- hpcflow/tests/unit/utils/test_arrays.py +40 -0
- hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
- hpcflow/tests/unit/utils/test_hashing.py +65 -0
- hpcflow/tests/unit/utils/test_patches.py +5 -0
- hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
- hpcflow/tests/workflows/__init__.py +0 -0
- hpcflow/tests/workflows/test_directory_structure.py +31 -0
- hpcflow/tests/workflows/test_jobscript.py +332 -0
- hpcflow/tests/workflows/test_run_status.py +198 -0
- hpcflow/tests/workflows/test_skip_downstream.py +696 -0
- hpcflow/tests/workflows/test_submission.py +140 -0
- hpcflow/tests/workflows/test_workflows.py +142 -2
- hpcflow/tests/workflows/test_zip.py +18 -0
- hpcflow/viz_demo.ipynb +6587 -3
- {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a200.dist-info}/METADATA +7 -4
- hpcflow_new2-0.2.0a200.dist-info/RECORD +222 -0
- hpcflow_new2-0.2.0a190.dist-info/RECORD +0 -165
- {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a200.dist-info}/LICENSE +0 -0
- {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a200.dist-info}/WHEEL +0 -0
- {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a200.dist-info}/entry_points.txt +0 -0
hpcflow/sdk/core/actions.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
"""
|
2
2
|
Actions are base components of elements.
|
3
3
|
Element action runs (EARs) are the basic components of any enactment;
|
4
|
-
they may be grouped together within a jobscript for efficiency.
|
4
|
+
they may be grouped together within a jobscript for efficiency.
|
5
5
|
"""
|
6
6
|
|
7
7
|
from __future__ import annotations
|
@@ -9,6 +9,8 @@ from collections.abc import Mapping
|
|
9
9
|
import copy
|
10
10
|
from dataclasses import dataclass
|
11
11
|
import json
|
12
|
+
import contextlib
|
13
|
+
from collections import defaultdict
|
12
14
|
from pathlib import Path
|
13
15
|
import re
|
14
16
|
from textwrap import indent, dedent
|
@@ -20,6 +22,8 @@ from watchdog.utils.dirsnapshot import DirectorySnapshotDiff
|
|
20
22
|
from hpcflow.sdk.core import ABORT_EXIT_CODE
|
21
23
|
from hpcflow.sdk.core.app_aware import AppAware
|
22
24
|
from hpcflow.sdk.core.enums import ActionScopeType, EARStatus
|
25
|
+
from hpcflow.sdk.core.skip_reason import SkipReason
|
26
|
+
from hpcflow.sdk.core.task import WorkflowTask
|
23
27
|
from hpcflow.sdk.core.errors import (
|
24
28
|
ActionEnvironmentMissingNameError,
|
25
29
|
MissingCompatibleActionEnvironment,
|
@@ -27,6 +31,9 @@ from hpcflow.sdk.core.errors import (
|
|
27
31
|
UnknownScriptDataKey,
|
28
32
|
UnknownScriptDataParameter,
|
29
33
|
UnsupportedScriptDataFormat,
|
34
|
+
UnsetParameterDataError,
|
35
|
+
UnsetParameterFractionLimitExceededError,
|
36
|
+
UnsetParameterNumberLimitExceededError,
|
30
37
|
)
|
31
38
|
from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
|
32
39
|
from hpcflow.sdk.core.parameters import ParameterValue
|
@@ -35,10 +42,13 @@ from hpcflow.sdk.core.utils import (
|
|
35
42
|
JSONLikeDirSnapShot,
|
36
43
|
split_param_label,
|
37
44
|
swap_nested_dict_keys,
|
45
|
+
get_relative_path,
|
38
46
|
)
|
39
47
|
from hpcflow.sdk.log import TimeIt
|
40
48
|
from hpcflow.sdk.core.run_dir_files import RunDirAppFiles
|
41
49
|
from hpcflow.sdk.submission.enums import SubmissionStatus
|
50
|
+
from hpcflow.sdk.submission.submission import Submission
|
51
|
+
from hpcflow.sdk.utils.hashing import get_hash
|
42
52
|
|
43
53
|
if TYPE_CHECKING:
|
44
54
|
from collections.abc import Callable, Container, Iterable, Iterator, Sequence
|
@@ -49,6 +59,7 @@ if TYPE_CHECKING:
|
|
49
59
|
from valida.conditions import ConditionLike # type: ignore
|
50
60
|
|
51
61
|
from ..typing import DataIndex, ParamSource
|
62
|
+
from ..submission.shells import Shell
|
52
63
|
from ..submission.jobscript import Jobscript
|
53
64
|
from .commands import Command
|
54
65
|
from .command_files import InputFileGenerator, OutputFileParser, FileSpec
|
@@ -66,8 +77,34 @@ if TYPE_CHECKING:
|
|
66
77
|
from .rule import Rule
|
67
78
|
from .task import WorkflowTask
|
68
79
|
from .task_schema import TaskSchema
|
69
|
-
from .types import ParameterDependence, ScriptData
|
80
|
+
from .types import ParameterDependence, ScriptData, BlockActionKey
|
70
81
|
from .workflow import Workflow
|
82
|
+
from .object_list import EnvironmentsList
|
83
|
+
|
84
|
+
ACTION_SCOPE_REGEX = r"(\w*)(?:\[(.*)\])?"
|
85
|
+
|
86
|
+
|
87
|
+
@dataclass
|
88
|
+
class UnsetParamTracker:
|
89
|
+
"""Class to track run IDs that are the sources of unset parameter data for some input
|
90
|
+
parameter type.
|
91
|
+
|
92
|
+
Attributes
|
93
|
+
----------
|
94
|
+
run_ids
|
95
|
+
Set of integer run IDs that have been tracked.
|
96
|
+
group_size
|
97
|
+
The size of the group, if the associated SchemaInput in question is a group.
|
98
|
+
|
99
|
+
Notes
|
100
|
+
-----
|
101
|
+
Objects of this class are instantiated within
|
102
|
+
`WorkflowTask._get_merged_parameter_data` when we are tracking unset parameters.
|
103
|
+
|
104
|
+
"""
|
105
|
+
|
106
|
+
run_ids: set[int]
|
107
|
+
group_size: int
|
71
108
|
|
72
109
|
|
73
110
|
#: Keyword arguments permitted for particular scopes.
|
@@ -138,11 +175,13 @@ class ElementActionRun(AppAware):
|
|
138
175
|
snapshot_start: dict[str, Any] | None,
|
139
176
|
snapshot_end: dict[str, Any] | None,
|
140
177
|
submission_idx: int | None,
|
178
|
+
commands_file_ID: int | None,
|
141
179
|
success: bool | None,
|
142
|
-
skip:
|
180
|
+
skip: int,
|
143
181
|
exit_code: int | None,
|
144
182
|
metadata: dict[str, Any],
|
145
183
|
run_hostname: str | None,
|
184
|
+
port_number: int | None,
|
146
185
|
) -> None:
|
147
186
|
self._id = id_
|
148
187
|
self._is_pending = is_pending
|
@@ -153,6 +192,7 @@ class ElementActionRun(AppAware):
|
|
153
192
|
self._start_time = start_time
|
154
193
|
self._end_time = end_time
|
155
194
|
self._submission_idx = submission_idx
|
195
|
+
self._commands_file_ID = commands_file_ID
|
156
196
|
self._success = success
|
157
197
|
self._skip = skip
|
158
198
|
self._snapshot_start = snapshot_start
|
@@ -160,6 +200,7 @@ class ElementActionRun(AppAware):
|
|
160
200
|
self._exit_code = exit_code
|
161
201
|
self._metadata = metadata
|
162
202
|
self._run_hostname = run_hostname
|
203
|
+
self._port_number = port_number
|
163
204
|
|
164
205
|
# assigned on first access of corresponding properties:
|
165
206
|
self._inputs: ElementInputs | None = None
|
@@ -260,6 +301,10 @@ class ElementActionRun(AppAware):
|
|
260
301
|
"""
|
261
302
|
return self._run_hostname
|
262
303
|
|
304
|
+
@property
|
305
|
+
def port_number(self):
|
306
|
+
return self._port_number
|
307
|
+
|
263
308
|
@property
|
264
309
|
def start_time(self) -> datetime | None:
|
265
310
|
"""
|
@@ -281,6 +326,10 @@ class ElementActionRun(AppAware):
|
|
281
326
|
"""
|
282
327
|
return self._submission_idx
|
283
328
|
|
329
|
+
@property
|
330
|
+
def commands_file_ID(self):
|
331
|
+
return self._commands_file_ID
|
332
|
+
|
284
333
|
@property
|
285
334
|
def success(self) -> bool | None:
|
286
335
|
"""
|
@@ -289,12 +338,16 @@ class ElementActionRun(AppAware):
|
|
289
338
|
return self._success
|
290
339
|
|
291
340
|
@property
|
292
|
-
def skip(self) ->
|
341
|
+
def skip(self) -> int:
|
293
342
|
"""
|
294
343
|
Was the EAR skipped?
|
295
344
|
"""
|
296
345
|
return self._skip
|
297
346
|
|
347
|
+
@property
|
348
|
+
def skip_reason(self):
|
349
|
+
return SkipReason(self.skip)
|
350
|
+
|
298
351
|
@property
|
299
352
|
def snapshot_start(self) -> JSONLikeDirSnapShot | None:
|
300
353
|
"""
|
@@ -610,6 +663,7 @@ class ElementActionRun(AppAware):
|
|
610
663
|
return self._output_files
|
611
664
|
|
612
665
|
@property
|
666
|
+
@TimeIt.decorator
|
613
667
|
def env_spec(self) -> Mapping[str, Any]:
|
614
668
|
"""
|
615
669
|
Environment details.
|
@@ -618,6 +672,33 @@ class ElementActionRun(AppAware):
|
|
618
672
|
return {}
|
619
673
|
return envs[self.action.get_environment_name()]
|
620
674
|
|
675
|
+
@property
|
676
|
+
@TimeIt.decorator
|
677
|
+
def env_spec_hashable(self) -> tuple:
|
678
|
+
return self.action.env_spec_to_hashable(self.env_spec)
|
679
|
+
|
680
|
+
def get_directory(self) -> Path | None:
|
681
|
+
"""
|
682
|
+
Get the working directory, if one is required.
|
683
|
+
"""
|
684
|
+
return self.workflow.get_run_directories(run_ids=[self.id_])[0]
|
685
|
+
|
686
|
+
def get_app_log_path(self) -> Path:
|
687
|
+
assert self.submission_idx is not None
|
688
|
+
return Submission.get_app_log_file_path(
|
689
|
+
self.workflow.submissions_path,
|
690
|
+
self.submission_idx,
|
691
|
+
self.id_,
|
692
|
+
)
|
693
|
+
|
694
|
+
def get_app_std_path(self) -> Path:
|
695
|
+
assert self.submission_idx is not None
|
696
|
+
std_dir = Submission.get_app_std_path(
|
697
|
+
self.workflow.submissions_path,
|
698
|
+
self.submission_idx,
|
699
|
+
)
|
700
|
+
return std_dir / f"{self.id_}.txt" # TODO: refactor
|
701
|
+
|
621
702
|
@TimeIt.decorator
|
622
703
|
def get_resources(self) -> Mapping[str, Any]:
|
623
704
|
"""Resolve specific resources for this EAR, considering all applicable scopes and
|
@@ -660,6 +741,7 @@ class ElementActionRun(AppAware):
|
|
660
741
|
self,
|
661
742
|
inputs: Sequence[str] | Mapping[str, Mapping[str, Any]] | None = None,
|
662
743
|
label_dict: bool = True,
|
744
|
+
raise_on_unset: bool = False,
|
663
745
|
) -> Mapping[str, Mapping[str, Any]]:
|
664
746
|
"""Get a dict of (optionally a subset of) inputs values for this run.
|
665
747
|
|
@@ -686,12 +768,14 @@ class ElementActionRun(AppAware):
|
|
686
768
|
val_i = {
|
687
769
|
f"iteration_{run_i.element_iteration.index}": {
|
688
770
|
"loop_idx": run_i.element_iteration.loop_idx,
|
689
|
-
"value": run_i.get(
|
771
|
+
"value": run_i.get(
|
772
|
+
f"inputs.{inp_name}", raise_on_unset=raise_on_unset
|
773
|
+
),
|
690
774
|
}
|
691
775
|
for run_i in self.get_all_previous_iteration_runs(include_self=True)
|
692
776
|
}
|
693
777
|
else:
|
694
|
-
val_i = self.get(f"inputs.{inp_name}")
|
778
|
+
val_i = self.get(f"inputs.{inp_name}", raise_on_unset=raise_on_unset)
|
695
779
|
|
696
780
|
key, label_i = self.__split_input_name(inp_name, label_dict)
|
697
781
|
if label_i:
|
@@ -725,14 +809,16 @@ class ElementActionRun(AppAware):
|
|
725
809
|
return key.split(".")[-1], (label if label_dict else None)
|
726
810
|
|
727
811
|
def get_input_values_direct(
|
728
|
-
self, label_dict: bool = True
|
812
|
+
self, label_dict: bool = True, raise_on_unset: bool = False
|
729
813
|
) -> Mapping[str, Mapping[str, Any]]:
|
730
814
|
"""Get a dict of input values that are to be passed directly to a Python script
|
731
815
|
function."""
|
732
816
|
inputs = self.action.script_data_in_grouped.get("direct", {})
|
733
|
-
return self.get_input_values(
|
817
|
+
return self.get_input_values(
|
818
|
+
inputs=inputs, label_dict=label_dict, raise_on_unset=raise_on_unset
|
819
|
+
)
|
734
820
|
|
735
|
-
def get_IFG_input_values(self) -> Mapping[str, Any]:
|
821
|
+
def get_IFG_input_values(self, raise_on_unset: bool = False) -> Mapping[str, Any]:
|
736
822
|
"""
|
737
823
|
Get a dict of input values that are to be passed via an input file generator.
|
738
824
|
"""
|
@@ -741,12 +827,11 @@ class ElementActionRun(AppAware):
|
|
741
827
|
"Cannot get input file generator inputs from this EAR because the "
|
742
828
|
"associated action is not expanded, meaning multiple IFGs might exists."
|
743
829
|
)
|
744
|
-
input_types =
|
745
|
-
inputs
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
inputs[typ] = inp.value
|
830
|
+
input_types = [i.typ for i in self.action.input_file_generators[0].inputs]
|
831
|
+
inputs = {
|
832
|
+
typ_i: self.get(f"inputs.{typ_i}", raise_on_unset=raise_on_unset)
|
833
|
+
for typ_i in input_types
|
834
|
+
}
|
750
835
|
|
751
836
|
if self.action.script_pass_env_spec:
|
752
837
|
inputs["env_spec"] = self.env_spec
|
@@ -769,7 +854,9 @@ class ElementActionRun(AppAware):
|
|
769
854
|
for file_spec in self.action.output_file_parsers[0].output_files
|
770
855
|
}
|
771
856
|
|
772
|
-
def get_OFP_inputs(
|
857
|
+
def get_OFP_inputs(
|
858
|
+
self, raise_on_unset: bool = False
|
859
|
+
) -> Mapping[str, str | list[str] | Mapping[str, Any]]:
|
773
860
|
"""
|
774
861
|
Get a dict of input values that are to be passed to output file parsers.
|
775
862
|
"""
|
@@ -778,49 +865,101 @@ class ElementActionRun(AppAware):
|
|
778
865
|
"Cannot get output file parser inputs from this from EAR because the "
|
779
866
|
"associated action is not expanded, meaning multiple OFPs might exist."
|
780
867
|
)
|
781
|
-
inputs: dict[
|
782
|
-
|
783
|
-
|
784
|
-
|
868
|
+
inputs: dict[
|
869
|
+
str, str | list[str] | Mapping[str, Any]
|
870
|
+
] = {} # not sure this type is correct
|
871
|
+
for inp_typ in self.action.output_file_parsers[0].inputs or []:
|
872
|
+
inputs[inp_typ] = self.get(f"inputs.{inp_typ}", raise_on_unset=raise_on_unset)
|
785
873
|
|
786
874
|
if self.action.script_pass_env_spec:
|
787
875
|
inputs["env_spec"] = self.env_spec
|
788
876
|
|
789
877
|
return inputs
|
790
878
|
|
791
|
-
def get_OFP_outputs(
|
879
|
+
def get_OFP_outputs(
|
880
|
+
self, raise_on_unset: bool = False
|
881
|
+
) -> Mapping[str, str | list[str]]:
|
792
882
|
"""
|
793
|
-
Get the outputs
|
883
|
+
Get the outputs that are required to execute an output file parser.
|
794
884
|
"""
|
795
885
|
if not self.action._from_expand:
|
796
886
|
raise RuntimeError(
|
797
887
|
"Cannot get output file parser outputs from this from EAR because the "
|
798
888
|
"associated action is not expanded, meaning multiple OFPs might exist."
|
799
889
|
)
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
890
|
+
outputs: dict[str, str | list[str]] = {} # not sure this type is correct
|
891
|
+
for out_typ in self.action.output_file_parsers[0].outputs or []:
|
892
|
+
outputs[out_typ] = self.get(
|
893
|
+
f"outputs.{out_typ}", raise_on_unset=raise_on_unset
|
894
|
+
)
|
895
|
+
return outputs
|
804
896
|
|
805
|
-
def
|
897
|
+
def get_py_script_func_kwargs(
|
898
|
+
self,
|
899
|
+
raise_on_unset: bool = False,
|
900
|
+
add_script_files: bool = False,
|
901
|
+
blk_act_key: BlockActionKey | None = None,
|
902
|
+
) -> Mapping[str, Any]:
|
903
|
+
"""Get function arguments to run the Python script associated with this action.
|
904
|
+
|
905
|
+
Parameters
|
906
|
+
----------
|
907
|
+
raise_on_unset
|
908
|
+
If True, raise if unset parameter data is found when trying to retrieve input
|
909
|
+
data.
|
910
|
+
add_script_files
|
911
|
+
If True, include additional keys "_input_files" and "_output_files" that will
|
912
|
+
be dicts mapping file formats to file names for script input and output files.
|
913
|
+
If True, `js_blk_act_key` must be provided.
|
914
|
+
js_blk_act_key
|
915
|
+
A three-tuple of integers corresponding to the jobscript index, block index,
|
916
|
+
and block-action index.
|
917
|
+
"""
|
918
|
+
kwargs: dict[str, Any] = {}
|
919
|
+
if self.action.is_IFG:
|
920
|
+
ifg = self.action.input_file_generators[0]
|
921
|
+
path = ifg.input_file.name.value()
|
922
|
+
assert isinstance(path, str)
|
923
|
+
kwargs["path"] = Path(path)
|
924
|
+
kwargs.update(self.get_IFG_input_values(raise_on_unset=raise_on_unset))
|
925
|
+
|
926
|
+
elif self.action.is_OFP:
|
927
|
+
kwargs.update(self.get_OFP_output_files())
|
928
|
+
kwargs.update(self.get_OFP_inputs(raise_on_unset=raise_on_unset))
|
929
|
+
kwargs.update(self.get_OFP_outputs(raise_on_unset=raise_on_unset))
|
930
|
+
|
931
|
+
if (
|
932
|
+
not any((self.action.is_IFG, self.action.is_OFP))
|
933
|
+
and self.action.script_data_in_has_direct
|
934
|
+
):
|
935
|
+
kwargs.update(self.get_input_values_direct(raise_on_unset=raise_on_unset))
|
936
|
+
|
937
|
+
if add_script_files:
|
938
|
+
assert blk_act_key
|
939
|
+
in_out_names = self.action.get_script_input_output_file_paths(blk_act_key)
|
940
|
+
in_names, out_names = in_out_names["inputs"], in_out_names["outputs"]
|
941
|
+
if in_names:
|
942
|
+
kwargs["_input_files"] = in_names
|
943
|
+
if out_names:
|
944
|
+
kwargs["_output_files"] = out_names
|
945
|
+
|
946
|
+
return kwargs
|
947
|
+
|
948
|
+
def write_script_input_files(self, block_act_key: BlockActionKey) -> None:
|
806
949
|
"""
|
807
950
|
Write values to files in standard formats.
|
808
951
|
"""
|
809
952
|
for fmt, ins in self.action.script_data_in_grouped.items():
|
810
|
-
in_vals = self.get_input_values(
|
953
|
+
in_vals = self.get_input_values(
|
954
|
+
inputs=ins, label_dict=False, raise_on_unset=False
|
955
|
+
)
|
811
956
|
if writer := self.__source_writer_map.get(fmt):
|
812
|
-
writer(self, in_vals,
|
813
|
-
|
814
|
-
# write the script if it is specified as a app data script, otherwise we assume
|
815
|
-
# the script already exists in the working directory:
|
816
|
-
if snip_path := self.action.get_snippet_script_path(
|
817
|
-
self.action.script, self.env_spec
|
818
|
-
):
|
819
|
-
with Path(snip_path.name).open("wt", newline="\n") as fp:
|
820
|
-
fp.write(self.action.compose_source(snip_path))
|
957
|
+
writer(self, in_vals, block_act_key)
|
821
958
|
|
822
959
|
def __write_json_inputs(
|
823
|
-
self,
|
960
|
+
self,
|
961
|
+
in_vals: Mapping[str, ParameterValue | list[ParameterValue]],
|
962
|
+
block_act_key: BlockActionKey,
|
824
963
|
):
|
825
964
|
in_vals_processed: dict[str, Any] = {}
|
826
965
|
for k, v in in_vals.items():
|
@@ -831,21 +970,29 @@ class ElementActionRun(AppAware):
|
|
831
970
|
except (AttributeError, NotImplementedError):
|
832
971
|
in_vals_processed[k] = v
|
833
972
|
|
834
|
-
with self.action.get_param_dump_file_path_JSON(
|
835
|
-
"wt"
|
836
|
-
) as fp:
|
973
|
+
with self.action.get_param_dump_file_path_JSON(block_act_key).open("wt") as fp:
|
837
974
|
json.dump(in_vals_processed, fp)
|
838
975
|
|
839
976
|
def __write_hdf5_inputs(
|
840
|
-
self,
|
977
|
+
self,
|
978
|
+
in_vals: Mapping[str, ParameterValue | list[ParameterValue]],
|
979
|
+
block_act_key: BlockActionKey,
|
841
980
|
):
|
842
981
|
import h5py # type: ignore
|
843
982
|
|
844
983
|
with h5py.File(
|
845
|
-
self.action.get_param_dump_file_path_HDF5(
|
984
|
+
self.action.get_param_dump_file_path_HDF5(block_act_key), mode="w"
|
846
985
|
) as h5file:
|
847
986
|
for k, v in in_vals.items():
|
848
|
-
|
987
|
+
grp_k = h5file.create_group(k)
|
988
|
+
try:
|
989
|
+
assert isinstance(v, ParameterValue)
|
990
|
+
v.dump_to_HDF5_group(grp_k)
|
991
|
+
except AttributeError:
|
992
|
+
# probably an element group (i.e. v is a list of `ParameterValue`
|
993
|
+
# objects):
|
994
|
+
assert isinstance(v, list)
|
995
|
+
v[0].dump_element_group_to_HDF5_group(v, grp_k)
|
849
996
|
|
850
997
|
__source_writer_map: ClassVar[dict[str, Callable[..., None]]] = {
|
851
998
|
"json": __write_json_inputs,
|
@@ -855,47 +1002,79 @@ class ElementActionRun(AppAware):
|
|
855
1002
|
def __output_index(self, param_name: str) -> int:
|
856
1003
|
return cast("int", self.data_idx[f"outputs.{param_name}"])
|
857
1004
|
|
858
|
-
def _param_save(self,
|
1005
|
+
def _param_save(self, block_act_key: BlockActionKey, run_dir: Path | None = None):
|
859
1006
|
"""Save script-generated parameters that are stored within the supported script
|
860
1007
|
data output formats (HDF5, JSON, etc)."""
|
1008
|
+
in_out_names = self.action.get_script_input_output_file_paths(
|
1009
|
+
block_act_key, directory=run_dir
|
1010
|
+
)
|
1011
|
+
|
861
1012
|
import h5py # type: ignore
|
862
1013
|
|
863
1014
|
parameters = self._app.parameters
|
864
|
-
for fmt in
|
1015
|
+
for fmt, load_path in in_out_names["outputs"].items():
|
865
1016
|
if fmt == "json":
|
866
|
-
with
|
867
|
-
mode="rt"
|
868
|
-
) as f:
|
1017
|
+
with load_path.open(mode="rt") as f:
|
869
1018
|
file_data: dict[str, Any] = json.load(f)
|
870
1019
|
for param_name, param_dat in file_data.items():
|
871
1020
|
param_id = self.__output_index(param_name)
|
872
1021
|
if param_cls := parameters.get(param_name)._force_value_class():
|
873
|
-
|
874
|
-
|
875
|
-
|
876
|
-
|
877
|
-
|
878
|
-
|
1022
|
+
try:
|
1023
|
+
param_cls.save_from_JSON(
|
1024
|
+
param_dat, param_id, self.workflow
|
1025
|
+
)
|
1026
|
+
continue
|
1027
|
+
except NotImplementedError:
|
1028
|
+
pass
|
1029
|
+
# try to save as a primitive:
|
1030
|
+
self.workflow.set_parameter_value(
|
1031
|
+
param_id=param_id, value=param_dat
|
1032
|
+
)
|
879
1033
|
|
880
1034
|
elif fmt == "hdf5":
|
881
|
-
with h5py.File(
|
882
|
-
self.action.get_param_load_file_path_HDF5(js_idx, js_act_idx),
|
883
|
-
mode="r",
|
884
|
-
) as h5file:
|
1035
|
+
with h5py.File(load_path, mode="r") as h5file:
|
885
1036
|
for param_name, h5_grp in h5file.items():
|
1037
|
+
param_id = self.__output_index(param_name)
|
886
1038
|
if param_cls := parameters.get(param_name)._force_value_class():
|
887
|
-
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
893
|
-
|
894
|
-
|
895
|
-
|
1039
|
+
try:
|
1040
|
+
param_cls.save_from_HDF5_group(
|
1041
|
+
h5_grp, param_id, self.workflow
|
1042
|
+
)
|
1043
|
+
continue
|
1044
|
+
except NotImplementedError:
|
1045
|
+
pass
|
1046
|
+
# Unlike with JSON, we've no fallback so we warn
|
1047
|
+
self._app.logger.warning(
|
1048
|
+
"parameter %s could not be saved; serializer not found",
|
1049
|
+
param_name,
|
1050
|
+
)
|
1051
|
+
|
1052
|
+
@property
|
1053
|
+
def is_snippet_script(self) -> bool:
|
1054
|
+
"""Returns True if the action script string represents a script snippets that is
|
1055
|
+
to be modified before execution (e.g. to receive and provide parameter data)."""
|
1056
|
+
try:
|
1057
|
+
return self.action.is_snippet_script(self.action.script)
|
1058
|
+
except AttributeError:
|
1059
|
+
return False
|
1060
|
+
|
1061
|
+
def get_script_artifact_name(self) -> str:
|
1062
|
+
"""Return the script name that is used when writing the script to the artifacts
|
1063
|
+
directory within the workflow.
|
1064
|
+
|
1065
|
+
Like `Action.get_script_name`, this is only applicable for snippet scripts.
|
1066
|
+
|
1067
|
+
"""
|
1068
|
+
art_name, snip_path = self.action.get_script_artifact_name(
|
1069
|
+
env_spec=self.env_spec,
|
1070
|
+
act_idx=self.element_action.action_idx,
|
1071
|
+
include_suffix=True,
|
1072
|
+
specs_suffix_delim=".",
|
1073
|
+
)
|
1074
|
+
return art_name
|
896
1075
|
|
897
1076
|
def compose_commands(
|
898
|
-
self,
|
1077
|
+
self, environments: EnvironmentsList, shell: Shell
|
899
1078
|
) -> tuple[str, Mapping[int, Sequence[tuple[str, ...]]]]:
|
900
1079
|
"""
|
901
1080
|
Write the EAR's enactment to disk in preparation for submission.
|
@@ -913,21 +1092,13 @@ class ElementActionRun(AppAware):
|
|
913
1092
|
self._app.persistence_logger.debug("EAR.compose_commands")
|
914
1093
|
env_spec = self.env_spec
|
915
1094
|
|
916
|
-
for ifg in self.action.input_file_generators:
|
917
|
-
# TODO: there should only be one at this stage if expanded?
|
918
|
-
ifg.write_source(self.action, env_spec)
|
919
|
-
|
920
1095
|
for ofp in self.action.output_file_parsers:
|
921
1096
|
# TODO: there should only be one at this stage if expanded?
|
922
1097
|
if ofp.output is None:
|
923
1098
|
raise OutputFileParserNoOutputError()
|
924
|
-
ofp.write_source(self.action, env_spec)
|
925
|
-
|
926
|
-
if self.action.script:
|
927
|
-
self.write_source(js_idx=jobscript.index, js_act_idx=JS_action_idx)
|
928
1099
|
|
929
1100
|
command_lns: list[str] = []
|
930
|
-
if (env :=
|
1101
|
+
if (env := environments.get(**env_spec)).setup:
|
931
1102
|
command_lns.extend(env.setup)
|
932
1103
|
|
933
1104
|
shell_vars: dict[int, list[tuple[str, ...]]] = {}
|
@@ -935,12 +1106,135 @@ class ElementActionRun(AppAware):
|
|
935
1106
|
if cmd_idx in self.commands_idx:
|
936
1107
|
# only execute commands that have no rules, or all valid rules:
|
937
1108
|
cmd_str, shell_vars[cmd_idx] = command.get_command_line(
|
938
|
-
EAR=self, shell=
|
1109
|
+
EAR=self, shell=shell, env=env
|
939
1110
|
)
|
940
1111
|
command_lns.append(cmd_str)
|
941
1112
|
|
942
1113
|
return ("\n".join(command_lns) + "\n"), shell_vars
|
943
1114
|
|
1115
|
+
def get_commands_file_hash(self) -> int:
|
1116
|
+
"""Get a hash that can be used to group together runs that will have the same
|
1117
|
+
commands file.
|
1118
|
+
|
1119
|
+
This hash is not stable across sessions or machines.
|
1120
|
+
|
1121
|
+
"""
|
1122
|
+
return self.action.get_commands_file_hash(
|
1123
|
+
data_idx=self.get_data_idx(),
|
1124
|
+
action_idx=self.element_action.action_idx,
|
1125
|
+
)
|
1126
|
+
|
1127
|
+
@overload
|
1128
|
+
def try_write_commands(
|
1129
|
+
self,
|
1130
|
+
jobscript: Jobscript,
|
1131
|
+
environments: EnvironmentsList,
|
1132
|
+
raise_on_unset: Literal[True],
|
1133
|
+
) -> Path:
|
1134
|
+
...
|
1135
|
+
|
1136
|
+
@overload
|
1137
|
+
def try_write_commands(
|
1138
|
+
self,
|
1139
|
+
jobscript: Jobscript,
|
1140
|
+
environments: EnvironmentsList,
|
1141
|
+
raise_on_unset: Literal[False] = False,
|
1142
|
+
) -> Path | None:
|
1143
|
+
...
|
1144
|
+
|
1145
|
+
def try_write_commands(
|
1146
|
+
self,
|
1147
|
+
jobscript: Jobscript,
|
1148
|
+
environments: EnvironmentsList,
|
1149
|
+
raise_on_unset: bool = False,
|
1150
|
+
) -> Path | None:
|
1151
|
+
"""Attempt to write the commands file for this run."""
|
1152
|
+
app_name = self._app.package_name
|
1153
|
+
try:
|
1154
|
+
commands, shell_vars = self.compose_commands(
|
1155
|
+
environments=environments,
|
1156
|
+
shell=jobscript.shell,
|
1157
|
+
)
|
1158
|
+
except UnsetParameterDataError:
|
1159
|
+
if raise_on_unset:
|
1160
|
+
raise
|
1161
|
+
self._app.submission_logger.debug(
|
1162
|
+
f"cannot yet write commands file for run ID {self.id_}; unset parameters"
|
1163
|
+
)
|
1164
|
+
return None
|
1165
|
+
|
1166
|
+
for cmd_idx, var_dat in shell_vars.items():
|
1167
|
+
for param_name, shell_var_name, st_typ in var_dat:
|
1168
|
+
commands += jobscript.shell.format_save_parameter(
|
1169
|
+
workflow_app_alias=jobscript.workflow_app_alias,
|
1170
|
+
param_name=param_name,
|
1171
|
+
shell_var_name=shell_var_name,
|
1172
|
+
cmd_idx=cmd_idx,
|
1173
|
+
stderr=(st_typ == "stderr"),
|
1174
|
+
app_name=app_name,
|
1175
|
+
)
|
1176
|
+
|
1177
|
+
commands_fmt = jobscript.shell.format_commands_file(app_name, commands)
|
1178
|
+
|
1179
|
+
if jobscript.resources.combine_scripts:
|
1180
|
+
stem = f"js_{jobscript.index}" # TODO: refactor
|
1181
|
+
else:
|
1182
|
+
stem = str(self.id_)
|
1183
|
+
|
1184
|
+
cmd_file_name = f"{stem}{jobscript.shell.JS_EXT}"
|
1185
|
+
cmd_file_path: Path = jobscript.submission.commands_path / cmd_file_name
|
1186
|
+
with cmd_file_path.open("wt", newline="\n") as fp:
|
1187
|
+
fp.write(commands_fmt)
|
1188
|
+
|
1189
|
+
return cmd_file_path
|
1190
|
+
|
1191
|
+
@contextlib.contextmanager
|
1192
|
+
def raise_on_failure_threshold(self) -> Iterator[dict[str, UnsetParamTracker]]:
|
1193
|
+
"""Context manager to track parameter types and associated run IDs for which those
|
1194
|
+
parameters were found to be unset when accessed via
|
1195
|
+
`WorkflowTask._get_merged_parameter_data`.
|
1196
|
+
|
1197
|
+
"""
|
1198
|
+
self.workflow._is_tracking_unset = True
|
1199
|
+
self.workflow._tracked_unset = defaultdict(
|
1200
|
+
lambda: UnsetParamTracker(run_ids=set(), group_size=-1)
|
1201
|
+
)
|
1202
|
+
try:
|
1203
|
+
yield dict(self.workflow._tracked_unset)
|
1204
|
+
except:
|
1205
|
+
raise
|
1206
|
+
else:
|
1207
|
+
try:
|
1208
|
+
for schema_inp in self.task.template.schema.inputs:
|
1209
|
+
inp_path = f"inputs.{schema_inp.typ}"
|
1210
|
+
if inp_path in self.workflow._tracked_unset:
|
1211
|
+
unset_tracker = self.workflow._tracked_unset[inp_path]
|
1212
|
+
unset_num = len(unset_tracker.run_ids)
|
1213
|
+
unset_fraction = unset_num / unset_tracker.group_size
|
1214
|
+
if isinstance(schema_inp.allow_failed_dependencies, float):
|
1215
|
+
# `True` is converted to 1.0 on SchemaInput init
|
1216
|
+
if unset_fraction > schema_inp.allow_failed_dependencies:
|
1217
|
+
raise UnsetParameterFractionLimitExceededError(
|
1218
|
+
schema_inp,
|
1219
|
+
self.task,
|
1220
|
+
unset_fraction,
|
1221
|
+
log=self._app.submission_logger,
|
1222
|
+
)
|
1223
|
+
elif isinstance(schema_inp.allow_failed_dependencies, int):
|
1224
|
+
if unset_num > schema_inp.allow_failed_dependencies:
|
1225
|
+
raise UnsetParameterNumberLimitExceededError(
|
1226
|
+
schema_inp,
|
1227
|
+
self.task,
|
1228
|
+
unset_num,
|
1229
|
+
log=self._app.submission_logger,
|
1230
|
+
)
|
1231
|
+
finally:
|
1232
|
+
self.workflow._is_tracking_unset = False
|
1233
|
+
self.workflow._tracked_unset = None
|
1234
|
+
finally:
|
1235
|
+
self.workflow._is_tracking_unset = False
|
1236
|
+
self.workflow._tracked_unset = None
|
1237
|
+
|
944
1238
|
|
945
1239
|
class ElementAction(AppAware):
|
946
1240
|
"""
|
@@ -1403,6 +1697,16 @@ class ActionRule(JSONLike):
|
|
1403
1697
|
return False
|
1404
1698
|
return self.rule == other.rule
|
1405
1699
|
|
1700
|
+
@property
|
1701
|
+
def __parent_action(self) -> Action:
|
1702
|
+
if self.action:
|
1703
|
+
return self.action
|
1704
|
+
else:
|
1705
|
+
assert self.command
|
1706
|
+
act = self.command.action
|
1707
|
+
assert act
|
1708
|
+
return act
|
1709
|
+
|
1406
1710
|
@TimeIt.decorator
|
1407
1711
|
def test(self, element_iteration: ElementIteration) -> bool:
|
1408
1712
|
"""
|
@@ -1413,7 +1717,11 @@ class ActionRule(JSONLike):
|
|
1413
1717
|
element_iteration:
|
1414
1718
|
The iteration to apply this rule to.
|
1415
1719
|
"""
|
1416
|
-
|
1720
|
+
|
1721
|
+
return self.rule.test(
|
1722
|
+
element_like=element_iteration,
|
1723
|
+
action=self.__parent_action,
|
1724
|
+
)
|
1417
1725
|
|
1418
1726
|
@classmethod
|
1419
1727
|
def check_exists(cls, check_exists: str) -> ActionRule:
|
@@ -1493,6 +1801,7 @@ class Action(JSONLike):
|
|
1493
1801
|
name="commands",
|
1494
1802
|
class_name="Command",
|
1495
1803
|
is_multiple=True,
|
1804
|
+
parent_ref="action",
|
1496
1805
|
),
|
1497
1806
|
ChildObjectSpec(
|
1498
1807
|
name="input_file_generators",
|
@@ -1564,6 +1873,7 @@ class Action(JSONLike):
|
|
1564
1873
|
rules: list[ActionRule] | None = None,
|
1565
1874
|
save_files: list[FileSpec] | None = None,
|
1566
1875
|
clean_up: list[str] | None = None,
|
1876
|
+
requires_dir: bool | None = None,
|
1567
1877
|
):
|
1568
1878
|
#: The commands to be run by this action.
|
1569
1879
|
self.commands = commands or []
|
@@ -1581,7 +1891,7 @@ class Action(JSONLike):
|
|
1581
1891
|
#: options are always passed, and this parameter is overwritten to be True,
|
1582
1892
|
#: regardless of its initial value.
|
1583
1893
|
self.script_data_files_use_opt = (
|
1584
|
-
script_data_files_use_opt if not self.
|
1894
|
+
script_data_files_use_opt if not self.script_is_python_snippet else True
|
1585
1895
|
)
|
1586
1896
|
#: The executable to use to run the script.
|
1587
1897
|
self.script_exe = script_exe.lower() if script_exe else None
|
@@ -1608,6 +1918,12 @@ class Action(JSONLike):
|
|
1608
1918
|
#: The names of files to be deleted after each step.
|
1609
1919
|
self.clean_up = clean_up or []
|
1610
1920
|
|
1921
|
+
if requires_dir is None:
|
1922
|
+
requires_dir = (
|
1923
|
+
True if self.input_file_generators or self.output_file_parsers else False
|
1924
|
+
)
|
1925
|
+
self.requires_dir = requires_dir
|
1926
|
+
|
1611
1927
|
self._task_schema: TaskSchema | None = None # assigned by parent TaskSchema
|
1612
1928
|
self._from_expand = False # assigned on creation of new Action by `expand`
|
1613
1929
|
|
@@ -1725,12 +2041,14 @@ class Action(JSONLike):
|
|
1725
2041
|
def script_data_in_has_files(self) -> bool:
|
1726
2042
|
"""Return True if the script requires some inputs to be passed via an
|
1727
2043
|
intermediate file format."""
|
2044
|
+
# TODO: should set `requires_dir` to True if this is True?
|
1728
2045
|
return bool(set(self.script_data_in_grouped) - {"direct"}) # TODO: test
|
1729
2046
|
|
1730
2047
|
@property
|
1731
2048
|
def script_data_out_has_files(self) -> bool:
|
1732
2049
|
"""Return True if the script produces some outputs via an intermediate file
|
1733
2050
|
format."""
|
2051
|
+
# TODO: should set `requires_dir` to True if this is True?
|
1734
2052
|
return bool(set(self.script_data_out_grouped) - {"direct"}) # TODO: test
|
1735
2053
|
|
1736
2054
|
@property
|
@@ -1746,8 +2064,8 @@ class Action(JSONLike):
|
|
1746
2064
|
return "direct" in self.script_data_out_grouped # TODO: test
|
1747
2065
|
|
1748
2066
|
@property
|
1749
|
-
def
|
1750
|
-
"""Return True if the script is a Python script (determined by the file
|
2067
|
+
def script_is_python_snippet(self) -> bool:
|
2068
|
+
"""Return True if the script is a Python snippet script (determined by the file
|
1751
2069
|
extension)"""
|
1752
2070
|
if self.script and (snip_path := self.get_snippet_script_path(self.script)):
|
1753
2071
|
return snip_path.suffix == ".py"
|
@@ -1760,6 +2078,14 @@ class Action(JSONLike):
|
|
1760
2078
|
d["script_data_out"] = d.pop("_script_data_out")
|
1761
2079
|
return d
|
1762
2080
|
|
2081
|
+
@property
|
2082
|
+
def is_IFG(self):
|
2083
|
+
return bool(self.input_file_generators)
|
2084
|
+
|
2085
|
+
@property
|
2086
|
+
def is_OFP(self):
|
2087
|
+
return bool(self.output_file_parsers)
|
2088
|
+
|
1763
2089
|
def __deepcopy__(self, memo: dict[int, Any]) -> Self:
|
1764
2090
|
kwargs = self.to_dict()
|
1765
2091
|
_from_expand = kwargs.pop("_from_expand")
|
@@ -1833,6 +2159,40 @@ class Action(JSONLike):
|
|
1833
2159
|
and self.rules == other.rules
|
1834
2160
|
)
|
1835
2161
|
|
2162
|
+
@staticmethod
|
2163
|
+
def env_spec_to_hashable(
|
2164
|
+
env_spec: Mapping[str, Any],
|
2165
|
+
) -> tuple[tuple[str, ...], tuple[Any, ...]]:
|
2166
|
+
keys, values = zip(*env_spec.items()) if env_spec else ((), ())
|
2167
|
+
return tuple(keys), tuple(values)
|
2168
|
+
|
2169
|
+
@staticmethod
|
2170
|
+
def env_spec_from_hashable(
|
2171
|
+
env_spec_h: tuple[tuple[str, ...], tuple[Any, ...]],
|
2172
|
+
) -> dict[str, Any]:
|
2173
|
+
return dict(zip(*env_spec_h))
|
2174
|
+
|
2175
|
+
def get_script_determinants(self) -> tuple:
|
2176
|
+
"""Get the attributes that affect the script."""
|
2177
|
+
return (
|
2178
|
+
self.script,
|
2179
|
+
self.script_data_in,
|
2180
|
+
self.script_data_out,
|
2181
|
+
self.script_data_files_use_opt,
|
2182
|
+
self.script_exe,
|
2183
|
+
)
|
2184
|
+
|
2185
|
+
def get_script_determinant_hash(self, env_specs: dict | None = None) -> int:
|
2186
|
+
"""Get a hash of the instance attributes that uniquely determine the script.
|
2187
|
+
|
2188
|
+
The hash is not stable across sessions or machines.
|
2189
|
+
|
2190
|
+
"""
|
2191
|
+
env_specs = env_specs or {}
|
2192
|
+
return get_hash(
|
2193
|
+
(self.get_script_determinants(), self.env_spec_to_hashable(env_specs))
|
2194
|
+
)
|
2195
|
+
|
1836
2196
|
@classmethod
|
1837
2197
|
def _json_like_constructor(cls, json_like) -> Self:
|
1838
2198
|
"""Invoked by `JSONLike.from_json_like` instead of `__init__`."""
|
@@ -1958,23 +2318,128 @@ class Action(JSONLike):
|
|
1958
2318
|
|
1959
2319
|
@classmethod
|
1960
2320
|
def get_script_name(cls, script: str) -> str:
|
1961
|
-
"""Return the script name.
|
2321
|
+
"""Return the script name.
|
2322
|
+
|
2323
|
+
If `script` is a snippet script path, this method returns the name of the script
|
2324
|
+
(i.e. the final component of the path). If `script` is not a snippet script path
|
2325
|
+
(does not start with "<<script:"), then `script` is simply returned.
|
2326
|
+
|
2327
|
+
"""
|
1962
2328
|
if cls.is_snippet_script(script):
|
1963
2329
|
if not (match_obj := cls.__SCRIPT_NAME_RE.match(script)):
|
1964
2330
|
raise ValueError("incomplete <<script:>>")
|
1965
2331
|
return match_obj[1]
|
1966
|
-
# a script we can expect in the working directory
|
2332
|
+
# a script we can expect in the working directory, which might have been generated
|
2333
|
+
# by a previous action:
|
1967
2334
|
return script
|
1968
2335
|
|
2336
|
+
@overload
|
2337
|
+
def get_script_artifact_name(
|
2338
|
+
self,
|
2339
|
+
env_spec: Mapping[str, Any],
|
2340
|
+
act_idx: int,
|
2341
|
+
ret_specifiers: Literal[False] = False,
|
2342
|
+
include_suffix: bool = True,
|
2343
|
+
specs_suffix_delim: str = ".",
|
2344
|
+
) -> tuple[str, Path]:
|
2345
|
+
...
|
2346
|
+
|
2347
|
+
@overload
|
2348
|
+
def get_script_artifact_name(
|
2349
|
+
self,
|
2350
|
+
env_spec: Mapping[str, Any],
|
2351
|
+
act_idx: int,
|
2352
|
+
ret_specifiers: Literal[True],
|
2353
|
+
include_suffix: bool = True,
|
2354
|
+
specs_suffix_delim: str = ".",
|
2355
|
+
) -> tuple[str, Path, dict]:
|
2356
|
+
...
|
2357
|
+
|
2358
|
+
def get_script_artifact_name(
|
2359
|
+
self,
|
2360
|
+
env_spec: Mapping[str, Any],
|
2361
|
+
act_idx: int,
|
2362
|
+
ret_specifiers: bool = False,
|
2363
|
+
include_suffix: bool = True,
|
2364
|
+
specs_suffix_delim: str = ".",
|
2365
|
+
) -> tuple[str, Path] | tuple[str, Path, dict]:
|
2366
|
+
"""Return the script name that is used when writing the script to the artifacts
|
2367
|
+
directory within the workflow.
|
2368
|
+
|
2369
|
+
Like `Action.get_script_name`, this is only applicable for snippet scripts.
|
2370
|
+
|
2371
|
+
"""
|
2372
|
+
snip_path_specs = self.get_snippet_script_path(
|
2373
|
+
self.script,
|
2374
|
+
env_spec,
|
2375
|
+
ret_specifiers=True,
|
2376
|
+
)
|
2377
|
+
assert snip_path_specs
|
2378
|
+
snip_path, specifiers = snip_path_specs
|
2379
|
+
specs_suffix = "__".join(f"{k}_{v}" for k, v in specifiers.items())
|
2380
|
+
if specs_suffix:
|
2381
|
+
specs_suffix = f"{specs_suffix_delim}{specs_suffix}"
|
2382
|
+
|
2383
|
+
name = f"{self.task_schema.name}_act_{act_idx}{specs_suffix}"
|
2384
|
+
if include_suffix:
|
2385
|
+
name += snip_path.suffix
|
2386
|
+
|
2387
|
+
if ret_specifiers:
|
2388
|
+
return name, snip_path, specifiers
|
2389
|
+
else:
|
2390
|
+
return name, snip_path
|
2391
|
+
|
1969
2392
|
__SCRIPT_RE: ClassVar[Pattern] = re.compile(r"\<\<script:(.*:?)\>\>")
|
1970
2393
|
__ENV_RE: ClassVar[Pattern] = re.compile(r"\<\<env:(.*?)\>\>")
|
1971
2394
|
|
2395
|
+
@overload
|
1972
2396
|
@classmethod
|
1973
2397
|
def get_snippet_script_str(
|
1974
|
-
cls,
|
2398
|
+
cls,
|
2399
|
+
script: str,
|
2400
|
+
env_spec: Mapping[str, Any] | None = None,
|
2401
|
+
ret_specifiers: Literal[False] = False,
|
1975
2402
|
) -> str:
|
1976
|
-
|
1977
|
-
|
2403
|
+
...
|
2404
|
+
|
2405
|
+
@overload
|
2406
|
+
@classmethod
|
2407
|
+
def get_snippet_script_str(
|
2408
|
+
cls,
|
2409
|
+
script: str,
|
2410
|
+
env_spec: Mapping[str, Any] | None = None,
|
2411
|
+
*,
|
2412
|
+
ret_specifiers: Literal[True],
|
2413
|
+
) -> tuple[str, dict[str, Any]]:
|
2414
|
+
...
|
2415
|
+
|
2416
|
+
@overload
|
2417
|
+
@classmethod
|
2418
|
+
def get_snippet_script_str(
|
2419
|
+
cls,
|
2420
|
+
script: str,
|
2421
|
+
env_spec: Mapping[str, Any] | None = None,
|
2422
|
+
*,
|
2423
|
+
ret_specifiers: bool,
|
2424
|
+
) -> str | tuple[str, dict[str, Any]]:
|
2425
|
+
...
|
2426
|
+
|
2427
|
+
@classmethod
|
2428
|
+
def get_snippet_script_str(
|
2429
|
+
cls,
|
2430
|
+
script: str,
|
2431
|
+
env_spec: Mapping[str, Any] | None = None,
|
2432
|
+
ret_specifiers: bool = False,
|
2433
|
+
) -> str | tuple[str, dict[str, Any]]:
|
2434
|
+
"""Return the specified snippet `script` with variable substitutions completed.
|
2435
|
+
|
2436
|
+
Parameters
|
2437
|
+
----------
|
2438
|
+
ret_specifiers
|
2439
|
+
If True, also return a list of environment specifiers as a dict whose keys are
|
2440
|
+
specifier keys found in the `script` path and whose values are the
|
2441
|
+
corresponding values extracted from `env_spec`.
|
2442
|
+
|
1978
2443
|
"""
|
1979
2444
|
if not cls.is_snippet_script(script):
|
1980
2445
|
raise ValueError(
|
@@ -1985,66 +2450,130 @@ class Action(JSONLike):
|
|
1985
2450
|
raise ValueError("incomplete <<script:>>")
|
1986
2451
|
out: str = match_obj[1]
|
1987
2452
|
|
1988
|
-
if env_spec:
|
2453
|
+
if env_spec is not None:
|
2454
|
+
specifiers: dict[str, Any] = {}
|
2455
|
+
|
2456
|
+
def repl(match_obj):
|
2457
|
+
spec = match_obj[1]
|
2458
|
+
specifiers[spec] = env_spec[spec]
|
2459
|
+
return str(env_spec[spec])
|
2460
|
+
|
1989
2461
|
out = cls.__ENV_RE.sub(
|
1990
|
-
repl=
|
2462
|
+
repl=repl,
|
1991
2463
|
string=out,
|
1992
2464
|
)
|
2465
|
+
if ret_specifiers:
|
2466
|
+
return (out, specifiers)
|
1993
2467
|
return out
|
1994
2468
|
|
1995
2469
|
@classmethod
|
2470
|
+
@overload
|
1996
2471
|
def get_snippet_script_path(
|
1997
|
-
cls,
|
2472
|
+
cls,
|
2473
|
+
script_path: str | None,
|
2474
|
+
env_spec: Mapping[str, Any] | None = None,
|
2475
|
+
*,
|
2476
|
+
ret_specifiers: Literal[True],
|
2477
|
+
) -> tuple[Path, dict[str, Any]] | None:
|
2478
|
+
...
|
2479
|
+
|
2480
|
+
@classmethod
|
2481
|
+
@overload
|
2482
|
+
def get_snippet_script_path(
|
2483
|
+
cls,
|
2484
|
+
script_path: str | None,
|
2485
|
+
env_spec: Mapping[str, Any] | None = None,
|
2486
|
+
*,
|
2487
|
+
ret_specifiers: Literal[False] = False,
|
1998
2488
|
) -> Path | None:
|
1999
|
-
|
2000
|
-
|
2489
|
+
...
|
2490
|
+
|
2491
|
+
@classmethod
|
2492
|
+
def get_snippet_script_path(
|
2493
|
+
cls,
|
2494
|
+
script_path: str | None,
|
2495
|
+
env_spec: Mapping[str, Any] | None = None,
|
2496
|
+
*,
|
2497
|
+
ret_specifiers: bool = False,
|
2498
|
+
) -> Path | tuple[Path, dict[str, Any]] | None:
|
2499
|
+
"""Return the specified snippet `script` path, or None if there is no snippet.
|
2500
|
+
|
2501
|
+
Parameters
|
2502
|
+
----------
|
2503
|
+
ret_specifiers
|
2504
|
+
If True, also return a list of environment specifiers as a dict whose keys are
|
2505
|
+
specifier keys found in the `script` path and whose values are the
|
2506
|
+
corresponding values extracted from `env_spec`.
|
2507
|
+
|
2001
2508
|
"""
|
2002
2509
|
if not cls.is_snippet_script(script_path):
|
2003
2510
|
return None
|
2004
2511
|
|
2005
2512
|
assert script_path is not None
|
2006
|
-
|
2007
|
-
|
2513
|
+
path_ = cls.get_snippet_script_str(
|
2514
|
+
script_path, env_spec, ret_specifiers=ret_specifiers
|
2515
|
+
)
|
2516
|
+
if ret_specifiers:
|
2517
|
+
assert isinstance(path_, tuple)
|
2518
|
+
path_str, specifiers = path_
|
2519
|
+
else:
|
2520
|
+
assert isinstance(path_, str)
|
2521
|
+
path_str = path_
|
2522
|
+
|
2523
|
+
path = Path(cls._app.scripts.get(path_str, path_str))
|
2524
|
+
|
2525
|
+
if ret_specifiers:
|
2526
|
+
return path, specifiers
|
2527
|
+
else:
|
2528
|
+
return path
|
2008
2529
|
|
2009
2530
|
@staticmethod
|
2010
|
-
def __get_param_dump_file_stem(
|
2011
|
-
return RunDirAppFiles.get_run_param_dump_file_prefix(
|
2531
|
+
def __get_param_dump_file_stem(block_act_key: BlockActionKey) -> str:
|
2532
|
+
return RunDirAppFiles.get_run_param_dump_file_prefix(block_act_key)
|
2012
2533
|
|
2013
2534
|
@staticmethod
|
2014
|
-
def __get_param_load_file_stem(
|
2015
|
-
return RunDirAppFiles.get_run_param_load_file_prefix(
|
2535
|
+
def __get_param_load_file_stem(block_act_key: BlockActionKey) -> str:
|
2536
|
+
return RunDirAppFiles.get_run_param_load_file_prefix(block_act_key)
|
2016
2537
|
|
2017
2538
|
def get_param_dump_file_path_JSON(
|
2018
|
-
self,
|
2539
|
+
self, block_act_key: BlockActionKey, directory: Path | None = None
|
2019
2540
|
) -> Path:
|
2020
2541
|
"""
|
2021
2542
|
Get the path of the JSON dump file.
|
2022
2543
|
"""
|
2023
|
-
|
2544
|
+
directory = directory or Path()
|
2545
|
+
return directory.joinpath(
|
2546
|
+
self.__get_param_dump_file_stem(block_act_key) + ".json"
|
2547
|
+
)
|
2024
2548
|
|
2025
2549
|
def get_param_dump_file_path_HDF5(
|
2026
|
-
self,
|
2550
|
+
self, block_act_key: BlockActionKey, directory: Path | None = None
|
2027
2551
|
) -> Path:
|
2028
2552
|
"""
|
2029
|
-
Get the path of the
|
2553
|
+
Get the path of the HDF5 dump file.
|
2030
2554
|
"""
|
2031
|
-
|
2555
|
+
directory = directory or Path()
|
2556
|
+
return directory.joinpath(self.__get_param_dump_file_stem(block_act_key) + ".h5")
|
2032
2557
|
|
2033
2558
|
def get_param_load_file_path_JSON(
|
2034
|
-
self,
|
2559
|
+
self, block_act_key: BlockActionKey, directory: Path | None = None
|
2035
2560
|
) -> Path:
|
2036
2561
|
"""
|
2037
2562
|
Get the path of the JSON load file.
|
2038
2563
|
"""
|
2039
|
-
|
2564
|
+
directory = directory or Path()
|
2565
|
+
return directory.joinpath(
|
2566
|
+
self.__get_param_load_file_stem(block_act_key) + ".json"
|
2567
|
+
)
|
2040
2568
|
|
2041
2569
|
def get_param_load_file_path_HDF5(
|
2042
|
-
self,
|
2570
|
+
self, block_act_key: BlockActionKey, directory: Path | None = None
|
2043
2571
|
) -> Path:
|
2044
2572
|
"""
|
2045
2573
|
Get the path of the HDF5 load file.
|
2046
2574
|
"""
|
2047
|
-
|
2575
|
+
directory = directory or Path()
|
2576
|
+
return directory.joinpath(self.__get_param_load_file_stem(block_act_key) + ".h5")
|
2048
2577
|
|
2049
2578
|
def expand(self) -> Sequence[Action]:
|
2050
2579
|
"""
|
@@ -2070,114 +2599,77 @@ class Action(JSONLike):
|
|
2070
2599
|
# note we keep the IFG/OPs in the new actions, so we can check the parameters
|
2071
2600
|
# used/produced.
|
2072
2601
|
|
2073
|
-
|
2074
|
-
inp_files = []
|
2602
|
+
inp_files: list[FileSpec] = []
|
2075
2603
|
inp_acts: list[Action] = []
|
2604
|
+
|
2605
|
+
app_caps = self._app.package_name.upper()
|
2606
|
+
|
2607
|
+
script_cmd_vars = {
|
2608
|
+
"script_name": f"${app_caps}_RUN_SCRIPT_NAME",
|
2609
|
+
"script_name_no_ext": f"${app_caps}_RUN_SCRIPT_NAME_NO_EXT",
|
2610
|
+
"script_dir": f"${app_caps}_RUN_SCRIPT_DIR",
|
2611
|
+
"script_path": f"${app_caps}_RUN_SCRIPT_PATH",
|
2612
|
+
}
|
2613
|
+
|
2076
2614
|
for ifg in self.input_file_generators:
|
2077
|
-
|
2078
|
-
|
2079
|
-
|
2080
|
-
"$EAR_ID",
|
2081
|
-
] # WK_PATH could have a space in it
|
2082
|
-
if ifg.script:
|
2083
|
-
script_name = self.get_script_name(ifg.script)
|
2084
|
-
variables = {
|
2085
|
-
"script_name": script_name,
|
2086
|
-
"script_name_no_ext": str(Path(script_name).stem),
|
2087
|
-
}
|
2088
|
-
else:
|
2089
|
-
variables = {}
|
2615
|
+
script_exe = "python_script"
|
2616
|
+
exe = f"<<executable:{script_exe}>>"
|
2617
|
+
variables = script_cmd_vars if ifg.script else {}
|
2090
2618
|
act_i = self._app.Action(
|
2091
|
-
commands=[
|
2092
|
-
self._app.Command(executable=exe, arguments=args, variables=variables)
|
2093
|
-
],
|
2619
|
+
commands=[self._app.Command(executable=exe, variables=variables)],
|
2094
2620
|
input_file_generators=[ifg],
|
2095
2621
|
environments=[self.get_input_file_generator_action_env(ifg)],
|
2096
2622
|
rules=main_rules + ifg.get_action_rules(),
|
2623
|
+
script=ifg.script,
|
2624
|
+
script_data_in="direct",
|
2625
|
+
script_data_out="direct",
|
2626
|
+
script_exe=script_exe,
|
2097
2627
|
script_pass_env_spec=ifg.script_pass_env_spec,
|
2098
2628
|
abortable=ifg.abortable,
|
2099
|
-
|
2629
|
+
requires_dir=ifg.requires_dir,
|
2100
2630
|
)
|
2101
2631
|
act_i._task_schema = self.task_schema
|
2102
2632
|
if ifg.input_file not in inp_files:
|
2103
2633
|
inp_files.append(ifg.input_file)
|
2634
|
+
act_i.process_script_data_formats()
|
2104
2635
|
act_i._from_expand = True
|
2105
2636
|
inp_acts.append(act_i)
|
2106
2637
|
|
2107
2638
|
out_files: list[FileSpec] = []
|
2108
2639
|
out_acts: list[Action] = []
|
2109
2640
|
for ofp in self.output_file_parsers:
|
2110
|
-
|
2111
|
-
|
2112
|
-
|
2113
|
-
"$EAR_ID",
|
2114
|
-
] # WK_PATH could have a space in it
|
2115
|
-
if ofp.script:
|
2116
|
-
script_name = self.get_script_name(ofp.script)
|
2117
|
-
variables = {
|
2118
|
-
"script_name": script_name,
|
2119
|
-
"script_name_no_ext": str(Path(script_name).stem),
|
2120
|
-
}
|
2121
|
-
else:
|
2122
|
-
variables = {}
|
2641
|
+
script_exe = "python_script"
|
2642
|
+
exe = f"<<executable:{script_exe}>>"
|
2643
|
+
variables = script_cmd_vars if ofp.script else {}
|
2123
2644
|
act_i = self._app.Action(
|
2124
|
-
commands=[
|
2125
|
-
self._app.Command(executable=exe, arguments=args, variables=variables)
|
2126
|
-
],
|
2645
|
+
commands=[self._app.Command(executable=exe, variables=variables)],
|
2127
2646
|
output_file_parsers=[ofp],
|
2128
2647
|
environments=[self.get_output_file_parser_action_env(ofp)],
|
2129
|
-
rules=
|
2648
|
+
rules=list(self.rules) + ofp.get_action_rules(),
|
2649
|
+
script=ofp.script,
|
2650
|
+
script_data_in="direct",
|
2651
|
+
script_data_out="direct",
|
2652
|
+
script_exe=script_exe,
|
2130
2653
|
script_pass_env_spec=ofp.script_pass_env_spec,
|
2131
2654
|
abortable=ofp.abortable,
|
2655
|
+
requires_dir=ofp.requires_dir,
|
2132
2656
|
)
|
2133
2657
|
act_i._task_schema = self.task_schema
|
2134
|
-
for
|
2135
|
-
if
|
2136
|
-
out_files.append(
|
2658
|
+
for j in ofp.output_files:
|
2659
|
+
if j not in out_files:
|
2660
|
+
out_files.append(j)
|
2661
|
+
act_i.process_script_data_formats()
|
2137
2662
|
act_i._from_expand = True
|
2138
2663
|
out_acts.append(act_i)
|
2139
2664
|
|
2140
2665
|
commands = self.commands
|
2141
2666
|
if self.script:
|
2142
2667
|
exe = f"<<executable:{self.script_exe}>>"
|
2143
|
-
|
2144
|
-
|
2145
|
-
|
2146
|
-
variables = {
|
2147
|
-
"script_name": script_name,
|
2148
|
-
"script_name_no_ext": str(Path(script_name).stem),
|
2149
|
-
}
|
2150
|
-
else:
|
2151
|
-
variables = {}
|
2152
|
-
if self.script_data_in_has_direct or self.script_data_out_has_direct:
|
2153
|
-
# WK_PATH could have a space in it:
|
2154
|
-
args.extend(("--wk-path", '"$WK_PATH"', "--run-id", "$EAR_ID"))
|
2155
|
-
|
2156
|
-
fn_args = {"js_idx": "${JS_IDX}", "js_act_idx": "${JS_act_idx}"}
|
2157
|
-
|
2158
|
-
for fmt in self.script_data_in_grouped:
|
2159
|
-
if fmt == "json":
|
2160
|
-
if self.script_data_files_use_opt:
|
2161
|
-
args.append("--inputs-json")
|
2162
|
-
args.append(str(self.get_param_dump_file_path_JSON(**fn_args)))
|
2163
|
-
elif fmt == "hdf5":
|
2164
|
-
if self.script_data_files_use_opt:
|
2165
|
-
args.append("--inputs-hdf5")
|
2166
|
-
args.append(str(self.get_param_dump_file_path_HDF5(**fn_args)))
|
2167
|
-
|
2168
|
-
for fmt in self.script_data_out_grouped:
|
2169
|
-
if fmt == "json":
|
2170
|
-
if self.script_data_files_use_opt:
|
2171
|
-
args.append("--outputs-json")
|
2172
|
-
args.append(str(self.get_param_load_file_path_JSON(**fn_args)))
|
2173
|
-
elif fmt == "hdf5":
|
2174
|
-
if self.script_data_files_use_opt:
|
2175
|
-
args.append("--outputs-hdf5")
|
2176
|
-
args.append(str(self.get_param_load_file_path_HDF5(**fn_args)))
|
2177
|
-
|
2178
|
-
commands.append(
|
2668
|
+
variables = script_cmd_vars if self.script else {}
|
2669
|
+
args = self.get_script_input_output_file_command_args()
|
2670
|
+
commands += [
|
2179
2671
|
self._app.Command(executable=exe, arguments=args, variables=variables)
|
2180
|
-
|
2672
|
+
]
|
2181
2673
|
|
2182
2674
|
# TODO: store script_args? and build command with executable syntax?
|
2183
2675
|
main_act = self._app.Action(
|
@@ -2194,6 +2686,7 @@ class Action(JSONLike):
             output_files=out_files,
             save_files=self.save_files,
             clean_up=self.clean_up,
+            requires_dir=self.requires_dir,
         )
         main_act._task_schema = self.task_schema
         main_act._from_expand = True
@@ -2233,7 +2726,7 @@ class Action(JSONLike):
 
     __FILES_RE: ClassVar[Pattern] = re.compile(r"\<\<file:(.*?)\>\>")
 
-    def
+    def get_command_file_labels(self) -> tuple[str, ...]:
         """Get input file types from commands."""
         files: set[str] = set()
         for command in self.commands:
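For illustration, the pattern assigned to `__FILES_RE` above pulls file labels out of `<<file:...>>` placeholders in command strings; a minimal standalone sketch (the command string and label are hypothetical):

import re

# Same pattern as __FILES_RE above:
FILES_RE = re.compile(r"\<\<file:(.*?)\>\>")

# A hypothetical command that references an input file by its label:
cmd = "my_solver --input <<file:geometry_file>>"
print(FILES_RE.findall(cmd))  # prints: ['geometry_file']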
@@ -2254,6 +2747,23 @@ class Action(JSONLike):
             params.add(out_params["stderr"])
         return tuple(params)
 
+    def get_command_parameter_types(
+        self, sub_parameters: bool = False
+    ) -> tuple[str, ...]:
+        """Get all parameter types that appear in the commands of this action.
+
+        Parameters
+        ----------
+        sub_parameters
+            If True, sub-parameter inputs (i.e. dot-delimited input types) will be
+            returned untouched. If False (default), only return the root parameter type
+            and disregard the sub-parameter part.
+        """
+        # TODO: not sure if we need `input_files`
+        return tuple(
+            f"inputs.{i}" for i in self.get_command_input_types(sub_parameters)
+        ) + tuple(f"input_files.{i}" for i in self.get_command_file_labels())
+
     def get_input_types(self, sub_parameters: bool = False) -> tuple[str, ...]:
         """Get the input types that are consumed by commands and input file generators of
         this action.
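The root-versus-sub-parameter behaviour documented for `get_command_parameter_types` can be sketched standalone. The stand-in functions below, and the names `p1.sub` and `infile`, are hypothetical; only the final return expression mirrors the method above:

# Hypothetical stand-in for Action.get_command_input_types(); assumes one
# command that consumes the dot-delimited sub-parameter "p1.sub".
def command_input_types(sub_parameters: bool) -> tuple[str, ...]:
    types = ("p1.sub",)
    if sub_parameters:
        return types
    return tuple(t.split(".")[0] for t in types)  # root parameter type only

# Hypothetical stand-in for Action.get_command_file_labels().
def command_file_labels() -> tuple[str, ...]:
    return ("infile",)

# Mirrors the return expression of get_command_parameter_types() above.
def command_parameter_types(sub_parameters: bool = False) -> tuple[str, ...]:
    return tuple(f"inputs.{i}" for i in command_input_types(sub_parameters)) + tuple(
        f"input_files.{i}" for i in command_file_labels()
    )

print(command_parameter_types())                     # ('inputs.p1', 'input_files.infile')
print(command_parameter_types(sub_parameters=True))  # ('inputs.p1.sub', 'input_files.infile')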
@@ -2270,6 +2780,8 @@ class Action(JSONLike):
             and not self.input_file_generators
             and not self.output_file_parsers
         ):
+            # TODO: refine this according to `script_data_in`, since this can be used
+            # to control the inputs/outputs of a script.
             params = set(self.task_schema.input_types)
         else:
             params = set(self.get_command_input_types(sub_parameters))
@@ -2288,6 +2800,8 @@ class Action(JSONLike):
             and not self.output_file_parsers
         ):
             params = set(self.task_schema.output_types)
+            # TODO: refine this according to `script_data_out`, since this can be used
+            # to control the inputs/outputs of a script.
         else:
             params = set(self.get_command_output_types())
         for ofp in self.output_file_parsers:
@@ -2523,109 +3037,112 @@ class Action(JSONLike):
         with snip_path.open("rt") as fp:
             script_str = fp.read()
 
-        if not self.
+        if not self.script_is_python_snippet:
             return script_str
 
-
-
+        if self.is_OFP and self.output_file_parsers[0].output is None:
+            # might be used just for saving files:
+            return ""
+
+        app_caps = self._app.package_name.upper()
+        py_imports = dedent(
+            """\
+            import argparse
+            import os
             from pathlib import Path
 
-
-
-
-
-
-
-            parser.add_argument("--outputs-hdf5")
-            args = parser.parse_args()
-            """
+            import {app_module} as app
+
+            std_path = os.getenv("{app_caps}_RUN_STD_PATH")
+            log_path = os.getenv("{app_caps}_RUN_LOG_PATH")
+            run_id = int(os.getenv("{app_caps}_RUN_ID"))
+            wk_path = os.getenv("{app_caps}_WK_PATH")
 
-
-
-
-
+            with app.redirect_std_to_file(std_path):
+
+            """
+        ).format(app_module=self._app.module, app_caps=app_caps)
+
+        # we must load the workflow (must be python):
+        # (note: we previously only loaded the workflow if there were any direct inputs
+        # or outputs; now we always load so we can use the method
+        # `get_py_script_func_kwargs`)
+        py_main_block_workflow_load = dedent(
+            """\
             app.load_config(
-                log_file_path=Path(
+                log_file_path=Path(log_path),
                 config_dir=r"{cfg_dir}",
                 config_key=r"{cfg_invoc_key}",
             )
-            wk_path, EAR_ID = args.wk_path, args.run_id
             wk = app.Workflow(wk_path)
-            EAR = wk.get_EARs_from_IDs([
-            """.format(
-                run_log_file=self._app.RunDirAppFiles.get_log_file_name(),
-                app_module=self._app.module,
-                cfg_dir=self._app.config.config_directory,
-                cfg_invoc_key=self._app.config.config_key,
-            )
-        else:
-            py_main_block_workflow_load = ""
-
-        func_kwargs_lst = []
-        if "direct" in self.script_data_in_grouped:
-            direct_ins_str = "direct_ins = EAR.get_input_values_direct()"
-            func_kwargs_lst.append("**direct_ins")
-        else:
-            direct_ins_str = ""
-
-        if self.script_data_in_has_files:
-            # need to pass "_input_files" keyword argument to script main function:
-            input_files_str = """
-            inp_files = {}
-            if args.inputs_json:
-                inp_files["json"] = Path(args.inputs_json)
-            if args.inputs_hdf5:
-                inp_files["hdf5"] = Path(args.inputs_hdf5)
-            """
-            func_kwargs_lst.append("_input_files=inp_files")
-        else:
-            input_files_str = ""
-
-        if self.script_data_out_has_files:
-            # need to pass "_output_files" keyword argument to script main function:
-            output_files_str = """
-            out_files = {}
-            if args.outputs_json:
-                out_files["json"] = Path(args.outputs_json)
-            if args.outputs_hdf5:
-                out_files["hdf5"] = Path(args.outputs_hdf5)
+            EAR = wk.get_EARs_from_IDs([run_id])[0]
             """
-
+        ).format(
+            cfg_dir=self._app.config.config_directory,
+            cfg_invoc_key=self._app.config.config_key,
+            app_caps=app_caps,
+        )
 
-
-
+        tab_indent = "    "
+        tab_indent_2 = 2 * tab_indent
+
+        func_kwargs_str = dedent(
+            """\
+            blk_act_key = (
+                os.environ["{app_caps}_JS_IDX"],
+                os.environ["{app_caps}_BLOCK_IDX"],
+                os.environ["{app_caps}_BLOCK_ACT_IDX"],
+            )
+            with EAR.raise_on_failure_threshold() as unset_params:
+                func_kwargs = EAR.get_py_script_func_kwargs(
+                    raise_on_unset=False,
+                    add_script_files=True,
+                    blk_act_key=blk_act_key,
+                )
+            """
+        ).format(app_caps=app_caps)
 
         script_main_func = Path(script_name).stem
-        func_invoke_str = f"{script_main_func}(
-        if "direct" in self.script_data_out_grouped:
+        func_invoke_str = f"{script_main_func}(**func_kwargs)"
+        if not self.is_OFP and "direct" in self.script_data_out_grouped:
             py_main_block_invoke = f"outputs = {func_invoke_str}"
-            py_main_block_outputs =
-
-
-
-
+            py_main_block_outputs = dedent(
+                """\
+                with app.redirect_std_to_file(std_path):
+                    for name_i, out_i in outputs.items():
+                        wk.set_parameter_value(param_id=EAR.data_idx[f"outputs.{name_i}"], value=out_i)
+                """
+            )
+        elif self.is_OFP:
+            py_main_block_invoke = f"output = {func_invoke_str}"
+            assert self.output_file_parsers[0].output
+            py_main_block_outputs = dedent(
+                """\
+                with app.redirect_std_to_file(std_path):
+                    wk.save_parameter(name="outputs.{output_typ}", value=output, EAR_ID=run_id)
+                """
+            ).format(output_typ=self.output_file_parsers[0].output.typ)
         else:
             py_main_block_invoke = func_invoke_str
             py_main_block_outputs = ""
 
-
+        wk_load = (
+            "\n" + indent(py_main_block_workflow_load, tab_indent_2)
+            if py_main_block_workflow_load
+            else ""
+        )
         py_main_block = dedent(
             """\
             if __name__ == "__main__":
-            {py_imports}
-            {
-            {direct_ins}
-            {in_files}
-            {out_files}
+            {py_imports}{wk_load}
+            {func_kwargs}
            {invoke}
            {outputs}
            """
         ).format(
-            py_imports=indent(
-            wk_load=
-
-            in_files=indent(dedent(input_files_str), tab_indent),
-            out_files=indent(dedent(output_files_str), tab_indent),
+            py_imports=indent(py_imports, tab_indent),
+            wk_load=wk_load,
+            func_kwargs=indent(func_kwargs_str, tab_indent_2),
             invoke=indent(py_main_block_invoke, tab_indent),
             outputs=indent(dedent(py_main_block_outputs), tab_indent),
         )
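Assembled from the template strings in this hunk, the emitted `__main__` block for a direct-output (non-OFP) snippet script would look roughly as follows. This is a hedged reconstruction, not taken verbatim from the package: it assumes an app whose module is `hpcflow.app` and package name is `hpcflow` (so `app_caps` is `HPCFLOW`), a snippet file whose stem defines a `main_script()` function, and placeholder config directory and key:

# Hedged reconstruction of the generated block; `main_script` is assumed to be
# defined earlier in the same generated file (its name comes from the script
# file stem), and the config dir/key are placeholders.
if __name__ == "__main__":
    import argparse
    import os
    from pathlib import Path

    import hpcflow.app as app  # assumed value of {app_module}

    std_path = os.getenv("HPCFLOW_RUN_STD_PATH")
    log_path = os.getenv("HPCFLOW_RUN_LOG_PATH")
    run_id = int(os.getenv("HPCFLOW_RUN_ID"))
    wk_path = os.getenv("HPCFLOW_WK_PATH")

    with app.redirect_std_to_file(std_path):
        app.load_config(
            log_file_path=Path(log_path),
            config_dir=r"/path/to/config",  # {cfg_dir} placeholder
            config_key=r"invocation-key",   # {cfg_invoc_key} placeholder
        )
        wk = app.Workflow(wk_path)
        EAR = wk.get_EARs_from_IDs([run_id])[0]
        blk_act_key = (
            os.environ["HPCFLOW_JS_IDX"],
            os.environ["HPCFLOW_BLOCK_IDX"],
            os.environ["HPCFLOW_BLOCK_ACT_IDX"],
        )
        with EAR.raise_on_failure_threshold() as unset_params:
            func_kwargs = EAR.get_py_script_func_kwargs(
                raise_on_unset=False,
                add_script_files=True,
                blk_act_key=blk_act_key,
            )
    outputs = main_script(**func_kwargs)
    with app.redirect_std_to_file(std_path):
        for name_i, out_i in outputs.items():
            wk.set_parameter_value(param_id=EAR.data_idx[f"outputs.{name_i}"], value=out_i)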
@@ -2672,3 +3189,138 @@ class Action(JSONLike):
             return list(self.get_output_file_labels())
         else:
             raise ValueError(f"unexpected prefix: {prefix}")
+
+    def get_commands_file_hash(self, data_idx: DataIndex, action_idx: int) -> int:
+        """Get a hash that can be used to group together runs that will have the same
+        commands file.
+
+        This hash is not stable across sessions or machines.
+        """
+        # filter data index by input parameters that appear in the commands, or are
+        # used in rules in conditional commands:
+        param_types = self.get_command_parameter_types()
+
+        relevant_paths: list[str] = []
+        for i in param_types:
+            relevant_paths.extend(
+                list(WorkflowTask._get_relevant_paths(data_idx, i.split(".")).keys())
+            )
+
+        # hash any relevant data index from rule paths:
+        for cmd in self.commands:
+            for act_rule in cmd.rules:
+                rule_path = act_rule.rule.path
+                assert rule_path
+                rule_path_split = rule_path.split(".")
+                if rule_path.startswith("resources."):
+                    # include all resource paths for now:
+                    relevant_paths.extend(
+                        list(
+                            WorkflowTask._get_relevant_paths(
+                                data_idx, ["resources"]
+                            ).keys()
+                        )
+                    )
+                else:
+                    relevant_paths.extend(
+                        list(
+                            WorkflowTask._get_relevant_paths(
+                                data_idx, rule_path_split
+                            ).keys()
+                        )
+                    )
+
+        # note: we don't need to consider action-level rules, since these determine
+        # whether a run will be included in a submission or not; this method is only
+        # called on runs that are part of a submission, at which point action-level
+        # rules are irrelevant.
+
+        relevant_data_idx = {k: v for k, v in data_idx.items() if k in relevant_paths}
+
+        try:
+            schema_name = self.task_schema.name
+        except AssertionError:
+            # allows for testing without making a schema
+            schema_name = ""
+
+        return get_hash(
+            (
+                schema_name,
+                action_idx,
+                relevant_data_idx,
+            )
+        )
+
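A toy sketch of the grouping idea (`get_hash` here is a stand-in for the helper used above, and the schema and parameter names are made up): runs whose filtered data indices agree hash identically and can share one commands file.

# Illustrative only: how a hash over (schema name, action index, filtered data
# index) groups runs; this get_hash is a stand-in, not the package's helper.
def get_hash(obj) -> int:
    return hash(repr(obj))  # not stable across sessions, as the docstring notes

run_a = ("my_schema", 0, {"inputs.p1": 12})
run_b = ("my_schema", 0, {"inputs.p1": 12})  # same relevant data index
run_c = ("my_schema", 0, {"inputs.p1": 99})  # differs in a relevant input

print(get_hash(run_a) == get_hash(run_b))  # True: may share one commands file
print(get_hash(run_a) == get_hash(run_c))  # False: grouped separately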
+    @classmethod
+    def get_block_act_idx_shell_vars(cls) -> BlockActionKey:
+        """Return the jobscript index, block index, and block action index shell
+        environment variable names, formatted for shell substitution.
+
+        Notes
+        -----
+        This seems to be shell-agnostic, at least for the shells currently supported.
+        """
+        app_caps = cls._app.package_name.upper()
+        return (
+            f"${{{app_caps}_JS_IDX}}",
+            f"${{{app_caps}_BLOCK_IDX}}",
+            f"${{{app_caps}_BLOCK_ACT_IDX}}",
+        )
+
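Given the triple-brace escaping above, for an app package named `hpcflow` the returned names would render as follows (a small runnable check of the f-string formatting; the package name is an assumption):

# Assuming an app package name of "hpcflow", so app_caps == "HPCFLOW":
app_caps = "HPCFLOW"
print(
    f"${{{app_caps}_JS_IDX}}",
    f"${{{app_caps}_BLOCK_IDX}}",
    f"${{{app_caps}_BLOCK_ACT_IDX}}",
)
# prints: ${HPCFLOW_JS_IDX} ${HPCFLOW_BLOCK_IDX} ${HPCFLOW_BLOCK_ACT_IDX}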
+    def get_script_input_output_file_paths(
+        self,
+        block_act_key: BlockActionKey,
+        directory: Path | None = None,
+    ) -> dict[str, dict[str, Path]]:
+        """Get the names (as `Path`s) of script input and output files for this action."""
+        in_out_paths: dict[str, dict[str, Path]] = {
+            "inputs": {},
+            "outputs": {},
+        }
+        for fmt in self.script_data_in_grouped:
+            if fmt == "json":
+                path = self.get_param_dump_file_path_JSON(
+                    block_act_key, directory=directory
+                )
+            elif fmt == "hdf5":
+                path = self.get_param_dump_file_path_HDF5(
+                    block_act_key, directory=directory
+                )
+            else:
+                continue
+            in_out_paths["inputs"][fmt] = path
+
+        for fmt in self.script_data_out_grouped:
+            if fmt == "json":
+                path = self.get_param_load_file_path_JSON(
+                    block_act_key, directory=directory
+                )
+            elif fmt == "hdf5":
+                path = self.get_param_load_file_path_HDF5(
+                    block_act_key, directory=directory
+                )
+            else:
+                continue
+            in_out_paths["outputs"][fmt] = path
+
+        return in_out_paths
+
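The returned mapping has a fixed two-level shape; an illustrative value for an action whose script reads JSON inputs and writes HDF5 outputs (the file names are made up, the real ones come from the `get_param_*_file_path_*` methods):

from pathlib import Path

# Illustrative only: the shape of the returned mapping for a JSON-in,
# HDF5-out script (file names invented for the sketch):
in_out_paths = {
    "inputs": {"json": Path("js_0_block_0_act_0_inputs.json")},
    "outputs": {"hdf5": Path("js_0_block_0_act_0_outputs.h5")},
}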
+    def get_script_input_output_file_command_args(self) -> list[str]:
+        """Get the script input and output file names as command line arguments."""
+        in_out_names = self.get_script_input_output_file_paths(
+            self.get_block_act_idx_shell_vars()
+        )
+        args: list[str] = []
+        for fmt, path in in_out_names["inputs"].items():
+            if self.script_data_files_use_opt:
+                args.append(f"--inputs-{fmt}")
+            args.append(str(path))
+        for fmt, path in in_out_names["outputs"].items():
+            if self.script_data_files_use_opt:
+                args.append(f"--outputs-{fmt}")
+            args.append(str(path))
+
+        return args
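Combining the three methods above, a hedged sketch of the resulting command-line arguments for a JSON-in/JSON-out script with `script_data_files_use_opt` enabled (file names elided and illustrative; the shell variables come from get_block_act_idx_shell_vars):

# Illustrative only: option flags interleaved with shell-substituted file
# names; the actual file-name scheme comes from the get_param_*_file_path_*
# methods, so the names below are deliberately elided.
args = [
    "--inputs-json",
    "js_${HPCFLOW_JS_IDX}_..._inputs.json",   # made-up name
    "--outputs-json",
    "js_${HPCFLOW_JS_IDX}_..._outputs.json",  # made-up name
]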