hpcflow-new2 0.2.0a189__py3-none-any.whl → 0.2.0a199__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176)
  1. hpcflow/__pyinstaller/hook-hpcflow.py +9 -6
  2. hpcflow/_version.py +1 -1
  3. hpcflow/app.py +1 -0
  4. hpcflow/data/scripts/bad_script.py +2 -0
  5. hpcflow/data/scripts/do_nothing.py +2 -0
  6. hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
  7. hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
  8. hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
  9. hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
  10. hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
  11. hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
  12. hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
  13. hpcflow/data/scripts/input_file_generator_basic.py +3 -0
  14. hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
  15. hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
  16. hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
  17. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
  18. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
  19. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
  20. hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
  21. hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
  22. hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
  23. hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +1 -1
  24. hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
  25. hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +1 -1
  26. hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
  27. hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
  28. hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
  29. hpcflow/data/scripts/output_file_parser_basic.py +3 -0
  30. hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
  31. hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
  32. hpcflow/data/scripts/script_exit_test.py +5 -0
  33. hpcflow/data/template_components/environments.yaml +1 -1
  34. hpcflow/sdk/__init__.py +26 -15
  35. hpcflow/sdk/app.py +2192 -768
  36. hpcflow/sdk/cli.py +506 -296
  37. hpcflow/sdk/cli_common.py +105 -7
  38. hpcflow/sdk/config/__init__.py +1 -1
  39. hpcflow/sdk/config/callbacks.py +115 -43
  40. hpcflow/sdk/config/cli.py +126 -103
  41. hpcflow/sdk/config/config.py +674 -318
  42. hpcflow/sdk/config/config_file.py +131 -95
  43. hpcflow/sdk/config/errors.py +125 -84
  44. hpcflow/sdk/config/types.py +148 -0
  45. hpcflow/sdk/core/__init__.py +25 -1
  46. hpcflow/sdk/core/actions.py +1771 -1059
  47. hpcflow/sdk/core/app_aware.py +24 -0
  48. hpcflow/sdk/core/cache.py +139 -79
  49. hpcflow/sdk/core/command_files.py +263 -287
  50. hpcflow/sdk/core/commands.py +145 -112
  51. hpcflow/sdk/core/element.py +828 -535
  52. hpcflow/sdk/core/enums.py +192 -0
  53. hpcflow/sdk/core/environment.py +74 -93
  54. hpcflow/sdk/core/errors.py +455 -52
  55. hpcflow/sdk/core/execute.py +207 -0
  56. hpcflow/sdk/core/json_like.py +540 -272
  57. hpcflow/sdk/core/loop.py +751 -347
  58. hpcflow/sdk/core/loop_cache.py +164 -47
  59. hpcflow/sdk/core/object_list.py +370 -207
  60. hpcflow/sdk/core/parameters.py +1100 -627
  61. hpcflow/sdk/core/rule.py +59 -41
  62. hpcflow/sdk/core/run_dir_files.py +21 -37
  63. hpcflow/sdk/core/skip_reason.py +7 -0
  64. hpcflow/sdk/core/task.py +1649 -1339
  65. hpcflow/sdk/core/task_schema.py +308 -196
  66. hpcflow/sdk/core/test_utils.py +191 -114
  67. hpcflow/sdk/core/types.py +440 -0
  68. hpcflow/sdk/core/utils.py +485 -309
  69. hpcflow/sdk/core/validation.py +82 -9
  70. hpcflow/sdk/core/workflow.py +2544 -1178
  71. hpcflow/sdk/core/zarr_io.py +98 -137
  72. hpcflow/sdk/data/workflow_spec_schema.yaml +2 -0
  73. hpcflow/sdk/demo/cli.py +53 -33
  74. hpcflow/sdk/helper/cli.py +18 -15
  75. hpcflow/sdk/helper/helper.py +75 -63
  76. hpcflow/sdk/helper/watcher.py +61 -28
  77. hpcflow/sdk/log.py +122 -71
  78. hpcflow/sdk/persistence/__init__.py +8 -31
  79. hpcflow/sdk/persistence/base.py +1360 -606
  80. hpcflow/sdk/persistence/defaults.py +6 -0
  81. hpcflow/sdk/persistence/discovery.py +38 -0
  82. hpcflow/sdk/persistence/json.py +568 -188
  83. hpcflow/sdk/persistence/pending.py +382 -179
  84. hpcflow/sdk/persistence/store_resource.py +39 -23
  85. hpcflow/sdk/persistence/types.py +318 -0
  86. hpcflow/sdk/persistence/utils.py +14 -11
  87. hpcflow/sdk/persistence/zarr.py +1337 -433
  88. hpcflow/sdk/runtime.py +44 -41
  89. hpcflow/sdk/submission/{jobscript_info.py → enums.py} +39 -12
  90. hpcflow/sdk/submission/jobscript.py +1651 -692
  91. hpcflow/sdk/submission/schedulers/__init__.py +167 -39
  92. hpcflow/sdk/submission/schedulers/direct.py +121 -81
  93. hpcflow/sdk/submission/schedulers/sge.py +170 -129
  94. hpcflow/sdk/submission/schedulers/slurm.py +291 -268
  95. hpcflow/sdk/submission/schedulers/utils.py +12 -2
  96. hpcflow/sdk/submission/shells/__init__.py +14 -15
  97. hpcflow/sdk/submission/shells/base.py +150 -29
  98. hpcflow/sdk/submission/shells/bash.py +283 -173
  99. hpcflow/sdk/submission/shells/os_version.py +31 -30
  100. hpcflow/sdk/submission/shells/powershell.py +228 -170
  101. hpcflow/sdk/submission/submission.py +1014 -335
  102. hpcflow/sdk/submission/types.py +140 -0
  103. hpcflow/sdk/typing.py +182 -12
  104. hpcflow/sdk/utils/arrays.py +71 -0
  105. hpcflow/sdk/utils/deferred_file.py +55 -0
  106. hpcflow/sdk/utils/hashing.py +16 -0
  107. hpcflow/sdk/utils/patches.py +12 -0
  108. hpcflow/sdk/utils/strings.py +33 -0
  109. hpcflow/tests/api/test_api.py +32 -0
  110. hpcflow/tests/conftest.py +27 -6
  111. hpcflow/tests/data/multi_path_sequences.yaml +29 -0
  112. hpcflow/tests/data/workflow_test_run_abort.yaml +34 -35
  113. hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
  114. hpcflow/tests/schedulers/slurm/test_slurm_submission.py +5 -2
  115. hpcflow/tests/scripts/test_input_file_generators.py +282 -0
  116. hpcflow/tests/scripts/test_main_scripts.py +866 -85
  117. hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
  118. hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
  119. hpcflow/tests/shells/wsl/test_wsl_submission.py +12 -4
  120. hpcflow/tests/unit/test_action.py +262 -75
  121. hpcflow/tests/unit/test_action_rule.py +9 -4
  122. hpcflow/tests/unit/test_app.py +33 -6
  123. hpcflow/tests/unit/test_cache.py +46 -0
  124. hpcflow/tests/unit/test_cli.py +134 -1
  125. hpcflow/tests/unit/test_command.py +71 -54
  126. hpcflow/tests/unit/test_config.py +142 -16
  127. hpcflow/tests/unit/test_config_file.py +21 -18
  128. hpcflow/tests/unit/test_element.py +58 -62
  129. hpcflow/tests/unit/test_element_iteration.py +50 -1
  130. hpcflow/tests/unit/test_element_set.py +29 -19
  131. hpcflow/tests/unit/test_group.py +4 -2
  132. hpcflow/tests/unit/test_input_source.py +116 -93
  133. hpcflow/tests/unit/test_input_value.py +29 -24
  134. hpcflow/tests/unit/test_jobscript_unit.py +757 -0
  135. hpcflow/tests/unit/test_json_like.py +44 -35
  136. hpcflow/tests/unit/test_loop.py +1396 -84
  137. hpcflow/tests/unit/test_meta_task.py +325 -0
  138. hpcflow/tests/unit/test_multi_path_sequences.py +229 -0
  139. hpcflow/tests/unit/test_object_list.py +17 -12
  140. hpcflow/tests/unit/test_parameter.py +29 -7
  141. hpcflow/tests/unit/test_persistence.py +237 -42
  142. hpcflow/tests/unit/test_resources.py +20 -18
  143. hpcflow/tests/unit/test_run.py +117 -6
  144. hpcflow/tests/unit/test_run_directories.py +29 -0
  145. hpcflow/tests/unit/test_runtime.py +2 -1
  146. hpcflow/tests/unit/test_schema_input.py +23 -15
  147. hpcflow/tests/unit/test_shell.py +23 -2
  148. hpcflow/tests/unit/test_slurm.py +8 -7
  149. hpcflow/tests/unit/test_submission.py +38 -89
  150. hpcflow/tests/unit/test_task.py +352 -247
  151. hpcflow/tests/unit/test_task_schema.py +33 -20
  152. hpcflow/tests/unit/test_utils.py +9 -11
  153. hpcflow/tests/unit/test_value_sequence.py +15 -12
  154. hpcflow/tests/unit/test_workflow.py +114 -83
  155. hpcflow/tests/unit/test_workflow_template.py +0 -1
  156. hpcflow/tests/unit/utils/test_arrays.py +40 -0
  157. hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
  158. hpcflow/tests/unit/utils/test_hashing.py +65 -0
  159. hpcflow/tests/unit/utils/test_patches.py +5 -0
  160. hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
  161. hpcflow/tests/workflows/__init__.py +0 -0
  162. hpcflow/tests/workflows/test_directory_structure.py +31 -0
  163. hpcflow/tests/workflows/test_jobscript.py +334 -1
  164. hpcflow/tests/workflows/test_run_status.py +198 -0
  165. hpcflow/tests/workflows/test_skip_downstream.py +696 -0
  166. hpcflow/tests/workflows/test_submission.py +140 -0
  167. hpcflow/tests/workflows/test_workflows.py +160 -15
  168. hpcflow/tests/workflows/test_zip.py +18 -0
  169. hpcflow/viz_demo.ipynb +6587 -3
  170. {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/METADATA +8 -4
  171. hpcflow_new2-0.2.0a199.dist-info/RECORD +221 -0
  172. hpcflow/sdk/core/parallel.py +0 -21
  173. hpcflow_new2-0.2.0a189.dist-info/RECORD +0 -158
  174. {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/LICENSE +0 -0
  175. {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/WHEEL +0 -0
  176. {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/entry_points.txt +0 -0
@@ -4,16 +4,19 @@ A collection of submissions to a scheduler, generated from a workflow.
 
 from __future__ import annotations
 from collections import defaultdict
-
-from datetime import datetime, timedelta, timezone
-import enum
-import os
+import shutil
 from pathlib import Path
+import socket
 from textwrap import indent
-from typing import Dict, List, Optional, Tuple
+from typing import Any, Literal, overload, TYPE_CHECKING
+from typing_extensions import override
+import warnings
+
 
-from hpcflow.sdk import app
-from hpcflow.sdk.core.element import ElementResources
+from hpcflow.sdk.utils.strings import shorten_list_str
+import numpy as np
+
+from hpcflow.sdk.typing import hydrate
 from hpcflow.sdk.core.errors import (
     JobscriptSubmissionFailure,
     MissingEnvironmentError,
@@ -21,47 +24,50 @@ from hpcflow.sdk.core.errors import (
     MissingEnvironmentExecutableInstanceError,
     MultipleEnvironmentsError,
     SubmissionFailure,
+    OutputFileParserNoOutputError,
 )
 from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
 from hpcflow.sdk.core.object_list import ObjectListMultipleMatchError
+from hpcflow.sdk.core.utils import parse_timestamp, current_timestamp
+from hpcflow.sdk.submission.enums import SubmissionStatus
+from hpcflow.sdk.core import RUN_DIR_ARR_DTYPE
 from hpcflow.sdk.log import TimeIt
+from hpcflow.sdk.utils.strings import shorten_list_str
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable, Mapping, Sequence
+    from datetime import datetime
+    from typing import ClassVar, Literal
+    from rich.status import Status
+    from numpy.typing import NDArray
+    from .jobscript import Jobscript
+    from .enums import JobscriptElementState
+    from .schedulers import Scheduler
+    from .shells import Shell
+    from .types import SubmissionPart
+    from ..core.element import ElementActionRun
+    from ..core.environment import Environment
+    from ..core.object_list import EnvironmentsList
+    from ..core.workflow import Workflow
+    from ..core.cache import ObjectCache
+
+
+# jobscript attributes that are set persistently just after the jobscript has been
+# submitted to the scheduler:
+JOBSCRIPT_SUBMIT_TIME_KEYS = (
+    "submit_cmdline",
+    "scheduler_job_ID",
+    "process_ID",
+    "submit_time",
+)
+# submission attributes that are set persistently just after all of a submission's
+# jobscripts have been submitted:
+SUBMISSION_SUBMIT_TIME_KEYS = {
+    "submission_parts": dict,
+}
 
 
-def timedelta_format(td: timedelta) -> str:
-    """
-    Convert time delta to string in standard form.
-    """
-    days, seconds = td.days, td.seconds
-    hours = seconds // (60 * 60)
-    seconds -= hours * (60 * 60)
-    minutes = seconds // 60
-    seconds -= minutes * 60
-    return f"{days}-{hours:02}:{minutes:02}:{seconds:02}"
-
-
-def timedelta_parse(td_str: str) -> timedelta:
-    """
-    Parse a string in standard form as a time delta.
-    """
-    days, other = td_str.split("-")
-    days = int(days)
-    hours, mins, secs = [int(i) for i in other.split(":")]
-    return timedelta(days=days, hours=hours, minutes=mins, seconds=secs)
-
-
-class SubmissionStatus(enum.Enum):
-    """
-    The overall status of a submission.
-    """
-
-    #: Not yet submitted.
-    PENDING = 0
-    #: All jobscripts submitted successfully.
-    SUBMITTED = 1
-    #: Some jobscripts submitted successfully.
-    PARTIALLY_SUBMITTED = 2
-
-
+@hydrate
 class Submission(JSONLike):
     """
     A collection of jobscripts to be submitted to a scheduler.
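Note: `SubmissionStatus` is moved rather than deleted; it now lives in the new `hpcflow.sdk.submission.enums` module (see the `jobscript_info.py → enums.py` rename in the file list above), which this hunk imports. The removed `timedelta_format`/`timedelta_parse` helpers round-tripped a "D-HH:MM:SS" string; a minimal sketch of the behaviour being dropped here:

    from datetime import timedelta

    td = timedelta(days=2, hours=3, minutes=4, seconds=5)
    # timedelta_format(td) == "2-03:04:05"
    # timedelta_parse("2-03:04:05") == td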
@@ -82,7 +88,7 @@ class Submission(JSONLike):
         The execution environments to use.
     """
 
-    _child_objects = (
+    _child_objects: ClassVar[tuple[ChildObjectSpec, ...]] = (
         ChildObjectSpec(
             name="jobscripts",
             class_name="Jobscript",
@@ -95,22 +101,39 @@
         ),
     )
 
+    TMP_DIR_NAME = "tmp"
+    LOG_DIR_NAME = "app_logs"
+    APP_STD_DIR_NAME = "app_std"
+    JS_DIR_NAME = "jobscripts"
+    JS_STD_DIR_NAME = "js_std"
+    JS_RUN_IDS_DIR_NAME = "js_run_ids"
+    JS_FUNCS_DIR_NAME = "js_funcs"
+    JS_WIN_PIDS_DIR_NAME = "js_pids"
+    JS_SCRIPT_INDICES_DIR_NAME = "js_script_indices"
+    SCRIPTS_DIR_NAME = "scripts"
+    COMMANDS_DIR_NAME = "commands"
+    WORKFLOW_APP_ALIAS = "wkflow_app"
+
     def __init__(
         self,
         index: int,
-        jobscripts: List[app.Jobscript],
-        workflow: Optional[app.Workflow] = None,
-        submission_parts: Optional[Dict] = None,
-        JS_parallelism: Optional[bool] = None,
-        environments: Optional[app.EnvironmentsList] = None,
+        jobscripts: list[Jobscript],
+        workflow: Workflow | None = None,
+        at_submit_metadata: dict[str, Any] | None = None,
+        JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
+        environments: EnvironmentsList | None = None,
     ):
         self._index = index
         self._jobscripts = jobscripts
-        self._submission_parts = submission_parts or {}
+        self._at_submit_metadata = at_submit_metadata or {
+            k: v() for k, v in SUBMISSION_SUBMIT_TIME_KEYS.items()
+        }
         self._JS_parallelism = JS_parallelism
-        self._environments = environments
+        self._environments = environments  # assigned by _set_environments
 
-        self._submission_parts_lst = None  # assigned on first access; datetime objects
+        self._submission_parts_lst: list[
+            SubmissionPart
+        ] | None = None  # assigned on first access
 
         if workflow:
             #: The workflow this is part of.
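Note: `SUBMISSION_SUBMIT_TIME_KEYS` maps each at-submit attribute name to a factory callable, so the `__init__` comprehension builds a fresh (unshared) default per `Submission` instance:

    SUBMISSION_SUBMIT_TIME_KEYS = {"submission_parts": dict}
    defaults = {k: v() for k, v in SUBMISSION_SUBMIT_TIME_KEYS.items()}
    assert defaults == {"submission_parts": {}}  # dict() returns a new dict each time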
@@ -118,41 +141,61 @@ class Submission(JSONLike):
 
             self._set_parent_refs()
 
-        for js_idx, js in enumerate(self.jobscripts):
-            js._index = js_idx
+    def _ensure_JS_parallelism_set(self):
+        """Ensure that the JS_parallelism attribute is one of `True`, `False`, `'direct'`
+        or `'scheduled'`.
+
+        Notes
+        -----
+        This method is called after the Submission object is first created in
+        `Workflow._add_submission`.
+
+        """
+        # if JS_parallelism explicitly requested but store doesn't support, raise:
+        supports_JS_para = self.workflow._store._features.jobscript_parallelism
+        if self.JS_parallelism:
+            # could be: True | "direct" | "scheduled"
+            if not supports_JS_para:
+                # if status:
+                #     status.stop()
+                raise ValueError(
+                    f"Store type {self.workflow._store!r} does not support jobscript "
+                    f"parallelism."
+                )
+        elif self.JS_parallelism is None:
+            # by default only use JS parallelism for scheduled jobscripts:
+            self._JS_parallelism = "scheduled" if supports_JS_para else False
 
     @TimeIt.decorator
-    def _set_environments(self):
-        filterable = ElementResources.get_env_instance_filterable_attributes()
+    def _set_environments(self) -> None:
+        filterable = self._app.ElementResources.get_env_instance_filterable_attributes()
 
         # map required environments and executable labels to job script indices:
-        req_envs = defaultdict(lambda: defaultdict(set))
-        for js_idx, js_i in enumerate(self.jobscripts):
-            for run in js_i.all_EARs:
-                env_spec_h = tuple(zip(*run.env_spec.items()))  # hashable
-                for exec_label_j in run.action.get_required_executables():
-                    req_envs[env_spec_h][exec_label_j].add(js_idx)
-                if env_spec_h not in req_envs:
-                    req_envs[env_spec_h] = {}
+        req_envs: dict[
+            tuple[tuple[str, ...], tuple[Any, ...]], dict[str, set[int]]
+        ] = defaultdict(lambda: defaultdict(set))
+        with self.workflow.cached_merged_parameters():
+            # using the cache (for `run.env_spec_hashable` -> `run.resources`) should
+            # significantly speed up this loop, unless a large resources sequence is used:
+            for js_idx, all_EARs_i in enumerate(self.all_EARs_by_jobscript):
+                for run in all_EARs_i:
+                    env_spec_h = run.env_spec_hashable
+                    for exec_label_j in run.action.get_required_executables():
+                        req_envs[env_spec_h][exec_label_j].add(js_idx)
+                    # add any environment for which an executable was not required:
+                    if env_spec_h not in req_envs:
+                        req_envs[env_spec_h]
 
         # check these envs/execs exist in app data:
-        envs = []
+        envs: list[Environment] = []
         for env_spec_h, exec_js in req_envs.items():
-            env_spec = dict(zip(*env_spec_h))
-            non_name_spec = {k: v for k, v in env_spec.items() if k != "name"}
-            spec_str = f" with specifiers {non_name_spec!r}" if non_name_spec else ""
-            env_ref = f"{env_spec['name']!r}{spec_str}"
+            env_spec = self._app.Action.env_spec_from_hashable(env_spec_h)
             try:
-                env_i = self.app.envs.get(**env_spec)
+                env_i = self._app.envs.get(**env_spec)
             except ObjectListMultipleMatchError:
-                raise MultipleEnvironmentsError(
-                    f"Multiple environments {env_ref} are defined on this machine."
-                )
+                raise MultipleEnvironmentsError(env_spec)
             except ValueError:
-                raise MissingEnvironmentError(
-                    f"The environment {env_ref} is not defined on this machine, so the "
-                    f"submission cannot be created."
-                ) from None
+                raise MissingEnvironmentError(env_spec) from None
             else:
                 if env_i not in envs:
                     envs.append(env_i)
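Note: the bare expression statement `req_envs[env_spec_h]` is intentional: subscripting a `defaultdict` inserts a default value for a missing key, which here registers an environment that requires no executables. A standalone illustration of that behaviour:

    from collections import defaultdict

    req_envs = defaultdict(lambda: defaultdict(set))
    key = (("name",), ("my_env",))  # illustrative env-spec key, not from the diff
    req_envs[key]  # bare lookup inserts an empty inner defaultdict
    assert key in req_envs and not req_envs[key]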
@@ -162,34 +205,28 @@ class Submission(JSONLike):
                     exec_i = env_i.executables.get(exec_i_lab)
                 except ValueError:
                     raise MissingEnvironmentExecutableError(
-                        f"The environment {env_ref} as defined on this machine has no "
-                        f"executable labelled {exec_i_lab!r}, which is required for this "
-                        f"submission, so the submission cannot be created."
+                        env_spec, exec_i_lab
                     ) from None
 
                 # check matching executable instances exist:
                 for js_idx_j in js_idx_set:
-                    js_j = self.jobscripts[js_idx_j]
-                    filter_exec = {j: getattr(js_j.resources, j) for j in filterable}
-                    exec_instances = exec_i.filter_instances(**filter_exec)
-                    if not exec_instances:
+                    js_res = self.jobscripts[js_idx_j].resources
+                    filter_exec = {j: getattr(js_res, j) for j in filterable}
+                    if not exec_i.filter_instances(**filter_exec):
                         raise MissingEnvironmentExecutableInstanceError(
-                            f"No matching executable instances found for executable "
-                            f"{exec_i_lab!r} of environment {env_ref} for jobscript "
-                            f"index {js_idx_j!r} with requested resources "
-                            f"{filter_exec!r}."
+                            env_spec, exec_i_lab, js_idx_j, filter_exec
                         )
 
         # save env definitions to the environments attribute:
-        self._environments = self.app.EnvironmentsList(envs)
+        self._environments = self._app.EnvironmentsList(envs)
 
-    def to_dict(self):
-        dct = super().to_dict()
+    @override
+    def _postprocess_to_dict(self, d: dict[str, Any]) -> dict[str, Any]:
+        dct = super()._postprocess_to_dict(d)
         del dct["_workflow"]
         del dct["_index"]
         del dct["_submission_parts_lst"]
-        dct = {k.lstrip("_"): v for k, v in dct.items()}
-        return dct
+        return {k.lstrip("_"): v for k, v in dct.items()}
 
     @property
     def index(self) -> int:
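Note: the serialisation key normalisation is unchanged in effect: `str.lstrip("_")` removes all leading underscores, mapping private attribute names to their public counterparts:

    dct = {"_index": 0, "_jobscripts": [], "_JS_parallelism": None}
    assert {k.lstrip("_"): v for k, v in dct.items()} == {
        "index": 0,
        "jobscripts": [],
        "JS_parallelism": None,
    }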
@@ -199,26 +236,29 @@ class Submission(JSONLike):
         return self._index
 
     @property
-    def environments(self) -> app.EnvironmentsList:
+    def environments(self) -> EnvironmentsList:
         """
         The execution environments to use.
         """
+        assert self._environments
         return self._environments
 
     @property
-    def submission_parts(self) -> List[Dict]:
-        """
-        Description of the parts of this submission.
-        """
-        if not self._submission_parts:
-            return []
+    def at_submit_metadata(self) -> dict[str, dict[str, Any]]:
+        return self.workflow._store.get_submission_at_submit_metadata(
+            sub_idx=self.index, metadata_attr=self._at_submit_metadata
+        )
+
+    @property
+    def _submission_parts(self) -> dict[str, list[int]]:
+        return self.at_submit_metadata["submission_parts"] or {}
 
+    @property
+    def submission_parts(self) -> list[SubmissionPart]:
         if self._submission_parts_lst is None:
             self._submission_parts_lst = [
                 {
-                    "submit_time": datetime.strptime(dt, self.workflow.ts_fmt)
-                    .replace(tzinfo=timezone.utc)
-                    .astimezone(),
+                    "submit_time": parse_timestamp(dt, self.workflow.ts_fmt),
                     "jobscripts": js_idx,
                 }
                 for dt, js_idx in self._submission_parts.items()
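Note: judging from the removed inline chain, `parse_timestamp` (newly imported from `hpcflow.sdk.core.utils`) is expected to parse the stored UTC timestamp string and convert it to local time; a hedged sketch of that assumption:

    from datetime import datetime, timezone

    def parse_timestamp_sketch(dt: str, ts_fmt: str) -> datetime:
        # hypothetical stand-in for hpcflow.sdk.core.utils.parse_timestamp,
        # reproducing the removed strptime/replace/astimezone chain:
        return datetime.strptime(dt, ts_fmt).replace(tzinfo=timezone.utc).astimezone()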
@@ -226,116 +266,89 @@ class Submission(JSONLike):
         return self._submission_parts_lst
 
     @TimeIt.decorator
-    def get_start_time(self, submit_time: str) -> Union[datetime, None]:
+    def get_start_time(self, submit_time: str) -> datetime | None:
         """Get the start time of a given submission part."""
-        js_idx = self._submission_parts[submit_time]
-        all_part_starts = []
-        for i in js_idx:
-            start_time = self.jobscripts[i].start_time
-            if start_time:
-                all_part_starts.append(start_time)
-        if all_part_starts:
-            return min(all_part_starts)
-        else:
-            return None
+        times = (
+            self.jobscripts[i].start_time for i in self._submission_parts[submit_time]
+        )
+        return min((t for t in times if t is not None), default=None)
 
     @TimeIt.decorator
-    def get_end_time(self, submit_time: str) -> Union[datetime, None]:
+    def get_end_time(self, submit_time: str) -> datetime | None:
         """Get the end time of a given submission part."""
-        js_idx = self._submission_parts[submit_time]
-        all_part_ends = []
-        for i in js_idx:
-            end_time = self.jobscripts[i].end_time
-            if end_time:
-                all_part_ends.append(end_time)
-        if all_part_ends:
-            return max(all_part_ends)
-        else:
-            return None
+        times = (self.jobscripts[i].end_time for i in self._submission_parts[submit_time])
+        return max((t for t in times if t is not None), default=None)
 
     @property
     @TimeIt.decorator
-    def start_time(self):
+    def start_time(self) -> datetime | None:
         """Get the first non-None start time over all submission parts."""
-        all_start_times = []
-        for submit_time in self._submission_parts:
-            start_i = self.get_start_time(submit_time)
-            if start_i:
-                all_start_times.append(start_i)
-        if all_start_times:
-            return max(all_start_times)
-        else:
-            return None
+        times = (
+            self.get_start_time(submit_time) for submit_time in self._submission_parts
+        )
+        return min((t for t in times if t is not None), default=None)
 
     @property
     @TimeIt.decorator
-    def end_time(self):
+    def end_time(self) -> datetime | None:
         """Get the final non-None end time over all submission parts."""
-        all_end_times = []
-        for submit_time in self._submission_parts:
-            end_i = self.get_end_time(submit_time)
-            if end_i:
-                all_end_times.append(end_i)
-        if all_end_times:
-            return max(all_end_times)
-        else:
-            return None
+        times = (self.get_end_time(submit_time) for submit_time in self._submission_parts)
+        return max((t for t in times if t is not None), default=None)
 
     @property
-    def jobscripts(self) -> List:
+    def jobscripts(self) -> list[Jobscript]:
         """
         The jobscripts in this submission.
         """
         return self._jobscripts
 
     @property
-    def JS_parallelism(self):
+    def JS_parallelism(self) -> bool | Literal["direct", "scheduled"] | None:
         """
         Whether to exploit jobscript parallelism.
         """
         return self._JS_parallelism
 
     @property
-    def workflow(self) -> List:
+    def workflow(self) -> Workflow:
         """
         The workflow this is part of.
         """
         return self._workflow
 
     @workflow.setter
-    def workflow(self, wk):
+    def workflow(self, wk: Workflow):
         self._workflow = wk
 
     @property
-    def jobscript_indices(self) -> Tuple[int]:
+    def jobscript_indices(self) -> tuple[int, ...]:
         """All associated jobscript indices."""
-        return tuple(i.index for i in self.jobscripts)
+        return tuple(js.index for js in self.jobscripts)
 
     @property
-    def submitted_jobscripts(self) -> Tuple[int]:
+    def submitted_jobscripts(self) -> tuple[int, ...]:
         """Jobscript indices that have been successfully submitted."""
-        return tuple(j for i in self.submission_parts for j in i["jobscripts"])
+        return tuple(j for sp in self.submission_parts for j in sp["jobscripts"])
 
     @property
-    def outstanding_jobscripts(self) -> Tuple[int]:
+    def outstanding_jobscripts(self) -> tuple[int, ...]:
         """Jobscript indices that have not yet been successfully submitted."""
-        return tuple(set(self.jobscript_indices) - set(self.submitted_jobscripts))
+        return tuple(set(self.jobscript_indices).difference(self.submitted_jobscripts))
 
     @property
-    def status(self):
+    def status(self) -> SubmissionStatus:
         """
         The status of this submission.
         """
         if not self.submission_parts:
             return SubmissionStatus.PENDING
+        elif set(self.submitted_jobscripts) == set(self.jobscript_indices):
+            return SubmissionStatus.SUBMITTED
         else:
-            if set(self.submitted_jobscripts) == set(self.jobscript_indices):
-                return SubmissionStatus.SUBMITTED
-            else:
-                return SubmissionStatus.PARTIALLY_SUBMITTED
+            return SubmissionStatus.PARTIALLY_SUBMITTED
 
     @property
-    def needs_submit(self):
+    def needs_submit(self) -> bool:
         """
         Whether this submission needs a submit to be done.
         """
@@ -345,131 +358,695 @@ class Submission(JSONLike):
         )
 
     @property
-    def path(self):
+    def needs_app_log_dir(self) -> bool:
         """
-        The path to files associated with this submission.
+        Whether this submision requires an app log directory.
         """
-        return self.workflow.submissions_path / str(self.index)
+        for js in self.jobscripts:
+            if js.resources.write_app_logs:
+                return True
+        return False
 
     @property
-    def all_EAR_IDs(self):
+    def needs_win_pids_dir(self) -> bool:
         """
-        The IDs of all EARs in this submission.
+        Whether this submision requires a directory for process ID files (Windows only).
+        """
+        for js in self.jobscripts:
+            if js.os_name == "nt":
+                return True
+        return False
+
+    @property
+    def needs_script_indices_dir(self) -> bool:
+        """
+        Whether this submision requires a directory for combined-script script ID files.
+        """
+        for js in self.jobscripts:
+            if js.resources.combine_scripts:
+                return True
+        return False
+
+    @classmethod
+    def get_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The directory path to files associated with the specified submission.
+        """
+        return submissions_path / str(sub_idx)
+
+    @classmethod
+    def get_tmp_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the temporary files directory, for the specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.TMP_DIR_NAME
+
+    @classmethod
+    def get_app_log_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the app log directory for this submission, for the specified
+        submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.LOG_DIR_NAME
+
+    @staticmethod
+    def get_app_log_file_name(run_ID: int | str) -> str:
+        """
+        The app log file name.
+        """
+        # TODO: consider combine_app_logs argument
+        return f"r_{run_ID}.log"
+
+    @classmethod
+    def get_app_log_file_path(cls, submissions_path: Path, sub_idx: int, run_ID: int):
+        """
+        The file path to the app log, for the specified submission.
+        """
+        return (
+            cls.get_path(submissions_path, sub_idx)
+            / cls.LOG_DIR_NAME
+            / cls.get_app_log_file_name(run_ID)
+        )
+
+    @classmethod
+    def get_app_std_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the app standard output and error stream files directory, for the
+        specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.APP_STD_DIR_NAME
+
+    @classmethod
+    def get_js_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the jobscript files directory, for the specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.JS_DIR_NAME
+
+    @classmethod
+    def get_js_std_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the jobscript standard output and error files directory, for the
+        specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.JS_STD_DIR_NAME
+
+    @classmethod
+    def get_js_run_ids_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the directory containing jobscript run IDs, for the specified
+        submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.JS_RUN_IDS_DIR_NAME
+
+    @classmethod
+    def get_js_funcs_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the directory containing the shell functions that are invoked within
+        jobscripts and commmand files, for the specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.JS_FUNCS_DIR_NAME
+
+    @classmethod
+    def get_js_win_pids_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the directory containing process ID files (Windows only), for the
+        specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.JS_WIN_PIDS_DIR_NAME
+
+    @classmethod
+    def get_js_script_indices_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the directory containing script indices for combined-script jobscripts
+        only, for the specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.JS_SCRIPT_INDICES_DIR_NAME
+
+    @classmethod
+    def get_scripts_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the directory containing action scripts, for the specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.SCRIPTS_DIR_NAME
+
+    @classmethod
+    def get_commands_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the directory containing command files, for the specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.COMMANDS_DIR_NAME
+
+    @property
+    def path(self) -> Path:
+        """
+        The path to the directory containing action scripts.
+        """
+        return self.get_path(self.workflow.submissions_path, self.index)
+
+    @property
+    def tmp_path(self) -> Path:
+        """
+        The path to the temporary files directory for this submission.
+        """
+        return self.get_tmp_path(self.workflow.submissions_path, self.index)
+
+    @property
+    def app_log_path(self) -> Path:
+        """
+        The path to the app log directory for this submission for this submission.
+        """
+        return self.get_app_log_path(self.workflow.submissions_path, self.index)
+
+    @property
+    def app_std_path(self) -> Path:
+        """
+        The path to the app standard output and error stream files directory, for the
+        this submission.
+        """
+        return self.get_app_std_path(self.workflow.submissions_path, self.index)
+
+    @property
+    def js_path(self) -> Path:
+        """
+        The path to the jobscript files directory, for this submission.
+        """
+        return self.get_js_path(self.workflow.submissions_path, self.index)
+
+    @property
+    def js_std_path(self) -> Path:
+        """
+        The path to the jobscript standard output and error files directory, for this
+        submission.
+        """
+        return self.get_js_std_path(self.workflow.submissions_path, self.index)
+
+    @property
+    def js_run_ids_path(self) -> Path:
+        """
+        The path to the directory containing jobscript run IDs, for this submission.
+        """
+        return self.get_js_run_ids_path(self.workflow.submissions_path, self.index)
+
+    @property
+    def js_funcs_path(self) -> Path:
         """
-        return [i for js in self.jobscripts for i in js.all_EAR_IDs]
+        The path to the directory containing the shell functions that are invoked within
+        jobscripts and commmand files, for this submission.
+        """
+        return self.get_js_funcs_path(self.workflow.submissions_path, self.index)
+
+    @property
+    def js_win_pids_path(self) -> Path:
+        """
+        The path to the directory containing process ID files (Windows only), for this
+        submission.
+        """
+        return self.get_js_win_pids_path(self.workflow.submissions_path, self.index)
+
+    @property
+    def js_script_indices_path(self) -> Path:
+        """
+        The path to the directory containing script indices for combined-script jobscripts
+        only, for this submission.
+        """
+        return self.get_js_script_indices_path(self.workflow.submissions_path, self.index)
+
+    @property
+    def scripts_path(self) -> Path:
+        """
+        The path to the directory containing action scripts, for this submission.
+        """
+        return self.get_scripts_path(self.workflow.submissions_path, self.index)
 
     @property
-    def all_EARs(self):
+    def commands_path(self) -> Path:
         """
-        All EARs in this this submission.
+        The path to the directory containing command files, for this submission.
         """
-        return [i for js in self.jobscripts for i in js.all_EARs]
+        return self.get_commands_path(self.workflow.submissions_path, self.index)
 
     @property
     @TimeIt.decorator
-    def EARs_by_elements(self):
+    def all_EAR_IDs(self) -> Iterable[int]:
         """
-        All EARs in this submission, grouped by element.
+        The IDs of all EARs in this submission.
         """
-        task_elem_EARs = defaultdict(lambda: defaultdict(list))
-        for i in self.all_EARs:
-            task_elem_EARs[i.task.index][i.element.index].append(i)
-        return task_elem_EARs
+        return (i for js in self.jobscripts for i in js.all_EAR_IDs)
 
     @property
-    def abort_EARs_file_name(self):
+    @TimeIt.decorator
+    def all_EARs(self) -> Iterable[ElementActionRun]:
         """
-        The name of a file describing what EARs have aborted.
+        All EARs in this submission.
         """
-        return f"abort_EARs.txt"
+        return (ear for js in self.jobscripts for ear in js.all_EARs)
+
+    @property
+    @TimeIt.decorator
+    def all_EARs_IDs_by_jobscript(self) -> list[np.ndarray]:
+        return [i.all_EAR_IDs for i in self.jobscripts]
 
     @property
-    def abort_EARs_file_path(self):
+    @TimeIt.decorator
+    def all_EARs_by_jobscript(self) -> list[list[ElementActionRun]]:
+        ids = [i.all_EAR_IDs for i in self.jobscripts]
+        all_EARs = {i.id_: i for i in self.workflow.get_EARs_from_IDs(self.all_EAR_IDs)}
+        return [[all_EARs[i] for i in js_ids] for js_ids in ids]
+
+    @property
+    @TimeIt.decorator
+    def EARs_by_elements(self) -> Mapping[int, Mapping[int, Sequence[ElementActionRun]]]:
         """
-        The path to the file describing what EARs have aborted in this submission.
+        All EARs in this submission, grouped by element.
         """
-        return self.path / self.abort_EARs_file_name
+        task_elem_EARs: dict[int, dict[int, list[ElementActionRun]]] = defaultdict(
+            lambda: defaultdict(list)
+        )
+        for ear in self.all_EARs:
+            task_elem_EARs[ear.task.index][ear.element.index].append(ear)
+        return task_elem_EARs
+
+    @property
+    def is_scheduled(self) -> tuple[bool, ...]:
+        """Return whether each jobscript of this submission uses a scheduler or not."""
+        return tuple(i.is_scheduled for i in self.jobscripts)
+
+    @overload
+    def get_active_jobscripts(
+        self, as_json: Literal[False] = False
+    ) -> Mapping[int, Mapping[int, Mapping[int, JobscriptElementState]]]:
+        ...
+
+    @overload
+    def get_active_jobscripts(
+        self, as_json: Literal[True]
+    ) -> Mapping[int, Mapping[int, Mapping[int, str]]]:
+        ...
 
     @TimeIt.decorator
     def get_active_jobscripts(
-        self, as_json: bool = False
-    ) -> List[Tuple[int, Dict[int, JobscriptElementState]]]:
+        self,
+        as_json: Literal[True] | Literal[False] = False,  # TODO: why can't we use bool?
+    ) -> Mapping[int, Mapping[int, Mapping[int, JobscriptElementState | str]]]:
         """Get jobscripts that are active on this machine, and their active states."""
-        # this returns: {JS_IDX: {JS_ELEMENT_IDX: STATE}}
+        # this returns: {JS_IDX: {BLOCK_IDX: {JS_ELEMENT_IDX: STATE}}}
         # TODO: query the scheduler once for all jobscripts?
-        out = {}
-        for js in self.jobscripts:
-            active_states = js.get_active_states(as_json=as_json)
-            if active_states:
-                out[js.index] = active_states
-        return out
+        return {
+            js.index: act_states
+            for js in self.jobscripts
+            if (act_states := js.get_active_states(as_json=as_json))
+        }
 
-    def _write_abort_EARs_file(self):
-        with self.abort_EARs_file_path.open(mode="wt", newline="\n") as fp:
-            # write a single line for each EAR currently in the workflow:
-            fp.write("\n".join("0" for _ in range(self.workflow.num_EARs)) + "\n")
-
-    def _set_run_abort(self, run_ID: int):
-        """Modify the abort runs file to indicate a specified run should be aborted."""
-        with self.abort_EARs_file_path.open(mode="rt", newline="\n") as fp:
-            lines = fp.read().splitlines()
-        lines[run_ID] = "1"
-
-        # write a new temporary run-abort file:
-        tmp_suffix = self.abort_EARs_file_path.suffix + ".tmp"
-        tmp = self.abort_EARs_file_path.with_suffix(tmp_suffix)
-        self.app.submission_logger.debug(f"Creating temporary run abort file: {tmp!r}.")
-        with tmp.open(mode="wt", newline="\n") as fp:
-            fp.write("\n".join(i for i in lines) + "\n")
-
-        # atomic rename, overwriting original:
-        self.app.submission_logger.debug(
-            "Replacing original run abort file with new temporary file."
+    @TimeIt.decorator
+    def _write_scripts(
+        self, cache: ObjectCache, status: Status | None = None
+    ) -> tuple[dict[int, int | None], NDArray, dict[int, list[Path]]]:
+        """Write to disk all action scripts associated with this submission."""
+        # TODO: rename this method
+
+        # TODO: need to check is_snippet_script is exclusive? i.e. only `script` and no
+        # `commands` in the action?
+        # TODO: scripts must have the same exe and the same environment as well?
+        # TODO: env_spec should be included in jobscript hash if combine_scripts=True ?
+
+        actions_by_schema: dict[str, dict[int, set]] = defaultdict(
+            lambda: defaultdict(set)
         )
-        os.replace(src=tmp, dst=self.abort_EARs_file_path)
+        combined_env_specs = {}
+
+        # task insert IDs and action indices for each combined_scripts jobscript:
+        combined_actions = {}
+
+        cmd_hashes = defaultdict(set)
+        num_runs_tot = sum(len(js.all_EAR_IDs) for js in self.jobscripts)
+        run_indices = np.ones((num_runs_tot, 9), dtype=int) * -1
+        run_inp_files = defaultdict(
+            list
+        )  # keys are `run_idx`, values are Paths to copy to run dir
+        run_cmd_file_names: dict[int, int | None] = {}  # None if no commands to write
+        run_idx = 0
+
+        if status:
+            status.update(f"Adding new submission: processing run 1/{num_runs_tot}.")
+
+        all_runs = cache.runs
+        assert all_runs is not None
+        runs_ids_by_js = self.all_EARs_IDs_by_jobscript
+
+        with self.workflow.cached_merged_parameters():
+            for js in self.jobscripts:
+                js_idx = js.index
+                js_run_0 = all_runs[runs_ids_by_js[js.index][0]]
+
+                if js.resources.combine_scripts:
+                    # this will be one or more snippet scripts that needs to be combined into
+                    # one script for the whole jobscript
+
+                    # need to write one script + one commands file for the whole jobscript
+
+                    # env_spec will be the same for all runs of this jobscript:
+                    combined_env_specs[js_idx] = js_run_0.env_spec
+                    combined_actions[js_idx] = [
+                        [j[0:2] for j in i.task_actions] for i in js.blocks
+                    ]
+
+                for idx, run_id in enumerate(js.all_EAR_IDs):
+                    run = all_runs[run_id]
+
+                    run_indices[run_idx] = [
+                        run.task.insert_ID,
+                        run.element.id_,
+                        run.element_iteration.id_,
+                        run.id_,
+                        run.element.index,
+                        run.element_iteration.index,
+                        run.element_action.action_idx,
+                        run.index,
+                        int(run.action.requires_dir),
+                    ]
+                    run_idx += 1
+
+                    if status and run_idx % 10 == 0:
+                        status.update(
+                            f"Adding new submission: processing run {run_idx}/{num_runs_tot}."
+                        )
+
+                    if js.resources.combine_scripts:
+                        if idx == 0:
+                            # the commands file for a combined jobscript won't have
+                            # any parameter data in the command line, so should raise
+                            # if something is found to be unset:
+                            run.try_write_commands(
+                                environments=self.environments,
+                                jobscript=js,
+                                raise_on_unset=True,
+                            )
+                        run_cmd_file_names[run.id_] = None
+
+                    else:
+                        if run.is_snippet_script:
+                            actions_by_schema[run.action.task_schema.name][
+                                run.element_action.action_idx
+                            ].add(run.env_spec_hashable)
+
+                        if run.action.commands:
+                            hash_i = run.get_commands_file_hash()
+                            # TODO: could further reduce number of files in the case the data
+                            # indices hash is the same: if commands objects are the same and
+                            # environment objects are the same, then the files will be the
+                            # same, even if runs come from different task schemas/actions...
+                            if hash_i not in cmd_hashes:
+                                try:
+                                    run.try_write_commands(
+                                        environments=self.environments,
+                                        jobscript=js,
+                                    )
+                                except OutputFileParserNoOutputError:
+                                    # no commands to write, might be used just for saving
+                                    # files
+                                    run_cmd_file_names[run.id_] = None
+                            cmd_hashes[hash_i].add(run.id_)
+                        else:
+                            run_cmd_file_names[run.id_] = None
+
+                    if run.action.requires_dir:
+                        # TODO: what is type of `path`?
+                        for name, path in run.get("input_files", {}).items():
+                            if path:
+                                run_inp_files[run_idx].append(path)
+
+        for run_ids in cmd_hashes.values():
+            run_ids_srt = sorted(run_ids)
+            root_id = run_ids_srt[0]  # used for command file name for this group
+            # TODO: could store multiple IDs to reduce number of files created
+            for run_id_i in run_ids_srt:
+                if run_id_i not in run_cmd_file_names:
+                    run_cmd_file_names[run_id_i] = root_id
+
+        if status:
+            status.update("Adding new submission: writing scripts...")
+
+        seen: dict[int, Path] = {}
+        combined_script_data: dict[
+            int, dict[int, list[tuple[str, Path, bool]]]
+        ] = defaultdict(lambda: defaultdict(list))
+        for task in self.workflow.tasks:
+            for schema in task.template.schemas:
+                if schema.name in actions_by_schema:
+                    for idx, action in enumerate(schema.actions):
+
+                        if not action.script:
+                            continue
+
+                        for env_spec_h in actions_by_schema[schema.name][idx]:
+
+                            env_spec = action.env_spec_from_hashable(env_spec_h)
+                            name, snip_path, specs = action.get_script_artifact_name(
+                                env_spec=env_spec,
+                                act_idx=idx,
+                                ret_specifiers=True,
+                            )
+                            script_hash = action.get_script_determinant_hash(specs)
+                            script_path = self.scripts_path / name
+                            prev_path = seen.get(script_hash)
+                            if script_path == prev_path:
+                                continue
+
+                            elif prev_path:
+                                # try to make a symbolic link to the file previously
+                                # created:
+                                try:
+                                    script_path.symlink_to(prev_path.name)
+                                except OSError:
+                                    # windows requires admin permission, copy instead:
+                                    shutil.copy(prev_path, script_path)
+                            else:
+                                # write script to disk:
+                                source_str = action.compose_source(snip_path)
+                                if source_str:
+                                    with script_path.open("wt", newline="\n") as fp:
+                                        fp.write(source_str)
+                                    seen[script_hash] = script_path
+
+        # combined script stuff
+        for js_idx, act_IDs in combined_actions.items():
+            for block_idx, act_IDs_i in enumerate(act_IDs):
+                for task_iID, act_idx in act_IDs_i:
+                    task = self.workflow.tasks.get(insert_ID=task_iID)
+                    schema = task.template.schemas[0]  # TODO: multiple schemas
+                    action = schema.actions[act_idx]
+                    func_name, snip_path = action.get_script_artifact_name(
+                        env_spec=combined_env_specs[js_idx],
+                        act_idx=act_idx,
+                        ret_specifiers=False,
+                        include_suffix=False,
+                        specs_suffix_delim="_",  # can't use "." in function name
+                    )
+                    combined_script_data[js_idx][block_idx].append(
+                        (func_name, snip_path, action.requires_dir)
+                    )
+
+        for js_idx, action_scripts in combined_script_data.items():
+            js = self.jobscripts[js_idx]
+
+            script_str, script_indices, num_elems, num_acts = js.compose_combined_script(
+                [i for _, i in sorted(action_scripts.items())]
+            )
+            js.write_script_indices_file(script_indices, num_elems, num_acts)
+
+            script_path = self.scripts_path / f"js_{js_idx}.py"  # TODO: refactor name
+            with script_path.open("wt", newline="\n") as fp:
+                fp.write(script_str)
+
+        return run_cmd_file_names, run_indices, run_inp_files
+
+    @TimeIt.decorator
+    def _calculate_run_dir_indices(
+        self,
+        run_indices: np.ndarray,
+        cache: ObjectCache,
+    ) -> tuple[np.ndarray, np.ndarray]:
+
+        assert cache.elements is not None
+        assert cache.iterations is not None
+        # get the multiplicities of all tasks, elements, iterations, and runs:
+        wk_num_tasks = self.workflow.num_tasks
+        task_num_elems = {}
+        elem_num_iters = {}
+        iter_num_acts = {}
+        iter_acts_num_runs = {}
+        for task in self.workflow.tasks:
+            elem_IDs = task.element_IDs
+            task_num_elems[task.insert_ID] = len(elem_IDs)
+            for elem_ID in elem_IDs:
+                iter_IDs = cache.elements[elem_ID].iteration_IDs
+                elem_num_iters[elem_ID] = len(iter_IDs)
+                for iter_ID in iter_IDs:
+                    run_IDs = cache.iterations[iter_ID].EAR_IDs
+                    if run_IDs:  # the schema might have no actions
+                        iter_num_acts[iter_ID] = len(run_IDs)
+                        for act_idx, act_run_IDs in run_IDs.items():
+                            iter_acts_num_runs[(iter_ID, act_idx)] = len(act_run_IDs)
+                    else:
+                        iter_num_acts[iter_ID] = 0
+
+        max_u8 = np.iinfo(np.uint8).max
+        max_u32 = np.iinfo(np.uint32).max
+        MAX_ELEMS_PER_DIR = 1000  # TODO: configurable (add `workflow_defaults` to Config)
+        MAX_ITERS_PER_DIR = 1000
+        requires_dir_idx = np.where(run_indices[:, -1] == 1)[0]
+        run_dir_arr = np.empty(requires_dir_idx.size, dtype=RUN_DIR_ARR_DTYPE)
+        run_ids = np.empty(requires_dir_idx.size, dtype=int)
+
+        elem_depths: dict[int, int] = {}
+        iter_depths: dict[int, int] = {}
+        for idx in range(requires_dir_idx.size):
+            row = run_indices[requires_dir_idx[idx]]
+            t_iID, e_id, i_id, r_id, e_idx, i_idx, a_idx, r_idx = row[:-1]
+            run_ids[idx] = r_id
+
+            num_elems_i = task_num_elems[t_iID]
+            num_iters_i = elem_num_iters[e_id]
+            num_acts_i = iter_num_acts[i_id]  # see TODO below
+            num_runs_i = iter_acts_num_runs[(i_id, a_idx)]
+
+            e_depth = 1
+            if num_elems_i == 1:
+                e_idx = max_u32
+            elif num_elems_i > MAX_ELEMS_PER_DIR:
+                if (e_depth := elem_depths.get(t_iID, -1)) == -1:
+                    e_depth = int(
+                        np.ceil(np.log(num_elems_i) / np.log(MAX_ELEMS_PER_DIR))
+                    )
+                    elem_depths[t_iID] = e_depth
+
+            # TODO: i_idx should be either MAX or the iteration ID, which will index into
+            # a separate array to get the formatted loop indices e.g.
+            # ("outer_loop_0_inner_loop_9")
+            i_depth = 1
+            if num_iters_i == 1:
+                i_idx = max_u32
+            elif num_iters_i > MAX_ITERS_PER_DIR:
+                if (i_depth := iter_depths.get(e_id, -1)) == -1:
+                    i_depth = int(
+                        np.ceil(np.log(num_iters_i) / np.log(MAX_ITERS_PER_DIR))
+                    )
+                    iter_depths[e_id] = i_depth
+
+            a_idx = max_u8  # TODO: for now, always exclude action index dir
+
+            if num_runs_i == 1:
+                r_idx = max_u8
+
+            if wk_num_tasks == 1:
+                t_iID = max_u8
+
+            run_dir_arr[idx] = (t_iID, e_idx, i_idx, a_idx, r_idx, e_depth, i_depth)
+
+        return run_dir_arr, run_ids
+
+    @TimeIt.decorator
+    def _write_execute_dirs(
+        self,
+        run_indices: NDArray,
+        run_inp_files: dict[int, list[Path]],
+        cache: ObjectCache,
+        status: Status | None = None,
+    ):
+
+        if status:
+            status.update("Adding new submission: resolving execution directories...")
+
+        run_dir_arr, run_idx = self._calculate_run_dir_indices(run_indices, cache)
+
+        # set run dirs in persistent array:
+        if run_idx.size:
+            self.workflow._store.set_run_dirs(run_dir_arr, run_idx)
+
+        # retrieve run directories as paths. array is not yet commited, so pass in
+        # directly:
+        run_dirs = self.workflow.get_run_directories(dir_indices_arr=run_dir_arr)
+
+        if status:
+            status.update("Adding new submission: making execution directories...")
+
+        # make directories
+        for idx, run_dir in enumerate(run_dirs):
+            assert run_dir
+            run_dir.mkdir(parents=True, exist_ok=True)
+            inp_files_i = run_inp_files.get(run_idx[idx])
+            if inp_files_i:
+                # copy (TODO: optionally symlink) any input files:
+                for path_i in inp_files_i:
+                    shutil.copy(path_i, run_dir)
 
     @staticmethod
     def get_unique_schedulers_of_jobscripts(
-        jobscripts: List[Jobscript],
-    ) -> Dict[Tuple[Tuple[int, int]], Scheduler]:
+        jobscripts: Iterable[Jobscript],
+    ) -> Iterable[tuple[tuple[tuple[int, int], ...], Scheduler]]:
         """Get unique schedulers and which of the passed jobscripts they correspond to.
 
-        Uniqueness is determines only by the `Scheduler.unique_properties` tuple.
+        Uniqueness is determined only by the `QueuedScheduler.unique_properties` tuple.
 
         Parameters
         ----------
         jobscripts: list[~hpcflow.app.Jobscript]
+
+        Returns
+        -------
+        scheduler_mapping
+            Mapping where keys are a sequence of jobscript index descriptors and
+            the values are the scheduler to use for that jobscript.
+            A jobscript index descriptor is a pair of the submission index and the main
+            jobscript index.
         """
-        js_idx = []
-        schedulers = []
+        js_idx: list[list[tuple[int, int]]] = []
+        schedulers: list[Scheduler] = []
 
         # list of tuples of scheduler properties we consider to determine "uniqueness",
         # with the first string being the scheduler type (class name):
-        seen_schedulers = []
+        seen_schedulers: dict[tuple, int] = {}
 
         for js in jobscripts:
-            if js.scheduler.unique_properties not in seen_schedulers:
-                seen_schedulers.append(js.scheduler.unique_properties)
+            if (
+                sched_idx := seen_schedulers.get(key := js.scheduler.unique_properties)
+            ) is None:
+                seen_schedulers[key] = sched_idx = len(seen_schedulers) - 1
                 schedulers.append(js.scheduler)
                 js_idx.append([])
-            sched_idx = seen_schedulers.index(js.scheduler.unique_properties)
             js_idx[sched_idx].append((js.submission.index, js.index))
 
-        sched_js_idx = dict(zip((tuple(i) for i in js_idx), schedulers))
+        return zip(map(tuple, js_idx), schedulers)
 
-        return sched_js_idx
+    @property
+    @TimeIt.decorator
+    def _unique_schedulers(
+        self,
+    ) -> Iterable[tuple[tuple[tuple[int, int], ...], Scheduler]]:
+        return self.get_unique_schedulers_of_jobscripts(self.jobscripts)
 
     @TimeIt.decorator
-    def get_unique_schedulers(self) -> Dict[Tuple[int], Scheduler]:
+    def get_unique_schedulers(self) -> Mapping[tuple[tuple[int, int], ...], Scheduler]:
         """Get unique schedulers and which of this submission's jobscripts they
-        correspond to."""
-        return self.get_unique_schedulers_of_jobscripts(self.jobscripts)
+        correspond to.
+
+        Returns
+        -------
+        scheduler_mapping
+            Mapping where keys are a sequence of jobscript index descriptors and
+            the values are the scheduler to use for that jobscript.
+            A jobscript index descriptor is a pair of the submission index and the main
+            jobscript index.
+        """
+        # This is an absurd type; you never use the key as a key
+        return dict(self._unique_schedulers)
 
     @TimeIt.decorator
-    def get_unique_shells(self) -> Dict[Tuple[int], Shell]:
+    def get_unique_shells(self) -> Iterable[tuple[tuple[int, ...], Shell]]:
         """Get unique shells and which jobscripts they correspond to."""
-        js_idx = []
-        shells = []
+        js_idx: list[list[int]] = []
+        shells: list[Shell] = []
 
         for js in self.jobscripts:
             if js.shell not in shells:
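Note: in the new `_calculate_run_dir_indices`, the directory nesting depth for a task's elements is ceil(log_MAX(n)) with `MAX_ELEMS_PER_DIR = 1000`: up to 1000 elements fit at depth 1, 50,000 need depth 2, and 1,500,000 need depth 3; the `np.uint8`/`np.uint32` max values act as sentinels for path components that are omitted. Worked example of the depth formula:

    import numpy as np

    n, MAX = 50_000, 1_000
    depth = int(np.ceil(np.log(n) / np.log(MAX)))
    assert depth == 2  # 1000 dirs of up to 1000 entries cover 50,000 elements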
@@ -478,126 +1055,154 @@ class Submission(JSONLike):
478
1055
  shell_idx = shells.index(js.shell)
479
1056
  js_idx[shell_idx].append(js.index)
480
1057
 
481
- shell_js_idx = dict(zip((tuple(i) for i in js_idx), shells))
1058
+ return zip(map(tuple, js_idx), shells)
482
1059
 
483
- return shell_js_idx
1060
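Both get_unique_shells and get_unique_schedulers_of_jobscripts now yield (indices, object) pairs rather than building a dict. A hedged sketch of consuming such pairs, mirroring how submit later enumerates them (the shell values are stand-ins, not hpcflow objects):

fake_unique_shells = [((0, 1), "bash"), ((2,), "powershell")]
for shell_idx, (js_indices, shell) in enumerate(fake_unique_shells):
    # each pair maps a tuple of jobscript indices to the shell they share
    print(f"shell file {shell_idx} ({shell}) serves jobscripts {js_indices}")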
+    def _update_at_submit_metadata(self, submission_parts: dict[str, list[int]]):
+        """Update persistent store and in-memory record of at-submit metadata.
 
-    def _raise_failure(self, submitted_js_idx, exceptions):
-        msg = f"Some jobscripts in submission index {self.index} could not be submitted"
-        if submitted_js_idx:
-            msg += f" (but jobscripts {submitted_js_idx} were submitted successfully):"
-        else:
-            msg += ":"
-
-        msg += "\n"
-        for sub_err in exceptions:
-            msg += (
-                f"Jobscript {sub_err.js_idx} at path: {str(sub_err.js_path)!r}\n"
-                f"Submit command: {sub_err.submit_cmd!r}.\n"
-                f"Reason: {sub_err.message!r}\n"
-            )
-            if sub_err.subprocess_exc is not None:
-                msg += f"Subprocess exception: {sub_err.subprocess_exc}\n"
-            if sub_err.job_ID_parse_exc is not None:
-                msg += f"Subprocess job ID parse exception: {sub_err.job_ID_parse_exc}\n"
-            if sub_err.job_ID_parse_exc is not None:
-                msg += f"Job ID parse exception: {sub_err.job_ID_parse_exc}\n"
-            if sub_err.stdout:
-                msg += f"Submission stdout:\n{indent(sub_err.stdout, ' ')}\n"
-            if sub_err.stderr:
-                msg += f"Submission stderr:\n{indent(sub_err.stderr, ' ')}\n"
-
-        raise SubmissionFailure(message=msg)
-
-    def _append_submission_part(self, submit_time: str, submitted_js_idx: List[int]):
-        self._submission_parts[submit_time] = submitted_js_idx
-        self.workflow._store.add_submission_part(
+        Notes
+        -----
+        Currently there is only one type of at-submit metadata, which is the
+        submission-parts: a mapping between a string submit-time, and the list of
+        jobscript indices that were submitted at that submit-time. This method updates
+        the recorded submission parts to include those passed here.
+
+        """
+
+        self.workflow._store.update_at_submit_metadata(
             sub_idx=self.index,
-            dt_str=submit_time,
-            submitted_js_idx=submitted_js_idx,
+            submission_parts=submission_parts,
+        )
+
+        self._at_submit_metadata["submission_parts"].update(submission_parts)
+
+        # cache is now invalid:
+        self._submission_parts_lst = None
+
+    def _append_submission_part(self, submit_time: str, submitted_js_idx: list[int]):
+        self._update_at_submit_metadata(submission_parts={submit_time: submitted_js_idx})
+
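For reference, a sketch of the submission-parts mapping described in the notes above; the timestamp strings and indices are illustrative values only:

submission_parts = {
    "2024-01-01T12:00:00": [0, 1, 2],  # first submission part
    "2024-01-02T09:30:00": [3],        # outstanding jobscript submitted later
}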
+    def get_jobscript_functions_name(self, shell: Shell, shell_idx: int) -> str:
+        """Get the name of the jobscript functions file for the specified shell."""
+        return f"js_funcs_{shell_idx}{shell.JS_EXT}"
+
+    def get_jobscript_functions_path(self, shell: Shell, shell_idx: int) -> Path:
+        """Get the path of the jobscript functions file for the specified shell."""
+        return self.js_funcs_path / self.get_jobscript_functions_name(shell, shell_idx)
+
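A quick sanity check of the naming scheme, assuming a bash-like shell whose JS_EXT is ".sh" (that extension value is an assumption, not confirmed by this diff):

shell_idx = 0
js_ext = ".sh"  # stand-in for shell.JS_EXT
assert f"js_funcs_{shell_idx}{js_ext}" == "js_funcs_0.sh"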
+    def _compose_functions_file(self, shell: Shell) -> str:
+        """Prepare the contents of the jobscript functions file for the specified
+        shell.
+
+        Notes
+        -----
+        The functions file includes, at a minimum, a shell function that invokes the app
+        with provided arguments. This file will be sourced/invoked within all jobscripts
+        and command files that share the specified shell.
+
+        """
+
+        cfg_invocation = self._app.config._file.get_invocation(
+            self._app.config._config_key
+        )
+        env_setup = cfg_invocation["environment_setup"]
+        if env_setup:
+            env_setup = indent(env_setup.strip(), shell.JS_ENV_SETUP_INDENT)
+            env_setup += "\n\n" + shell.JS_ENV_SETUP_INDENT
+        else:
+            env_setup = shell.JS_ENV_SETUP_INDENT
+        app_invoc = list(self._app.run_time_info.invocation_command)
+
+        app_caps = self._app.package_name.upper()
+        func_file_args = shell.process_JS_header_args(  # TODO: rename?
+            {
+                "workflow_app_alias": self.WORKFLOW_APP_ALIAS,
+                "env_setup": env_setup,
+                "app_invoc": app_invoc,
+                "app_caps": app_caps,
+                "config_dir": str(self._app.config.config_directory),
+                "config_invoc_key": self._app.config.config_key,
+            }
         )
+        out = shell.JS_FUNCS.format(**func_file_args)
+        return out
+
+    def _write_functions_file(self, shell: Shell, shell_idx: int) -> None:
+        """Write the jobscript functions file for the specified shell.
+
+        Notes
+        -----
+        The functions file includes, at a minimum, a shell function that invokes the app
+        with provided arguments. This file will be sourced/invoked within all jobscripts
+        and command files that share the specified shell.
+
+        """
+        js_funcs_str = self._compose_functions_file(shell)
+        path = self.get_jobscript_functions_path(shell, shell_idx)
+        with path.open("wt", newline="\n") as fp:
+            fp.write(js_funcs_str)
 
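shell.JS_FUNCS is consumed via str.format above; a minimal sketch of that mechanism with a made-up two-placeholder template (the real JS_FUNCS templates are shell-specific and not shown in this diff):

JS_FUNCS = "{workflow_app_alias}() {{\n  {app_invoc} \"$@\"\n}}\n"  # hypothetical
print(JS_FUNCS.format(workflow_app_alias="wkflow_app", app_invoc="/path/to/app"))
# -> wkflow_app() {
#      /path/to/app "$@"
#    }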
     @TimeIt.decorator
     def submit(
         self,
-        status,
-        ignore_errors: Optional[bool] = False,
-        print_stdout: Optional[bool] = False,
-        add_to_known: Optional[bool] = True,
-    ) -> List[int]:
+        status: Status | None,
+        ignore_errors: bool = False,
+        print_stdout: bool = False,
+        add_to_known: bool = True,
+    ) -> list[int]:
         """Generate and submit the jobscripts of this submission."""
 
-        # if JS_parallelism explicitly requested but store doesn't support, raise:
-        supports_JS_para = self.workflow._store._features.jobscript_parallelism
-        if self.JS_parallelism:
-            if not supports_JS_para:
-                if status:
-                    status.stop()
-                raise ValueError(
-                    f"Store type {self.workflow._store!r} does not support jobscript "
-                    f"parallelism."
-                )
-        elif self.JS_parallelism is None:
-            self._JS_parallelism = supports_JS_para
-
-        # set os_name and shell_name for each jobscript:
-        for js in self.jobscripts:
-            js._set_os_name()
-            js._set_shell_name()
-            js._set_scheduler_name()
+        # TODO: support passing a list of jobscript indices to submit; this will allow
+        # us to test a submission with multiple "submission parts". We would also need
+        # to check dependencies if this customised list is passed.
 
         outstanding = self.outstanding_jobscripts
 
         # get scheduler, shell and OS version information (also an opportunity to fail
         # before trying to submit jobscripts):
-        js_vers_info = {}
-        for js_indices, sched in self.get_unique_schedulers().items():
+        js_vers_info: dict[int, dict[str, str | list[str]]] = {}
+        for js_indices, sched in self._unique_schedulers:
             try:
                 vers_info = sched.get_version_info()
-            except Exception as err:
-                if ignore_errors:
-                    vers_info = {}
-                else:
-                    raise err
+            except Exception:
+                if not ignore_errors:
+                    raise
+                vers_info = {}
             for _, js_idx in js_indices:
                 if js_idx in outstanding:
-                    if js_idx not in js_vers_info:
-                        js_vers_info[js_idx] = {}
-                    js_vers_info[js_idx].update(vers_info)
+                    js_vers_info.setdefault(js_idx, {}).update(vers_info)
 
-        for js_indices, shell in self.get_unique_shells().items():
+        js_shell_indices = {}
+        for shell_idx, (js_indices_2, shell) in enumerate(self.get_unique_shells()):
             try:
                 vers_info = shell.get_version_info()
-            except Exception as err:
-                if ignore_errors:
-                    vers_info = {}
-                else:
-                    raise err
-            for js_idx in js_indices:
+            except Exception:
+                if not ignore_errors:
+                    raise
+                vers_info = {}
+            for js_idx in js_indices_2:
                 if js_idx in outstanding:
-                    if js_idx not in js_vers_info:
-                        js_vers_info[js_idx] = {}
-                    js_vers_info[js_idx].update(vers_info)
+                    js_vers_info.setdefault(js_idx, {}).update(vers_info)
+                    js_shell_indices[js_idx] = shell_idx
 
+            # write a file containing useful shell functions:
+            self._write_functions_file(shell, shell_idx)
+
+        hostname = socket.gethostname()
+        machine = self._app.config.get("machine")
         for js_idx, vers_info_i in js_vers_info.items():
-            self.jobscripts[js_idx]._set_version_info(vers_info_i)
+            js = self.jobscripts[js_idx]
+            js._set_version_info(vers_info_i)
+            js._set_submit_hostname(hostname)
+            js._set_submit_machine(machine)
+            js._set_shell_idx(js_shell_indices[js_idx])
 
-        # for direct submission, it's important that os_name/shell_name/scheduler_name
-        # are made persistent now, because `Workflow.write_commands`, which might be
-        # invoked in a new process before submission has completed, needs to know these:
         self.workflow._store._pending.commit_all()
 
-        # TODO: a submission should only be "submitted" once shouldn't it?
-        # no; there could be an IO error (e.g. internet connectivity), so might
-        # need to be able to reattempt submission of outstanding jobscripts.
-        self.path.mkdir(exist_ok=True)
-        if not self.abort_EARs_file_path.is_file():
-            self._write_abort_EARs_file()
-
         # map jobscript `index` to (scheduler job ID or process ID, is_array):
-        scheduler_refs = {}
-        submitted_js_idx = []
-        errs = []
+        scheduler_refs: dict[int, tuple[str, bool]] = {}
+        submitted_js_idx: list[int] = []
+        errs: list[JobscriptSubmissionFailure] = []
         for js in self.jobscripts:
             # check not previously submitted:
             if js.index not in outstanding:
@@ -605,14 +1210,20 @@ class Submission(JSONLike):
 
             # check all dependencies were submitted now or previously:
             if not all(
-                i in submitted_js_idx or i in self.submitted_jobscripts
-                for i in js.dependencies
+                js_idx in submitted_js_idx or js_idx in self.submitted_jobscripts
+                for js_idx, _ in js.dependencies
             ):
+                warnings.warn(
+                    f"Cannot submit jobscript index {js.index} since not all of its "
+                    f"dependencies have been submitted: {js.dependencies!r}"
+                )
                 continue
 
             try:
                 if status:
-                    status.update(f"Submitting jobscript {js.index}...")
+                    status.update(
+                        f"Submitting jobscript {js.index + 1}/{len(self.jobscripts)}..."
+                    )
                 js_ref_i = js.submit(scheduler_refs, print_stdout=print_stdout)
                 scheduler_refs[js.index] = (js_ref_i, js.is_array)
                 submitted_js_idx.append(js.index)
@@ -621,15 +1232,21 @@
                 errs.append(err)
                 continue
 
+        # TODO: some way to handle KeyboardInterrupt during submission?
+        #   - stop, and cancel already submitted?
+
         if submitted_js_idx:
-            dt_str = datetime.utcnow().strftime(self.app._submission_ts_fmt)
+            dt_str = current_timestamp().strftime(self._app._submission_ts_fmt)
             self._append_submission_part(
                 submit_time=dt_str,
                 submitted_js_idx=submitted_js_idx,
             )
+            # ensure `_submission_parts` is committed
+            self.workflow._store._pending.commit_all()
+
             # add a record of the submission part to the known-submissions file
             if add_to_known:
-                self.app._add_to_known_submissions(
+                self._app._add_to_known_submissions(
                     wk_path=self.workflow.path,
                     wk_id=self.workflow.id_,
                     sub_idx=self.index,
@@ -639,7 +1256,7 @@
         if errs and not ignore_errors:
             if status:
                 status.stop()
-            self._raise_failure(submitted_js_idx, errs)
+            raise SubmissionFailure(self.index, submitted_js_idx, errs)
 
         len_js = len(submitted_js_idx)
         print(f"Submitted {len_js} jobscript{'s' if len_js > 1 else ''}.")
@@ -647,24 +1264,86 @@
         return submitted_js_idx
 
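The version-info merging inside submit relies on dict.setdefault to combine the scheduler and shell passes per jobscript; in isolation the pattern looks like this (the values are made up):

js_vers_info: dict[int, dict[str, str]] = {}
js_vers_info.setdefault(0, {}).update({"scheduler": "SLURM 23.02"})  # scheduler pass
js_vers_info.setdefault(0, {}).update({"shell": "bash 5.1"})         # shell pass
assert js_vers_info == {0: {"scheduler": "SLURM 23.02", "shell": "bash 5.1"}}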
     @TimeIt.decorator
-    def cancel(self):
+    def cancel(self) -> None:
         """
         Cancel the active jobs for this submission's jobscripts.
         """
-        act_js = list(self.get_active_jobscripts())
-        if not act_js:
+        if not (act_js := self.get_active_jobscripts()):
             print("No active jobscripts to cancel.")
             return
-        for js_indices, sched in self.get_unique_schedulers().items():
+        for js_indices, sched in self._unique_schedulers:
             # filter by active jobscripts:
-            js_idx = [i[1] for i in js_indices if i[1] in act_js]
-            if js_idx:
+            if js_idx := [i[1] for i in js_indices if i[1] in act_js]:
                 print(
-                    f"Cancelling jobscripts {js_idx!r} of submission {self.index} of "
-                    f"workflow {self.workflow.name!r}."
+                    f"Cancelling jobscripts {shorten_list_str(js_idx, items=5)} of "
+                    f"submission {self.index} of workflow {self.workflow.name!r}."
                 )
                 jobscripts = [self.jobscripts[i] for i in js_idx]
-                sched_refs = [i.scheduler_js_ref for i in jobscripts]
+                sched_refs = [js.scheduler_js_ref for js in jobscripts]
                 sched.cancel_jobs(js_refs=sched_refs, jobscripts=jobscripts)
             else:
                 print("No active jobscripts to cancel.")
+
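cancel's walrus-based filter keeps only the main jobscript index (the second element of each descriptor) when that jobscript is active; a standalone sketch with dummy data:

act_js = {1, 3}
js_indices = ((0, 0), (0, 1), (0, 3))  # (submission index, jobscript index) pairs
if js_idx := [i[1] for i in js_indices if i[1] in act_js]:
    print(f"would cancel jobscripts {js_idx}")  # -> would cancel jobscripts [1, 3]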
+    @TimeIt.decorator
+    def get_scheduler_job_IDs(self) -> tuple[str, ...]:
+        """Return jobscript scheduler job IDs."""
+        return tuple(
+            js_i.scheduler_job_ID
+            for js_i in self.jobscripts
+            if js_i.scheduler_job_ID is not None
+        )
+
+    @TimeIt.decorator
+    def get_process_IDs(self) -> tuple[int, ...]:
+        """Return jobscript process IDs."""
+        return tuple(
+            js_i.process_ID for js_i in self.jobscripts if js_i.process_ID is not None
+        )
+
+    @TimeIt.decorator
+    def list_jobscripts(
+        self,
+        max_js: int | None = None,
+        jobscripts: list[int] | None = None,
+        width: int | None = None,
+    ) -> None:
+        """Print a table listing jobscripts and associated information.
+
+        Parameters
+        ----------
+        max_js
+            Maximum jobscript index to display. This cannot be specified with `jobscripts`.
+        jobscripts
+            A list of jobscripts to display. This cannot be specified with `max_js`.
+        width
+            Width in characters of the printed table.
+
+        """
+        self.workflow.list_jobscripts(
+            sub_idx=self.index, max_js=max_js, jobscripts=jobscripts, width=width
+        )
+
+    @TimeIt.decorator
+    def list_task_jobscripts(
+        self,
+        task_names: list[str] | None = None,
+        max_js: int | None = None,
+        width: int | None = None,
+    ) -> None:
+        """Print a table listing the jobscripts associated with the specified (or all)
+        tasks for the specified submission.
+
+        Parameters
+        ----------
+        task_names
+            List of sub-strings to match to task names. Only matching task names will be
+            included.
+        max_js
+            Maximum jobscript index to display.
+        width
+            Width in characters of the printed table.
+
+        """
+        self.workflow.list_task_jobscripts(
+            sub_idx=self.index, max_js=max_js, task_names=task_names, width=width
+        )
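A hedged usage sketch of the new listing helpers, which simply delegate to the workflow as shown above (the `workflow` object is assumed to be already loaded; variable names are illustrative):

sub = workflow.submissions[0]   # pick the first submission
sub.list_jobscripts(max_js=10)  # show jobscripts with index up to 10
sub.list_task_jobscripts(task_names=["simulate"], width=120)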