hpcflow-new2 0.2.0a190__py3-none-any.whl → 0.2.0a199__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130)
  1. hpcflow/__pyinstaller/hook-hpcflow.py +1 -0
  2. hpcflow/_version.py +1 -1
  3. hpcflow/data/scripts/bad_script.py +2 -0
  4. hpcflow/data/scripts/do_nothing.py +2 -0
  5. hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
  6. hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
  7. hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
  8. hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
  9. hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
  10. hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
  11. hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
  12. hpcflow/data/scripts/input_file_generator_basic.py +3 -0
  13. hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
  14. hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
  15. hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
  16. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
  17. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
  18. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
  19. hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
  20. hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
  21. hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
  22. hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
  23. hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
  24. hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
  25. hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
  26. hpcflow/data/scripts/output_file_parser_basic.py +3 -0
  27. hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
  28. hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
  29. hpcflow/data/scripts/script_exit_test.py +5 -0
  30. hpcflow/data/template_components/environments.yaml +1 -1
  31. hpcflow/sdk/__init__.py +5 -0
  32. hpcflow/sdk/app.py +150 -89
  33. hpcflow/sdk/cli.py +263 -84
  34. hpcflow/sdk/cli_common.py +99 -5
  35. hpcflow/sdk/config/callbacks.py +38 -1
  36. hpcflow/sdk/config/config.py +102 -13
  37. hpcflow/sdk/config/errors.py +19 -5
  38. hpcflow/sdk/config/types.py +3 -0
  39. hpcflow/sdk/core/__init__.py +25 -1
  40. hpcflow/sdk/core/actions.py +914 -262
  41. hpcflow/sdk/core/cache.py +76 -34
  42. hpcflow/sdk/core/command_files.py +14 -128
  43. hpcflow/sdk/core/commands.py +35 -6
  44. hpcflow/sdk/core/element.py +122 -50
  45. hpcflow/sdk/core/errors.py +58 -2
  46. hpcflow/sdk/core/execute.py +207 -0
  47. hpcflow/sdk/core/loop.py +408 -50
  48. hpcflow/sdk/core/loop_cache.py +4 -4
  49. hpcflow/sdk/core/parameters.py +382 -37
  50. hpcflow/sdk/core/run_dir_files.py +13 -40
  51. hpcflow/sdk/core/skip_reason.py +7 -0
  52. hpcflow/sdk/core/task.py +119 -30
  53. hpcflow/sdk/core/task_schema.py +68 -0
  54. hpcflow/sdk/core/test_utils.py +66 -27
  55. hpcflow/sdk/core/types.py +54 -1
  56. hpcflow/sdk/core/utils.py +78 -7
  57. hpcflow/sdk/core/workflow.py +1538 -336
  58. hpcflow/sdk/data/workflow_spec_schema.yaml +2 -0
  59. hpcflow/sdk/demo/cli.py +7 -0
  60. hpcflow/sdk/helper/cli.py +1 -0
  61. hpcflow/sdk/log.py +42 -15
  62. hpcflow/sdk/persistence/base.py +405 -53
  63. hpcflow/sdk/persistence/json.py +177 -52
  64. hpcflow/sdk/persistence/pending.py +237 -69
  65. hpcflow/sdk/persistence/store_resource.py +3 -2
  66. hpcflow/sdk/persistence/types.py +15 -4
  67. hpcflow/sdk/persistence/zarr.py +928 -81
  68. hpcflow/sdk/submission/jobscript.py +1408 -489
  69. hpcflow/sdk/submission/schedulers/__init__.py +40 -5
  70. hpcflow/sdk/submission/schedulers/direct.py +33 -19
  71. hpcflow/sdk/submission/schedulers/sge.py +51 -16
  72. hpcflow/sdk/submission/schedulers/slurm.py +44 -16
  73. hpcflow/sdk/submission/schedulers/utils.py +7 -2
  74. hpcflow/sdk/submission/shells/base.py +68 -20
  75. hpcflow/sdk/submission/shells/bash.py +222 -129
  76. hpcflow/sdk/submission/shells/powershell.py +200 -150
  77. hpcflow/sdk/submission/submission.py +852 -119
  78. hpcflow/sdk/submission/types.py +18 -21
  79. hpcflow/sdk/typing.py +24 -5
  80. hpcflow/sdk/utils/arrays.py +71 -0
  81. hpcflow/sdk/utils/deferred_file.py +55 -0
  82. hpcflow/sdk/utils/hashing.py +16 -0
  83. hpcflow/sdk/utils/patches.py +12 -0
  84. hpcflow/sdk/utils/strings.py +33 -0
  85. hpcflow/tests/api/test_api.py +32 -0
  86. hpcflow/tests/conftest.py +19 -0
  87. hpcflow/tests/data/multi_path_sequences.yaml +29 -0
  88. hpcflow/tests/data/workflow_test_run_abort.yaml +34 -35
  89. hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
  90. hpcflow/tests/scripts/test_input_file_generators.py +282 -0
  91. hpcflow/tests/scripts/test_main_scripts.py +821 -70
  92. hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
  93. hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
  94. hpcflow/tests/shells/wsl/test_wsl_submission.py +6 -0
  95. hpcflow/tests/unit/test_action.py +176 -0
  96. hpcflow/tests/unit/test_app.py +20 -0
  97. hpcflow/tests/unit/test_cache.py +46 -0
  98. hpcflow/tests/unit/test_cli.py +133 -0
  99. hpcflow/tests/unit/test_config.py +122 -1
  100. hpcflow/tests/unit/test_element_iteration.py +47 -0
  101. hpcflow/tests/unit/test_jobscript_unit.py +757 -0
  102. hpcflow/tests/unit/test_loop.py +1332 -27
  103. hpcflow/tests/unit/test_meta_task.py +325 -0
  104. hpcflow/tests/unit/test_multi_path_sequences.py +229 -0
  105. hpcflow/tests/unit/test_parameter.py +13 -0
  106. hpcflow/tests/unit/test_persistence.py +190 -8
  107. hpcflow/tests/unit/test_run.py +109 -3
  108. hpcflow/tests/unit/test_run_directories.py +29 -0
  109. hpcflow/tests/unit/test_shell.py +20 -0
  110. hpcflow/tests/unit/test_submission.py +5 -76
  111. hpcflow/tests/unit/utils/test_arrays.py +40 -0
  112. hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
  113. hpcflow/tests/unit/utils/test_hashing.py +65 -0
  114. hpcflow/tests/unit/utils/test_patches.py +5 -0
  115. hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
  116. hpcflow/tests/workflows/__init__.py +0 -0
  117. hpcflow/tests/workflows/test_directory_structure.py +31 -0
  118. hpcflow/tests/workflows/test_jobscript.py +332 -0
  119. hpcflow/tests/workflows/test_run_status.py +198 -0
  120. hpcflow/tests/workflows/test_skip_downstream.py +696 -0
  121. hpcflow/tests/workflows/test_submission.py +140 -0
  122. hpcflow/tests/workflows/test_workflows.py +142 -2
  123. hpcflow/tests/workflows/test_zip.py +18 -0
  124. hpcflow/viz_demo.ipynb +6587 -3
  125. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/METADATA +7 -4
  126. hpcflow_new2-0.2.0a199.dist-info/RECORD +221 -0
  127. hpcflow_new2-0.2.0a190.dist-info/RECORD +0 -165
  128. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/LICENSE +0 -0
  129. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/WHEEL +0 -0
  130. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/entry_points.txt +0 -0
hpcflow/sdk/submission/submission.py
@@ -4,10 +4,17 @@ A collection of submissions to a scheduler, generated from a workflow.
 
 from __future__ import annotations
 from collections import defaultdict
-import os
+import shutil
 from pathlib import Path
-from typing import Any, overload, TYPE_CHECKING
+import socket
+from textwrap import indent
+from typing import Any, Literal, overload, TYPE_CHECKING
 from typing_extensions import override
+import warnings
+
+
+from hpcflow.sdk.utils.strings import shorten_list_str
+import numpy as np
 
 from hpcflow.sdk.typing import hydrate
 from hpcflow.sdk.core.errors import (
@@ -17,18 +24,22 @@ from hpcflow.sdk.core.errors import (
     MissingEnvironmentExecutableInstanceError,
     MultipleEnvironmentsError,
     SubmissionFailure,
+    OutputFileParserNoOutputError,
 )
 from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
 from hpcflow.sdk.core.object_list import ObjectListMultipleMatchError
 from hpcflow.sdk.core.utils import parse_timestamp, current_timestamp
 from hpcflow.sdk.submission.enums import SubmissionStatus
+from hpcflow.sdk.core import RUN_DIR_ARR_DTYPE
 from hpcflow.sdk.log import TimeIt
+from hpcflow.sdk.utils.strings import shorten_list_str
 
 if TYPE_CHECKING:
     from collections.abc import Iterable, Mapping, Sequence
     from datetime import datetime
     from typing import ClassVar, Literal
     from rich.status import Status
+    from numpy.typing import NDArray
     from .jobscript import Jobscript
     from .enums import JobscriptElementState
     from .schedulers import Scheduler
@@ -38,6 +49,22 @@ if TYPE_CHECKING:
     from ..core.environment import Environment
     from ..core.object_list import EnvironmentsList
     from ..core.workflow import Workflow
+    from ..core.cache import ObjectCache
+
+
+# jobscript attributes that are set persistently just after the jobscript has been
+# submitted to the scheduler:
+JOBSCRIPT_SUBMIT_TIME_KEYS = (
+    "submit_cmdline",
+    "scheduler_job_ID",
+    "process_ID",
+    "submit_time",
+)
+# submission attributes that are set persistently just after all of a submission's
+# jobscripts have been submitted:
+SUBMISSION_SUBMIT_TIME_KEYS = {
+    "submission_parts": dict,
+}
 
 
 @hydrate
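Note: `SUBMISSION_SUBMIT_TIME_KEYS` maps each at-submit metadata attribute to a zero-argument factory that builds its empty default. A minimal sketch of how the constructor below consumes it (a fresh container per `Submission` instance):

    at_submit_metadata = {k: v() for k, v in SUBMISSION_SUBMIT_TIME_KEYS.items()}
    # -> {"submission_parts": {}}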
@@ -74,20 +101,35 @@ class Submission(JSONLike):
         ),
     )
 
+    TMP_DIR_NAME = "tmp"
+    LOG_DIR_NAME = "app_logs"
+    APP_STD_DIR_NAME = "app_std"
+    JS_DIR_NAME = "jobscripts"
+    JS_STD_DIR_NAME = "js_std"
+    JS_RUN_IDS_DIR_NAME = "js_run_ids"
+    JS_FUNCS_DIR_NAME = "js_funcs"
+    JS_WIN_PIDS_DIR_NAME = "js_pids"
+    JS_SCRIPT_INDICES_DIR_NAME = "js_script_indices"
+    SCRIPTS_DIR_NAME = "scripts"
+    COMMANDS_DIR_NAME = "commands"
+    WORKFLOW_APP_ALIAS = "wkflow_app"
+
     def __init__(
         self,
         index: int,
         jobscripts: list[Jobscript],
         workflow: Workflow | None = None,
-        submission_parts: dict[str, list[int]] | None = None,
-        JS_parallelism: bool | None = None,
+        at_submit_metadata: dict[str, Any] | None = None,
+        JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
         environments: EnvironmentsList | None = None,
     ):
         self._index = index
         self._jobscripts = jobscripts
-        self._submission_parts = submission_parts or {}
+        self._at_submit_metadata = at_submit_metadata or {
+            k: v() for k, v in SUBMISSION_SUBMIT_TIME_KEYS.items()
+        }
         self._JS_parallelism = JS_parallelism
-        self._environments = environments
+        self._environments = environments  # assigned by _set_environments
 
         self._submission_parts_lst: list[
             SubmissionPart
@@ -99,8 +141,30 @@ class Submission(JSONLike):
 
         self._set_parent_refs()
 
-        for js_idx, js in enumerate(self.jobscripts):
-            js._index = js_idx
+    def _ensure_JS_parallelism_set(self):
+        """Ensure that the JS_parallelism attribute is one of `True`, `False`, `'direct'`
+        or `'scheduled'`.
+
+        Notes
+        -----
+        This method is called after the Submission object is first created in
+        `Workflow._add_submission`.
+
+        """
+        # if JS_parallelism explicitly requested but store doesn't support, raise:
+        supports_JS_para = self.workflow._store._features.jobscript_parallelism
+        if self.JS_parallelism:
+            # could be: True | "direct" | "scheduled"
+            if not supports_JS_para:
+                # if status:
+                #     status.stop()
+                raise ValueError(
+                    f"Store type {self.workflow._store!r} does not support jobscript "
+                    f"parallelism."
+                )
+        elif self.JS_parallelism is None:
+            # by default only use JS parallelism for scheduled jobscripts:
+            self._JS_parallelism = "scheduled" if supports_JS_para else False
 
     @TimeIt.decorator
     def _set_environments(self) -> None:
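A comment-only sketch of how `_ensure_JS_parallelism_set` resolves the attribute, derived from the branches above:

    # store supports jobscript parallelism:
    #   None                          -> "scheduled"  (default: parallelism only for scheduled jobscripts)
    #   True / "direct" / "scheduled" -> unchanged
    # store does not support it:
    #   None                          -> False
    #   True / "direct" / "scheduled" -> ValueError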
@@ -110,20 +174,22 @@ class Submission(JSONLike):
         req_envs: dict[
             tuple[tuple[str, ...], tuple[Any, ...]], dict[str, set[int]]
         ] = defaultdict(lambda: defaultdict(set))
-        for js_idx, js_i in enumerate(self.jobscripts):
-            for run in js_i.all_EARs:
-                # Alas, mypy can't typecheck the next line if the type is right!
-                # So we use Any to get it to shut up...
-                env_spec_h: Any = tuple(zip(*run.env_spec.items()))  # hashable
-                for exec_label_j in run.action.get_required_executables():
-                    req_envs[env_spec_h][exec_label_j].add(js_idx)
-                # Ensure overall element is present
-                req_envs[env_spec_h]
+        with self.workflow.cached_merged_parameters():
+            # using the cache (for `run.env_spec_hashable` -> `run.resources`) should
+            # significantly speed up this loop, unless a large resources sequence is used:
+            for js_idx, all_EARs_i in enumerate(self.all_EARs_by_jobscript):
+                for run in all_EARs_i:
+                    env_spec_h = run.env_spec_hashable
+                    for exec_label_j in run.action.get_required_executables():
+                        req_envs[env_spec_h][exec_label_j].add(js_idx)
+                    # add any environment for which an executable was not required:
+                    if env_spec_h not in req_envs:
+                        req_envs[env_spec_h]
 
         # check these envs/execs exist in app data:
         envs: list[Environment] = []
         for env_spec_h, exec_js in req_envs.items():
-            env_spec = dict(zip(*env_spec_h))
+            env_spec = self._app.Action.env_spec_from_hashable(env_spec_h)
             try:
                 env_i = self._app.envs.get(**env_spec)
             except ObjectListMultipleMatchError:
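The removed lines above show the old hashable encoding of an environment specifier, now centralised in `env_spec_hashable`/`Action.env_spec_from_hashable` (whose internals are not shown in this diff). A sketch of the round-trip implied by the removed code:

    env_spec = {"name": "python_env", "version": "v1"}
    env_spec_h = tuple(zip(*env_spec.items()))  # (("name", "version"), ("python_env", "v1"))
    assert dict(zip(*env_spec_h)) == env_spec   # the old decoding, removed above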
@@ -178,13 +244,17 @@ class Submission(JSONLike):
         return self._environments
 
     @property
-    def submission_parts(self) -> list[SubmissionPart]:
-        """
-        Description of the parts of this submission.
-        """
-        if not self._submission_parts:
-            return []
+    def at_submit_metadata(self) -> dict[str, dict[str, Any]]:
+        return self.workflow._store.get_submission_at_submit_metadata(
+            sub_idx=self.index, metadata_attr=self._at_submit_metadata
+        )
 
+    @property
+    def _submission_parts(self) -> dict[str, list[int]]:
+        return self.at_submit_metadata["submission_parts"] or {}
+
+    @property
+    def submission_parts(self) -> list[SubmissionPart]:
         if self._submission_parts_lst is None:
             self._submission_parts_lst = [
                 {
@@ -233,7 +303,7 @@ class Submission(JSONLike):
         return self._jobscripts
 
     @property
-    def JS_parallelism(self) -> bool | None:
+    def JS_parallelism(self) -> bool | Literal["direct", "scheduled"] | None:
         """
         Whether to exploit jobscript parallelism.
         """
@@ -287,14 +357,237 @@ class Submission(JSONLike):
             SubmissionStatus.PARTIALLY_SUBMITTED,
         )
 
+    @property
+    def needs_app_log_dir(self) -> bool:
+        """
+        Whether this submission requires an app log directory.
+        """
+        for js in self.jobscripts:
+            if js.resources.write_app_logs:
+                return True
+        return False
+
+    @property
+    def needs_win_pids_dir(self) -> bool:
+        """
+        Whether this submission requires a directory for process ID files (Windows only).
+        """
+        for js in self.jobscripts:
+            if js.os_name == "nt":
+                return True
+        return False
+
+    @property
+    def needs_script_indices_dir(self) -> bool:
+        """
+        Whether this submission requires a directory for combined-script script ID files.
+        """
+        for js in self.jobscripts:
+            if js.resources.combine_scripts:
+                return True
+        return False
+
+    @classmethod
+    def get_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The directory path to files associated with the specified submission.
+        """
+        return submissions_path / str(sub_idx)
+
+    @classmethod
+    def get_tmp_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the temporary files directory, for the specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.TMP_DIR_NAME
+
+    @classmethod
+    def get_app_log_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the app log directory, for the specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.LOG_DIR_NAME
+
+    @staticmethod
+    def get_app_log_file_name(run_ID: int | str) -> str:
+        """
+        The app log file name.
+        """
+        # TODO: consider combine_app_logs argument
+        return f"r_{run_ID}.log"
+
+    @classmethod
+    def get_app_log_file_path(cls, submissions_path: Path, sub_idx: int, run_ID: int):
+        """
+        The file path to the app log, for the specified submission.
+        """
+        return (
+            cls.get_path(submissions_path, sub_idx)
+            / cls.LOG_DIR_NAME
+            / cls.get_app_log_file_name(run_ID)
+        )
+
+    @classmethod
+    def get_app_std_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the app standard output and error stream files directory, for the
+        specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.APP_STD_DIR_NAME
+
+    @classmethod
+    def get_js_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the jobscript files directory, for the specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.JS_DIR_NAME
+
+    @classmethod
+    def get_js_std_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the jobscript standard output and error files directory, for the
+        specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.JS_STD_DIR_NAME
+
+    @classmethod
+    def get_js_run_ids_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the directory containing jobscript run IDs, for the specified
+        submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.JS_RUN_IDS_DIR_NAME
+
+    @classmethod
+    def get_js_funcs_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the directory containing the shell functions that are invoked within
+        jobscripts and command files, for the specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.JS_FUNCS_DIR_NAME
+
+    @classmethod
+    def get_js_win_pids_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the directory containing process ID files (Windows only), for the
+        specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.JS_WIN_PIDS_DIR_NAME
+
+    @classmethod
+    def get_js_script_indices_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the directory containing script indices for combined-script jobscripts
+        only, for the specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.JS_SCRIPT_INDICES_DIR_NAME
+
+    @classmethod
+    def get_scripts_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the directory containing action scripts, for the specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.SCRIPTS_DIR_NAME
+
+    @classmethod
+    def get_commands_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the directory containing command files, for the specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.COMMANDS_DIR_NAME
+
     @property
     def path(self) -> Path:
         """
-        The path to files associated with this submission.
+        The path to the directory containing files associated with this submission.
+        """
+        return self.get_path(self.workflow.submissions_path, self.index)
+
+    @property
+    def tmp_path(self) -> Path:
+        """
+        The path to the temporary files directory for this submission.
+        """
+        return self.get_tmp_path(self.workflow.submissions_path, self.index)
+
+    @property
+    def app_log_path(self) -> Path:
+        """
+        The path to the app log directory for this submission.
+        """
+        return self.get_app_log_path(self.workflow.submissions_path, self.index)
+
+    @property
+    def app_std_path(self) -> Path:
+        """
+        The path to the app standard output and error stream files directory, for
+        this submission.
+        """
+        return self.get_app_std_path(self.workflow.submissions_path, self.index)
+
+    @property
+    def js_path(self) -> Path:
+        """
+        The path to the jobscript files directory, for this submission.
+        """
+        return self.get_js_path(self.workflow.submissions_path, self.index)
+
+    @property
+    def js_std_path(self) -> Path:
+        """
+        The path to the jobscript standard output and error files directory, for this
+        submission.
+        """
+        return self.get_js_std_path(self.workflow.submissions_path, self.index)
+
+    @property
+    def js_run_ids_path(self) -> Path:
+        """
+        The path to the directory containing jobscript run IDs, for this submission.
+        """
+        return self.get_js_run_ids_path(self.workflow.submissions_path, self.index)
+
+    @property
+    def js_funcs_path(self) -> Path:
+        """
+        The path to the directory containing the shell functions that are invoked within
+        jobscripts and command files, for this submission.
+        """
+        return self.get_js_funcs_path(self.workflow.submissions_path, self.index)
+
+    @property
+    def js_win_pids_path(self) -> Path:
+        """
+        The path to the directory containing process ID files (Windows only), for this
+        submission.
+        """
+        return self.get_js_win_pids_path(self.workflow.submissions_path, self.index)
+
+    @property
+    def js_script_indices_path(self) -> Path:
+        """
+        The path to the directory containing script indices for combined-script jobscripts
+        only, for this submission.
         """
-        return self.workflow.submissions_path / str(self.index)
+        return self.get_js_script_indices_path(self.workflow.submissions_path, self.index)
 
     @property
+    def scripts_path(self) -> Path:
+        """
+        The path to the directory containing action scripts, for this submission.
+        """
+        return self.get_scripts_path(self.workflow.submissions_path, self.index)
+
+    @property
+    def commands_path(self) -> Path:
+        """
+        The path to the directory containing command files, for this submission.
+        """
+        return self.get_commands_path(self.workflow.submissions_path, self.index)
+
+    @property
+    @TimeIt.decorator
     def all_EAR_IDs(self) -> Iterable[int]:
         """
         The IDs of all EARs in this submission.
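Taken together, the `*_DIR_NAME` constants and path helpers above imply a per-submission directory layout along these lines (assuming `Workflow.submissions_path` resolves to a `submissions` directory; only directories that are needed are created):

    submissions/0/
    ├── app_logs/           # per-run app log files, e.g. r_123.log
    ├── app_std/            # app standard output/error stream files
    ├── commands/           # command files
    ├── jobscripts/         # jobscript files
    ├── js_funcs/           # shell functions sourced by jobscripts/command files
    ├── js_pids/            # process ID files (Windows only)
    ├── js_run_ids/         # jobscript run IDs
    ├── js_script_indices/  # script indices for combined-script jobscripts
    ├── js_std/             # jobscript standard output/error files
    ├── scripts/            # action scripts
    └── tmp/                # temporary files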
@@ -302,12 +595,25 @@ class Submission(JSONLike):
         return (i for js in self.jobscripts for i in js.all_EAR_IDs)
 
     @property
+    @TimeIt.decorator
     def all_EARs(self) -> Iterable[ElementActionRun]:
         """
-        All EARs in this this submission.
+        All EARs in this submission.
         """
         return (ear for js in self.jobscripts for ear in js.all_EARs)
 
+    @property
+    @TimeIt.decorator
+    def all_EARs_IDs_by_jobscript(self) -> list[np.ndarray]:
+        return [i.all_EAR_IDs for i in self.jobscripts]
+
+    @property
+    @TimeIt.decorator
+    def all_EARs_by_jobscript(self) -> list[list[ElementActionRun]]:
+        ids = [i.all_EAR_IDs for i in self.jobscripts]
+        all_EARs = {i.id_: i for i in self.workflow.get_EARs_from_IDs(self.all_EAR_IDs)}
+        return [[all_EARs[i] for i in js_ids] for js_ids in ids]
+
     @property
     @TimeIt.decorator
     def EARs_by_elements(self) -> Mapping[int, Mapping[int, Sequence[ElementActionRun]]]:
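`all_EARs_by_jobscript` above replaces per-jobscript store reads with one bulk `get_EARs_from_IDs` call followed by in-memory regrouping. A generic sketch of that batch-then-regroup pattern (`fetch_many` is a hypothetical stand-in for the bulk store read):

    ids_by_group = [[3, 7], [1, 9, 12]]                 # e.g. EAR IDs per jobscript
    flat = [i for grp in ids_by_group for i in grp]
    by_id = {obj.id_: obj for obj in fetch_many(flat)}  # one bulk read, not len(flat) reads
    grouped = [[by_id[i] for i in grp] for grp in ids_by_group]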
@@ -322,70 +628,358 @@ class Submission(JSONLike):
         return task_elem_EARs
 
     @property
-    def abort_EARs_file_name(self) -> str:
-        """
-        The name of a file describing what EARs have aborted.
-        """
-        return "abort_EARs.txt"
-
-    @property
-    def abort_EARs_file_path(self) -> Path:
-        """
-        The path to the file describing what EARs have aborted in this submission.
-        """
-        return self.path / self.abort_EARs_file_name
+    def is_scheduled(self) -> tuple[bool, ...]:
+        """Return whether each jobscript of this submission uses a scheduler or not."""
+        return tuple(i.is_scheduled for i in self.jobscripts)
 
     @overload
     def get_active_jobscripts(
         self, as_json: Literal[False] = False
-    ) -> Mapping[int, Mapping[int, JobscriptElementState]]:
+    ) -> Mapping[int, Mapping[int, Mapping[int, JobscriptElementState]]]:
         ...
 
     @overload
-    def get_active_jobscripts(self, as_json: Literal[True]) -> dict[int, dict[int, str]]:
+    def get_active_jobscripts(
+        self, as_json: Literal[True]
+    ) -> Mapping[int, Mapping[int, Mapping[int, str]]]:
         ...
 
     @TimeIt.decorator
     def get_active_jobscripts(
-        self, as_json: bool = False
-    ) -> Mapping[int, Mapping[int, JobscriptElementState]] | dict[int, dict[int, str]]:
+        self,
+        as_json: Literal[True] | Literal[False] = False,  # TODO: why can't we use bool?
+    ) -> Mapping[int, Mapping[int, Mapping[int, JobscriptElementState | str]]]:
         """Get jobscripts that are active on this machine, and their active states."""
-        # this returns: {JS_IDX: {JS_ELEMENT_IDX: STATE}}
+        # this returns: {JS_IDX: {BLOCK_IDX: {JS_ELEMENT_IDX: STATE}}}
         # TODO: query the scheduler once for all jobscripts?
-        if as_json:
-            details = (
-                (js.index, js.get_active_states(as_json=True)) for js in self.jobscripts
-            )
-            return {idx: state for idx, state in details if state}
-        else:
-            dets2 = (
-                (js.index, js.get_active_states(as_json=False)) for js in self.jobscripts
-            )
-            return {idx: state for idx, state in dets2 if state}
-
-    def _write_abort_EARs_file(self) -> None:
-        with self.abort_EARs_file_path.open(mode="wt", newline="\n") as fp:
-            # write a single line for each EAR currently in the workflow:
-            fp.write("\n".join("0" for _ in range(self.workflow.num_EARs)) + "\n")
-
-    def _set_run_abort(self, run_ID: int) -> None:
-        """Modify the abort runs file to indicate a specified run should be aborted."""
-        with self.abort_EARs_file_path.open(mode="rt", newline="\n") as fp:
-            lines = fp.read().splitlines()
-        lines[run_ID] = "1"
-
-        # write a new temporary run-abort file:
-        tmp_suffix = self.abort_EARs_file_path.suffix + ".tmp"
-        tmp = self.abort_EARs_file_path.with_suffix(tmp_suffix)
-        self._app.submission_logger.debug(f"Creating temporary run abort file: {tmp!r}.")
-        with tmp.open(mode="wt", newline="\n") as fp:
-            fp.write("\n".join(lines) + "\n")
-
-        # atomic rename, overwriting original:
-        self._app.submission_logger.debug(
-            "Replacing original run abort file with new temporary file."
+        return {
+            js.index: act_states
+            for js in self.jobscripts
+            if (act_states := js.get_active_states(as_json=as_json))
+        }
+
+    @TimeIt.decorator
+    def _write_scripts(
+        self, cache: ObjectCache, status: Status | None = None
+    ) -> tuple[dict[int, int | None], NDArray, dict[int, list[Path]]]:
+        """Write to disk all action scripts associated with this submission."""
+        # TODO: rename this method
+
+        # TODO: need to check is_snippet_script is exclusive? i.e. only `script` and no
+        # `commands` in the action?
+        # TODO: scripts must have the same exe and the same environment as well?
+        # TODO: env_spec should be included in jobscript hash if combine_scripts=True ?
+
+        actions_by_schema: dict[str, dict[int, set]] = defaultdict(
+            lambda: defaultdict(set)
         )
-        os.replace(src=tmp, dst=self.abort_EARs_file_path)
+        combined_env_specs = {}
+
+        # task insert IDs and action indices for each combined_scripts jobscript:
+        combined_actions = {}
+
+        cmd_hashes = defaultdict(set)
+        num_runs_tot = sum(len(js.all_EAR_IDs) for js in self.jobscripts)
+        run_indices = np.ones((num_runs_tot, 9), dtype=int) * -1
+        run_inp_files = defaultdict(
+            list
+        )  # keys are `run_idx`, values are Paths to copy to run dir
+        run_cmd_file_names: dict[int, int | None] = {}  # None if no commands to write
+        run_idx = 0
+
+        if status:
+            status.update(f"Adding new submission: processing run 1/{num_runs_tot}.")
+
+        all_runs = cache.runs
+        assert all_runs is not None
+        runs_ids_by_js = self.all_EARs_IDs_by_jobscript
+
+        with self.workflow.cached_merged_parameters():
+            for js in self.jobscripts:
+                js_idx = js.index
+                js_run_0 = all_runs[runs_ids_by_js[js.index][0]]
+
+                if js.resources.combine_scripts:
+                    # this will be one or more snippet scripts that need to be combined
+                    # into one script for the whole jobscript
+
+                    # need to write one script + one commands file for the whole jobscript
+
+                    # env_spec will be the same for all runs of this jobscript:
+                    combined_env_specs[js_idx] = js_run_0.env_spec
+                    combined_actions[js_idx] = [
+                        [j[0:2] for j in i.task_actions] for i in js.blocks
+                    ]
+
+                for idx, run_id in enumerate(js.all_EAR_IDs):
+                    run = all_runs[run_id]
+
+                    run_indices[run_idx] = [
+                        run.task.insert_ID,
+                        run.element.id_,
+                        run.element_iteration.id_,
+                        run.id_,
+                        run.element.index,
+                        run.element_iteration.index,
+                        run.element_action.action_idx,
+                        run.index,
+                        int(run.action.requires_dir),
+                    ]
+                    run_idx += 1
+
+                    if status and run_idx % 10 == 0:
+                        status.update(
+                            f"Adding new submission: processing run {run_idx}/{num_runs_tot}."
+                        )
+
+                    if js.resources.combine_scripts:
+                        if idx == 0:
+                            # the commands file for a combined jobscript won't have
+                            # any parameter data in the command line, so should raise
+                            # if something is found to be unset:
+                            run.try_write_commands(
+                                environments=self.environments,
+                                jobscript=js,
+                                raise_on_unset=True,
+                            )
+                        run_cmd_file_names[run.id_] = None
+
+                    else:
+                        if run.is_snippet_script:
+                            actions_by_schema[run.action.task_schema.name][
+                                run.element_action.action_idx
+                            ].add(run.env_spec_hashable)
+
+                        if run.action.commands:
+                            hash_i = run.get_commands_file_hash()
+                            # TODO: could further reduce number of files in the case the data
+                            # indices hash is the same: if commands objects are the same and
+                            # environment objects are the same, then the files will be the
+                            # same, even if runs come from different task schemas/actions...
+                            if hash_i not in cmd_hashes:
+                                try:
+                                    run.try_write_commands(
+                                        environments=self.environments,
+                                        jobscript=js,
+                                    )
+                                except OutputFileParserNoOutputError:
+                                    # no commands to write, might be used just for saving
+                                    # files
+                                    run_cmd_file_names[run.id_] = None
+                            cmd_hashes[hash_i].add(run.id_)
+                        else:
+                            run_cmd_file_names[run.id_] = None
+
+                    if run.action.requires_dir:
+                        # TODO: what is type of `path`?
+                        for name, path in run.get("input_files", {}).items():
+                            if path:
+                                run_inp_files[run_idx].append(path)
+
+        for run_ids in cmd_hashes.values():
+            run_ids_srt = sorted(run_ids)
+            root_id = run_ids_srt[0]  # used for command file name for this group
+            # TODO: could store multiple IDs to reduce number of files created
+            for run_id_i in run_ids_srt:
+                if run_id_i not in run_cmd_file_names:
+                    run_cmd_file_names[run_id_i] = root_id
+
+        if status:
+            status.update("Adding new submission: writing scripts...")
+
+        seen: dict[int, Path] = {}
+        combined_script_data: dict[
+            int, dict[int, list[tuple[str, Path, bool]]]
+        ] = defaultdict(lambda: defaultdict(list))
+        for task in self.workflow.tasks:
+            for schema in task.template.schemas:
+                if schema.name in actions_by_schema:
+                    for idx, action in enumerate(schema.actions):
+
+                        if not action.script:
+                            continue
+
+                        for env_spec_h in actions_by_schema[schema.name][idx]:
+
+                            env_spec = action.env_spec_from_hashable(env_spec_h)
+                            name, snip_path, specs = action.get_script_artifact_name(
+                                env_spec=env_spec,
+                                act_idx=idx,
+                                ret_specifiers=True,
+                            )
+                            script_hash = action.get_script_determinant_hash(specs)
+                            script_path = self.scripts_path / name
+                            prev_path = seen.get(script_hash)
+                            if script_path == prev_path:
+                                continue
+
+                            elif prev_path:
+                                # try to make a symbolic link to the file previously
+                                # created:
+                                try:
+                                    script_path.symlink_to(prev_path.name)
+                                except OSError:
+                                    # windows requires admin permission, copy instead:
+                                    shutil.copy(prev_path, script_path)
+                            else:
+                                # write script to disk:
+                                source_str = action.compose_source(snip_path)
+                                if source_str:
+                                    with script_path.open("wt", newline="\n") as fp:
+                                        fp.write(source_str)
+                                seen[script_hash] = script_path
+
+        # combined script stuff
+        for js_idx, act_IDs in combined_actions.items():
+            for block_idx, act_IDs_i in enumerate(act_IDs):
+                for task_iID, act_idx in act_IDs_i:
+                    task = self.workflow.tasks.get(insert_ID=task_iID)
+                    schema = task.template.schemas[0]  # TODO: multiple schemas
+                    action = schema.actions[act_idx]
+                    func_name, snip_path = action.get_script_artifact_name(
+                        env_spec=combined_env_specs[js_idx],
+                        act_idx=act_idx,
+                        ret_specifiers=False,
+                        include_suffix=False,
+                        specs_suffix_delim="_",  # can't use "." in function name
+                    )
+                    combined_script_data[js_idx][block_idx].append(
+                        (func_name, snip_path, action.requires_dir)
+                    )
+
+        for js_idx, action_scripts in combined_script_data.items():
+            js = self.jobscripts[js_idx]
+
+            script_str, script_indices, num_elems, num_acts = js.compose_combined_script(
+                [i for _, i in sorted(action_scripts.items())]
+            )
+            js.write_script_indices_file(script_indices, num_elems, num_acts)
+
+            script_path = self.scripts_path / f"js_{js_idx}.py"  # TODO: refactor name
+            with script_path.open("wt", newline="\n") as fp:
+                fp.write(script_str)
+
+        return run_cmd_file_names, run_indices, run_inp_files
+
+    @TimeIt.decorator
+    def _calculate_run_dir_indices(
+        self,
+        run_indices: np.ndarray,
+        cache: ObjectCache,
+    ) -> tuple[np.ndarray, np.ndarray]:
+
+        assert cache.elements is not None
+        assert cache.iterations is not None
+        # get the multiplicities of all tasks, elements, iterations, and runs:
+        wk_num_tasks = self.workflow.num_tasks
+        task_num_elems = {}
+        elem_num_iters = {}
+        iter_num_acts = {}
+        iter_acts_num_runs = {}
+        for task in self.workflow.tasks:
+            elem_IDs = task.element_IDs
+            task_num_elems[task.insert_ID] = len(elem_IDs)
+            for elem_ID in elem_IDs:
+                iter_IDs = cache.elements[elem_ID].iteration_IDs
+                elem_num_iters[elem_ID] = len(iter_IDs)
+                for iter_ID in iter_IDs:
+                    run_IDs = cache.iterations[iter_ID].EAR_IDs
+                    if run_IDs:  # the schema might have no actions
+                        iter_num_acts[iter_ID] = len(run_IDs)
+                        for act_idx, act_run_IDs in run_IDs.items():
+                            iter_acts_num_runs[(iter_ID, act_idx)] = len(act_run_IDs)
+                    else:
+                        iter_num_acts[iter_ID] = 0
+
+        max_u8 = np.iinfo(np.uint8).max
+        max_u32 = np.iinfo(np.uint32).max
+        MAX_ELEMS_PER_DIR = 1000  # TODO: configurable (add `workflow_defaults` to Config)
+        MAX_ITERS_PER_DIR = 1000
+        requires_dir_idx = np.where(run_indices[:, -1] == 1)[0]
+        run_dir_arr = np.empty(requires_dir_idx.size, dtype=RUN_DIR_ARR_DTYPE)
+        run_ids = np.empty(requires_dir_idx.size, dtype=int)
+
+        elem_depths: dict[int, int] = {}
+        iter_depths: dict[int, int] = {}
+        for idx in range(requires_dir_idx.size):
+            row = run_indices[requires_dir_idx[idx]]
+            t_iID, e_id, i_id, r_id, e_idx, i_idx, a_idx, r_idx = row[:-1]
+            run_ids[idx] = r_id
+
+            num_elems_i = task_num_elems[t_iID]
+            num_iters_i = elem_num_iters[e_id]
+            num_acts_i = iter_num_acts[i_id]  # see TODO below
+            num_runs_i = iter_acts_num_runs[(i_id, a_idx)]
+
+            e_depth = 1
+            if num_elems_i == 1:
+                e_idx = max_u32
+            elif num_elems_i > MAX_ELEMS_PER_DIR:
+                if (e_depth := elem_depths.get(t_iID, -1)) == -1:
+                    e_depth = int(
+                        np.ceil(np.log(num_elems_i) / np.log(MAX_ELEMS_PER_DIR))
+                    )
+                    elem_depths[t_iID] = e_depth
+
+            # TODO: i_idx should be either MAX or the iteration ID, which will index into
+            # a separate array to get the formatted loop indices e.g.
+            # ("outer_loop_0_inner_loop_9")
+            i_depth = 1
+            if num_iters_i == 1:
+                i_idx = max_u32
+            elif num_iters_i > MAX_ITERS_PER_DIR:
+                if (i_depth := iter_depths.get(e_id, -1)) == -1:
+                    i_depth = int(
+                        np.ceil(np.log(num_iters_i) / np.log(MAX_ITERS_PER_DIR))
+                    )
+                    iter_depths[e_id] = i_depth
+
+            a_idx = max_u8  # TODO: for now, always exclude action index dir
+
+            if num_runs_i == 1:
+                r_idx = max_u8
+
+            if wk_num_tasks == 1:
+                t_iID = max_u8
+
+            run_dir_arr[idx] = (t_iID, e_idx, i_idx, a_idx, r_idx, e_depth, i_depth)
+
+        return run_dir_arr, run_ids
+
+    @TimeIt.decorator
+    def _write_execute_dirs(
+        self,
+        run_indices: NDArray,
+        run_inp_files: dict[int, list[Path]],
+        cache: ObjectCache,
+        status: Status | None = None,
+    ):
+
+        if status:
+            status.update("Adding new submission: resolving execution directories...")
+
+        run_dir_arr, run_idx = self._calculate_run_dir_indices(run_indices, cache)
+
+        # set run dirs in persistent array:
+        if run_idx.size:
+            self.workflow._store.set_run_dirs(run_dir_arr, run_idx)
+
+        # retrieve run directories as paths. array is not yet committed, so pass in
+        # directly:
+        run_dirs = self.workflow.get_run_directories(dir_indices_arr=run_dir_arr)
+
+        if status:
+            status.update("Adding new submission: making execution directories...")
+
+        # make directories
+        for idx, run_dir in enumerate(run_dirs):
+            assert run_dir
+            run_dir.mkdir(parents=True, exist_ok=True)
+            inp_files_i = run_inp_files.get(run_idx[idx])
+            if inp_files_i:
+                # copy (TODO: optionally symlink) any input files:
+                for path_i in inp_files_i:
+                    shutil.copy(path_i, run_dir)
 
     @staticmethod
     def get_unique_schedulers_of_jobscripts(
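A worked example of the directory-depth arithmetic in `_calculate_run_dir_indices` above, with `MAX_ELEMS_PER_DIR = 1000`:

    # 250 elements:     not above the threshold, so depth stays 1
    # 250_000 elements: ceil(log(250_000) / log(1000)) = ceil(12.43 / 6.91) = ceil(1.80) = 2
    # i.e. element directories are nested two levels deep, bounding entries per level at 1000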
@@ -393,7 +987,7 @@ class Submission(JSONLike):
     ) -> Iterable[tuple[tuple[tuple[int, int], ...], Scheduler]]:
         """Get unique schedulers and which of the passed jobscripts they correspond to.
 
-        Uniqueness is determines only by the `QueuedScheduler.unique_properties` tuple.
+        Uniqueness is determined only by the `QueuedScheduler.unique_properties` tuple.
 
         Parameters
         ----------
@@ -463,13 +1057,90 @@ class Submission(JSONLike):
 
         return zip(map(tuple, js_idx), shells)
 
-    def _append_submission_part(self, submit_time: str, submitted_js_idx: list[int]):
-        self._submission_parts[submit_time] = submitted_js_idx
-        self.workflow._store.add_submission_part(
+    def _update_at_submit_metadata(self, submission_parts: dict[str, list[int]]):
+        """Update persistent store and in-memory record of at-submit metadata.
+
+        Notes
+        -----
+        Currently there is only one type of at-submit metadata, which is the
+        submission-parts: a mapping between a string submit-time, and the list of
+        jobscript indices that were submitted at that submit-time. This method updates
+        the recorded submission parts to include those passed here.
+
+        """
+
+        self.workflow._store.update_at_submit_metadata(
             sub_idx=self.index,
-            dt_str=submit_time,
-            submitted_js_idx=submitted_js_idx,
+            submission_parts=submission_parts,
+        )
+
+        self._at_submit_metadata["submission_parts"].update(submission_parts)
+
+        # cache is now invalid:
+        self._submission_parts_lst = None
+
+    def _append_submission_part(self, submit_time: str, submitted_js_idx: list[int]):
+        self._update_at_submit_metadata(submission_parts={submit_time: submitted_js_idx})
+
+    def get_jobscript_functions_name(self, shell: Shell, shell_idx: int) -> str:
+        """Get the name of the jobscript functions file for the specified shell."""
+        return f"js_funcs_{shell_idx}{shell.JS_EXT}"
+
+    def get_jobscript_functions_path(self, shell: Shell, shell_idx: int) -> Path:
+        """Get the path of the jobscript functions file for the specified shell."""
+        return self.js_funcs_path / self.get_jobscript_functions_name(shell, shell_idx)
+
+    def _compose_functions_file(self, shell: Shell) -> str:
+        """Prepare the contents of the jobscript functions file for the specified
+        shell.
+
+        Notes
+        -----
+        The functions file includes, at a minimum, a shell function that invokes the app
+        with provided arguments. This file will be sourced/invoked within all jobscripts
+        and command files that share the specified shell.
+
+        """
+
+        cfg_invocation = self._app.config._file.get_invocation(
+            self._app.config._config_key
        )
+        env_setup = cfg_invocation["environment_setup"]
+        if env_setup:
+            env_setup = indent(env_setup.strip(), shell.JS_ENV_SETUP_INDENT)
+            env_setup += "\n\n" + shell.JS_ENV_SETUP_INDENT
+        else:
+            env_setup = shell.JS_ENV_SETUP_INDENT
+        app_invoc = list(self._app.run_time_info.invocation_command)
+
+        app_caps = self._app.package_name.upper()
+        func_file_args = shell.process_JS_header_args(  # TODO: rename?
+            {
+                "workflow_app_alias": self.WORKFLOW_APP_ALIAS,
+                "env_setup": env_setup,
+                "app_invoc": app_invoc,
+                "app_caps": app_caps,
+                "config_dir": str(self._app.config.config_directory),
+                "config_invoc_key": self._app.config.config_key,
+            }
+        )
+        out = shell.JS_FUNCS.format(**func_file_args)
+        return out
+
+    def _write_functions_file(self, shell: Shell, shell_idx: int) -> None:
+        """Write the jobscript functions file for the specified shell.
+
+        Notes
+        -----
+        The functions file includes, at a minimum, a shell function that invokes the app
+        with provided arguments. This file will be sourced/invoked within all jobscripts
+        and command files that share the specified shell.
+
+        """
+        js_funcs_str = self._compose_functions_file(shell)
+        path = self.get_jobscript_functions_path(shell, shell_idx)
+        with path.open("wt", newline="\n") as fp:
+            fp.write(js_funcs_str)
 
     @TimeIt.decorator
     def submit(
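The functions-file name composed by `get_jobscript_functions_name` above joins the shell index to the shell's script extension. Illustrative names (the `JS_EXT` values are assumptions; they are not shown in this diff):

    # bash-family shell, shell_idx 0:  js_funcs_0.sh
    # PowerShell, shell_idx 1:         js_funcs_1.ps1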
@@ -481,24 +1152,9 @@ class Submission(JSONLike):
     ) -> list[int]:
         """Generate and submit the jobscripts of this submission."""
 
-        # if JS_parallelism explicitly requested but store doesn't support, raise:
-        supports_JS_para = self.workflow._store._features.jobscript_parallelism
-        if self.JS_parallelism:
-            if not supports_JS_para:
-                if status:
-                    status.stop()
-                raise ValueError(
-                    f"Store type {self.workflow._store!r} does not support jobscript "
-                    f"parallelism."
-                )
-        elif self.JS_parallelism is None:
-            self._JS_parallelism = supports_JS_para
-
-        # set os_name and shell_name for each jobscript:
-        for js in self.jobscripts:
-            js._set_os_name()
-            js._set_shell_name()
-            js._set_scheduler_name()
+        # TODO: support passing list of jobscript indices to submit; this will allow us
+        # to test a submission with multiple "submission parts". would also need to check
+        # dependencies if this customised list is passed
 
         outstanding = self.outstanding_jobscripts
 
@@ -516,7 +1172,8 @@ class Submission(JSONLike):
             if js_idx in outstanding:
                 js_vers_info.setdefault(js_idx, {}).update(vers_info)
 
-        for js_indices_2, shell in self.get_unique_shells():
+        js_shell_indices = {}
+        for shell_idx, (js_indices_2, shell) in enumerate(self.get_unique_shells()):
             try:
                 vers_info = shell.get_version_info()
             except Exception:
@@ -526,22 +1183,22 @@ class Submission(JSONLike):
             for js_idx in js_indices_2:
                 if js_idx in outstanding:
                     js_vers_info.setdefault(js_idx, {}).update(vers_info)
+                    js_shell_indices[js_idx] = shell_idx
 
+            # write a file containing useful shell functions:
+            self._write_functions_file(shell, shell_idx)
+
+        hostname = socket.gethostname()
+        machine = self._app.config.get("machine")
         for js_idx, vers_info_i in js_vers_info.items():
-            self.jobscripts[js_idx]._set_version_info(vers_info_i)
+            js = self.jobscripts[js_idx]
+            js._set_version_info(vers_info_i)
+            js._set_submit_hostname(hostname)
+            js._set_submit_machine(machine)
+            js._set_shell_idx(js_shell_indices[js_idx])
 
-        # for direct submission, it's important that os_name/shell_name/scheduler_name
-        # are made persistent now, because `Workflow.write_commands`, which might be
-        # invoked in a new process before submission has completed, needs to know these:
         self.workflow._store._pending.commit_all()
 
-        # TODO: a submission should only be "submitted" once shouldn't it?
-        # no; there could be an IO error (e.g. internet connectivity), so might
-        # need to be able to reattempt submission of outstanding jobscripts.
-        self.path.mkdir(exist_ok=True)
-        if not self.abort_EARs_file_path.is_file():
-            self._write_abort_EARs_file()
-
         # map jobscript `index` to (scheduler job ID or process ID, is_array):
         scheduler_refs: dict[int, tuple[str, bool]] = {}
         submitted_js_idx: list[int] = []
@@ -553,14 +1210,20 @@ class Submission(JSONLike):
 
             # check all dependencies were submitted now or previously:
             if not all(
-                i in submitted_js_idx or i in self.submitted_jobscripts
-                for i in js.dependencies
+                js_idx in submitted_js_idx or js_idx in self.submitted_jobscripts
+                for js_idx, _ in js.dependencies
             ):
+                warnings.warn(
+                    f"Cannot submit jobscript index {js.index} since not all of its "
+                    f"dependencies have been submitted: {js.dependencies!r}"
+                )
                 continue
 
             try:
                 if status:
-                    status.update(f"Submitting jobscript {js.index}...")
+                    status.update(
+                        f"Submitting jobscript {js.index + 1}/{len(self.jobscripts)}..."
+                    )
                 js_ref_i = js.submit(scheduler_refs, print_stdout=print_stdout)
                 scheduler_refs[js.index] = (js_ref_i, js.is_array)
                 submitted_js_idx.append(js.index)
@@ -569,12 +1232,18 @@ class Submission(JSONLike):
                 errs.append(err)
                 continue
 
+        # TODO: some way to handle KeyboardInterrupt during submission?
+        #   - stop, and cancel already submitted?
+
         if submitted_js_idx:
             dt_str = current_timestamp().strftime(self._app._submission_ts_fmt)
             self._append_submission_part(
                 submit_time=dt_str,
                 submitted_js_idx=submitted_js_idx,
             )
+            # ensure `_submission_parts` is committed
+            self.workflow._store._pending.commit_all()
+
             # add a record of the submission part to the known-submissions file
             if add_to_known:
                 self._app._add_to_known_submissions(
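Per the `_update_at_submit_metadata` notes earlier, the committed submission-parts record maps a submit-time string (formatted with the app's `_submission_ts_fmt`) to the jobscript indices submitted at that time; with illustrative values:

    {"2024-01-01 09:00:00": [0, 1, 2], "2024-01-01 10:30:00": [3]}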
@@ -606,11 +1275,75 @@ class Submission(JSONLike):
         # filter by active jobscripts:
         if js_idx := [i[1] for i in js_indices if i[1] in act_js]:
             print(
-                f"Cancelling jobscripts {js_idx!r} of submission {self.index} of "
-                f"workflow {self.workflow.name!r}."
+                f"Cancelling jobscripts {shorten_list_str(js_idx, items=5)} of "
+                f"submission {self.index} of workflow {self.workflow.name!r}."
             )
             jobscripts = [self.jobscripts[i] for i in js_idx]
             sched_refs = [js.scheduler_js_ref for js in jobscripts]
             sched.cancel_jobs(js_refs=sched_refs, jobscripts=jobscripts)
         else:
             print("No active jobscripts to cancel.")
+
+    @TimeIt.decorator
+    def get_scheduler_job_IDs(self) -> tuple[str, ...]:
+        """Return jobscript scheduler job IDs."""
+        return tuple(
+            js_i.scheduler_job_ID
+            for js_i in self.jobscripts
+            if js_i.scheduler_job_ID is not None
+        )
+
+    @TimeIt.decorator
+    def get_process_IDs(self) -> tuple[int, ...]:
+        """Return jobscript process IDs."""
+        return tuple(
+            js_i.process_ID for js_i in self.jobscripts if js_i.process_ID is not None
+        )
+
+    @TimeIt.decorator
+    def list_jobscripts(
+        self,
+        max_js: int | None = None,
+        jobscripts: list[int] | None = None,
+        width: int | None = None,
+    ) -> None:
+        """Print a table listing jobscripts and associated information.
+
+        Parameters
+        ----------
+        max_js
+            Maximum jobscript index to display. This cannot be specified with `jobscripts`.
+        jobscripts
+            A list of jobscripts to display. This cannot be specified with `max_js`.
+        width
+            Width in characters of the printed table.
+
+        """
+        self.workflow.list_jobscripts(
+            sub_idx=self.index, max_js=max_js, jobscripts=jobscripts, width=width
+        )
+
+    @TimeIt.decorator
+    def list_task_jobscripts(
+        self,
+        task_names: list[str] | None = None,
+        max_js: int | None = None,
+        width: int | None = None,
+    ) -> None:
+        """Print a table listing the jobscripts associated with the specified (or all)
+        tasks for the specified submission.
+
+        Parameters
+        ----------
+        task_names
+            List of sub-strings to match to task names. Only matching task names will be
+            included.
+        max_js
+            Maximum jobscript index to display.
+        width
+            Width in characters of the printed table.
+
+        """
+        self.workflow.list_task_jobscripts(
+            sub_idx=self.index, max_js=max_js, task_names=task_names, width=width
+        )
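Illustrative usage of the new listing API (a sketch only; the workflow-loading call is the usual app-level pattern and is not part of this diff):

    wk = app.Workflow("/path/to/workflow")        # hypothetical workflow handle
    sub = wk.submissions[0]
    sub.list_jobscripts(max_js=10)                # table of jobscripts up to index 10
    sub.list_task_jobscripts(task_names=["fit"])  # jobscripts for tasks matching "fit"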