hpcflow-new2 0.2.0a190__py3-none-any.whl → 0.2.0a200__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (132)
  1. hpcflow/__pyinstaller/hook-hpcflow.py +1 -0
  2. hpcflow/_version.py +1 -1
  3. hpcflow/data/scripts/bad_script.py +2 -0
  4. hpcflow/data/scripts/do_nothing.py +2 -0
  5. hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
  6. hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
  7. hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
  8. hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
  9. hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
  10. hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
  11. hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
  12. hpcflow/data/scripts/input_file_generator_basic.py +3 -0
  13. hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
  14. hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
  15. hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
  16. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
  17. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
  18. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
  19. hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
  20. hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
  21. hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
  22. hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
  23. hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
  24. hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
  25. hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
  26. hpcflow/data/scripts/output_file_parser_basic.py +3 -0
  27. hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
  28. hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
  29. hpcflow/data/scripts/script_exit_test.py +5 -0
  30. hpcflow/data/template_components/environments.yaml +1 -1
  31. hpcflow/sdk/__init__.py +5 -0
  32. hpcflow/sdk/app.py +166 -92
  33. hpcflow/sdk/cli.py +263 -84
  34. hpcflow/sdk/cli_common.py +99 -5
  35. hpcflow/sdk/config/callbacks.py +38 -1
  36. hpcflow/sdk/config/config.py +102 -13
  37. hpcflow/sdk/config/errors.py +19 -5
  38. hpcflow/sdk/config/types.py +3 -0
  39. hpcflow/sdk/core/__init__.py +25 -1
  40. hpcflow/sdk/core/actions.py +914 -262
  41. hpcflow/sdk/core/cache.py +76 -34
  42. hpcflow/sdk/core/command_files.py +14 -128
  43. hpcflow/sdk/core/commands.py +35 -6
  44. hpcflow/sdk/core/element.py +122 -50
  45. hpcflow/sdk/core/errors.py +58 -2
  46. hpcflow/sdk/core/execute.py +207 -0
  47. hpcflow/sdk/core/loop.py +408 -50
  48. hpcflow/sdk/core/loop_cache.py +4 -4
  49. hpcflow/sdk/core/parameters.py +382 -37
  50. hpcflow/sdk/core/run_dir_files.py +13 -40
  51. hpcflow/sdk/core/skip_reason.py +7 -0
  52. hpcflow/sdk/core/task.py +119 -30
  53. hpcflow/sdk/core/task_schema.py +68 -0
  54. hpcflow/sdk/core/test_utils.py +66 -27
  55. hpcflow/sdk/core/types.py +54 -1
  56. hpcflow/sdk/core/utils.py +136 -19
  57. hpcflow/sdk/core/workflow.py +1587 -356
  58. hpcflow/sdk/data/workflow_spec_schema.yaml +2 -0
  59. hpcflow/sdk/demo/cli.py +7 -0
  60. hpcflow/sdk/helper/cli.py +1 -0
  61. hpcflow/sdk/log.py +42 -15
  62. hpcflow/sdk/persistence/base.py +405 -53
  63. hpcflow/sdk/persistence/json.py +177 -52
  64. hpcflow/sdk/persistence/pending.py +237 -69
  65. hpcflow/sdk/persistence/store_resource.py +3 -2
  66. hpcflow/sdk/persistence/types.py +15 -4
  67. hpcflow/sdk/persistence/zarr.py +928 -81
  68. hpcflow/sdk/submission/jobscript.py +1408 -489
  69. hpcflow/sdk/submission/schedulers/__init__.py +40 -5
  70. hpcflow/sdk/submission/schedulers/direct.py +33 -19
  71. hpcflow/sdk/submission/schedulers/sge.py +51 -16
  72. hpcflow/sdk/submission/schedulers/slurm.py +44 -16
  73. hpcflow/sdk/submission/schedulers/utils.py +7 -2
  74. hpcflow/sdk/submission/shells/base.py +68 -20
  75. hpcflow/sdk/submission/shells/bash.py +222 -129
  76. hpcflow/sdk/submission/shells/powershell.py +200 -150
  77. hpcflow/sdk/submission/submission.py +852 -119
  78. hpcflow/sdk/submission/types.py +18 -21
  79. hpcflow/sdk/typing.py +24 -5
  80. hpcflow/sdk/utils/arrays.py +71 -0
  81. hpcflow/sdk/utils/deferred_file.py +55 -0
  82. hpcflow/sdk/utils/hashing.py +16 -0
  83. hpcflow/sdk/utils/patches.py +12 -0
  84. hpcflow/sdk/utils/strings.py +33 -0
  85. hpcflow/tests/api/test_api.py +32 -0
  86. hpcflow/tests/conftest.py +19 -0
  87. hpcflow/tests/data/benchmark_script_runner.yaml +26 -0
  88. hpcflow/tests/data/multi_path_sequences.yaml +29 -0
  89. hpcflow/tests/data/workflow_test_run_abort.yaml +34 -35
  90. hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
  91. hpcflow/tests/scripts/test_input_file_generators.py +282 -0
  92. hpcflow/tests/scripts/test_main_scripts.py +821 -70
  93. hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
  94. hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
  95. hpcflow/tests/shells/wsl/test_wsl_submission.py +6 -0
  96. hpcflow/tests/unit/test_action.py +176 -0
  97. hpcflow/tests/unit/test_app.py +20 -0
  98. hpcflow/tests/unit/test_cache.py +46 -0
  99. hpcflow/tests/unit/test_cli.py +133 -0
  100. hpcflow/tests/unit/test_config.py +122 -1
  101. hpcflow/tests/unit/test_element_iteration.py +47 -0
  102. hpcflow/tests/unit/test_jobscript_unit.py +757 -0
  103. hpcflow/tests/unit/test_loop.py +1332 -27
  104. hpcflow/tests/unit/test_meta_task.py +325 -0
  105. hpcflow/tests/unit/test_multi_path_sequences.py +229 -0
  106. hpcflow/tests/unit/test_parameter.py +13 -0
  107. hpcflow/tests/unit/test_persistence.py +190 -8
  108. hpcflow/tests/unit/test_run.py +109 -3
  109. hpcflow/tests/unit/test_run_directories.py +29 -0
  110. hpcflow/tests/unit/test_shell.py +20 -0
  111. hpcflow/tests/unit/test_submission.py +5 -76
  112. hpcflow/tests/unit/test_workflow_template.py +31 -0
  113. hpcflow/tests/unit/utils/test_arrays.py +40 -0
  114. hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
  115. hpcflow/tests/unit/utils/test_hashing.py +65 -0
  116. hpcflow/tests/unit/utils/test_patches.py +5 -0
  117. hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
  118. hpcflow/tests/workflows/__init__.py +0 -0
  119. hpcflow/tests/workflows/test_directory_structure.py +31 -0
  120. hpcflow/tests/workflows/test_jobscript.py +332 -0
  121. hpcflow/tests/workflows/test_run_status.py +198 -0
  122. hpcflow/tests/workflows/test_skip_downstream.py +696 -0
  123. hpcflow/tests/workflows/test_submission.py +140 -0
  124. hpcflow/tests/workflows/test_workflows.py +142 -2
  125. hpcflow/tests/workflows/test_zip.py +18 -0
  126. hpcflow/viz_demo.ipynb +6587 -3
  127. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a200.dist-info}/METADATA +7 -4
  128. hpcflow_new2-0.2.0a200.dist-info/RECORD +222 -0
  129. hpcflow_new2-0.2.0a190.dist-info/RECORD +0 -165
  130. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a200.dist-info}/LICENSE +0 -0
  131. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a200.dist-info}/WHEEL +0 -0
  132. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a200.dist-info}/entry_points.txt +0 -0
@@ -30,6 +30,13 @@ class Scheduler(ABC, Generic[JSRefType], AppAware):
30
30
  """
31
31
  Abstract base class for schedulers.
32
32
 
33
+ Note
34
+ ----
35
+ Do not make immediate subclasses of this class other than
36
+ :py:class:`DirectScheduler` and :py:class:`QueuedScheduler`;
37
+ subclass those two instead. Code (e.g., in :py:class:`Jobscript`)
38
+ assumes that this model is followed and does not check it.
39
+
33
40
  Parameters
34
41
  ----------
35
42
  shell_args: str
@@ -119,8 +126,8 @@ class Scheduler(ABC, Generic[JSRefType], AppAware):
119
126
 
120
127
  @abstractmethod
121
128
  def get_job_state_info(
122
- self, *, js_refs: Sequence[JSRefType] | None = None, num_js_elements: int = 0
123
- ) -> Mapping[str, Mapping[int | None, JobscriptElementState]]:
129
+ self, *, js_refs: Sequence[JSRefType] | None = None
130
+ ) -> Mapping[str, JobscriptElementState | Mapping[int, JobscriptElementState]]:
124
131
  """
125
132
  Get the state of one or more jobscripts.
126
133
  """
@@ -136,12 +143,23 @@ class Scheduler(ABC, Generic[JSRefType], AppAware):
136
143
  self,
137
144
  js_refs: list[JSRefType],
138
145
  jobscripts: list[Jobscript] | None = None,
139
- num_js_elements: int = 0, # Ignored!
140
146
  ) -> None:
141
147
  """
142
148
  Cancel one or more jobscripts.
143
149
  """
144
150
 
151
+ @abstractmethod
152
+ def get_std_out_err_filename(self, js_idx: int, *args, **kwargs) -> str:
153
+ """File name of combined standard output and error streams."""
154
+
155
+ @abstractmethod
156
+ def get_stdout_filename(self, js_idx: int, *args, **kwargs) -> str:
157
+ """File name of the standard output stream file."""
158
+
159
+ @abstractmethod
160
+ def get_stderr_filename(self, js_idx: int, *args, **kwargs) -> str:
161
+ """File name of the standard error stream file."""
162
+
145
163
 
146
164
  @hydrate
147
165
  class QueuedScheduler(Scheduler[str]):
@@ -226,7 +244,6 @@ class QueuedScheduler(Scheduler[str]):
226
244
  """
227
245
  while js_refs:
228
246
  info: Mapping[str, Any] = self.get_job_state_info(js_refs=js_refs)
229
- print(info)
230
247
  if not info:
231
248
  break
232
249
  js_refs = list(info)
@@ -234,8 +251,26 @@ class QueuedScheduler(Scheduler[str]):
234
251
 
235
252
  @abstractmethod
236
253
  def format_options(
237
- self, resources: ElementResources, num_elements: int, is_array: bool, sub_idx: int
254
+ self,
255
+ resources: ElementResources,
256
+ num_elements: int,
257
+ is_array: bool,
258
+ sub_idx: int,
259
+ js_idx: int,
238
260
  ) -> str:
239
261
  """
240
262
  Render options in a way that the scheduler can handle.
241
263
  """
264
+
265
+ def get_std_out_err_filename(
266
+ self, js_idx: int, job_ID: str, array_idx: int | None = None
267
+ ):
268
+ """File name of combined standard output and error streams.
269
+
270
+ Notes
271
+ -----
272
+ We use the standard output stream filename format for the combined output and
273
+ error streams file.
274
+
275
+ """
276
+ return self.get_stdout_filename(js_idx=js_idx, job_ID=job_ID, array_idx=array_idx)
@@ -23,6 +23,20 @@ if TYPE_CHECKING:
23
23
  DirectRef: TypeAlias = "tuple[int, list[str]]"
24
24
 
25
25
 
26
+ def _is_process_cmdline_equal(proc: psutil.Process, cmdline: list[str]) -> bool:
27
+ """Check if the `cmdline` of a psutil `Process` is equal to the specified
28
+ `cmdline`."""
29
+ try:
30
+ if proc.cmdline() == cmdline:
31
+ return True
32
+ else:
33
+ return False
34
+ except (psutil.NoSuchProcess, psutil.ZombieProcess):
35
+ # process no longer exists or, on unix, process has completed but still has a
36
+ # record
37
+ return False
38
+
39
+
26
40
  class DirectScheduler(Scheduler[DirectRef]):
27
41
  """
28
42
  A direct scheduler, that just runs jobs immediately as direct subprocesses.
@@ -95,8 +109,7 @@ class DirectScheduler(Scheduler[DirectRef]):
95
109
  except psutil.NoSuchProcess:
96
110
  # process might have completed already
97
111
  continue
98
- if proc_i.cmdline() == p_cmdline:
99
- # additional check this is the same process that we submitted
112
+ if _is_process_cmdline_equal(proc_i, p_cmdline):
100
113
  procs.append(proc_i)
101
114
  return procs
102
115
 
@@ -131,23 +144,18 @@ class DirectScheduler(Scheduler[DirectRef]):
131
144
 
132
145
  @override
133
146
  def get_job_state_info(
134
- self,
135
- *,
136
- js_refs: Sequence[DirectRef] | None = None,
137
- num_js_elements: int = 0,
138
- ) -> Mapping[str, Mapping[int | None, JobscriptElementState]]:
147
+ self, *, js_refs: Sequence[DirectRef] | None = None
148
+ ) -> Mapping[str, JobscriptElementState]:
139
149
  """Query the scheduler to get the states of all of this user's jobs, optionally
140
150
  filtering by specified job IDs.
141
151
 
142
152
  Jobs that are not in the scheduler's status output will not appear in the output
143
153
  of this method."""
144
- info: dict[str, Mapping[int | None, JobscriptElementState]] = {}
154
+ info: dict[str, JobscriptElementState] = {}
145
155
  for p_id, p_cmdline in js_refs or ():
146
156
  if self.is_jobscript_active(p_id, p_cmdline):
147
157
  # as far as the "scheduler" is concerned, all elements are running:
148
- info[str(p_id)] = {
149
- i: JobscriptElementState.running for i in range(num_js_elements)
150
- }
158
+ info[str(p_id)] = JobscriptElementState.running
151
159
 
152
160
  return info
153
161
 
@@ -156,7 +164,6 @@ class DirectScheduler(Scheduler[DirectRef]):
156
164
  self,
157
165
  js_refs: list[DirectRef],
158
166
  jobscripts: list[Jobscript] | None = None,
159
- num_js_elements: int = 0, # Ignored!
160
167
  ):
161
168
  """
162
169
  Cancel some jobs.
@@ -166,18 +173,13 @@ class DirectScheduler(Scheduler[DirectRef]):
166
173
 
167
174
  def callback(proc: psutil.Process):
168
175
  try:
169
- js = js_proc_id[proc.pid]
176
+ js_proc_id[proc.pid]
170
177
  except KeyError:
171
178
  # child process of one of the jobscripts
172
179
  self._app.submission_logger.debug(
173
180
  f"jobscript child process ({proc.pid}) killed"
174
181
  )
175
182
  return
176
- assert hasattr(proc, "returncode")
177
- print(
178
- f"Jobscript {js.index} from submission {js.submission.index} "
179
- f"terminated (user-initiated cancel) with exit code {proc.returncode}."
180
- )
181
183
 
182
184
  procs = self.__get_jobscript_processes(js_refs)
183
185
  self._app.submission_logger.info(
@@ -185,6 +187,7 @@ class DirectScheduler(Scheduler[DirectRef]):
185
187
  )
186
188
  js_proc_id = {i.pid: jobscripts[idx] for idx, i in enumerate(procs) if jobscripts}
187
189
  self.__kill_processes(procs, timeout=3, on_terminate=callback)
190
+ print(f"Cancelled {len(procs)} jobscript{'s' if len(procs) > 1 else ''}.")
188
191
  self._app.submission_logger.info("jobscripts cancel command executed.")
189
192
 
190
193
  def is_jobscript_active(self, process_ID: int, process_cmdline: list[str]):
@@ -198,8 +201,19 @@ class DirectScheduler(Scheduler[DirectRef]):
198
201
  proc = psutil.Process(process_ID)
199
202
  except psutil.NoSuchProcess:
200
203
  return False
204
+ return _is_process_cmdline_equal(proc, process_cmdline)
205
+
206
+ def get_std_out_err_filename(self, js_idx: int, **kwargs) -> str:
207
+ """File name of combined standard output and error streams."""
208
+ return f"js_{js_idx}_std.log"
209
+
210
+ def get_stdout_filename(self, js_idx: int, **kwargs) -> str:
211
+ """File name of the standard output stream file."""
212
+ return f"js_{js_idx}_stdout.log"
201
213
 
202
- return proc.cmdline() == process_cmdline
214
+ def get_stderr_filename(self, js_idx: int, **kwargs) -> str:
215
+ """File name of the standard error stream file."""
216
+ return f"js_{js_idx}_stderr.log"
203
217
 
204
218
 
205
219
  @hydrate
@@ -5,7 +5,7 @@ An interface to SGE.
5
5
  from __future__ import annotations
6
6
  from collections.abc import Sequence
7
7
  import re
8
- from typing import TYPE_CHECKING
8
+ from typing import cast, TYPE_CHECKING
9
9
  from typing_extensions import override
10
10
  from hpcflow.sdk.typing import hydrate
11
11
  from hpcflow.sdk.core.errors import (
@@ -131,7 +131,7 @@ class SGEPosix(QueuedScheduler):
131
131
  if resources.SGE_parallel_env is not None:
132
132
  # check user-specified `parallel_env` is valid and compatible with
133
133
  # `num_cores`:
134
- if resources.num_cores and resources.num_cores > 1:
134
+ if resources.num_cores and resources.num_cores == 1:
135
135
  raise ValueError(
136
136
  f"An SGE parallel environment should not be specified if `num_cores` "
137
137
  f"is 1 (`SGE_parallel_env` was specified as "
@@ -174,16 +174,42 @@ class SGEPosix(QueuedScheduler):
174
174
  def __format_array_request(self, num_elements: int) -> str:
175
175
  return f"{self.js_cmd} {self.array_switch} 1-{num_elements}"
176
176
 
177
+ def get_stdout_filename(
178
+ self, js_idx: int, job_ID: str, array_idx: int | None = None
179
+ ) -> str:
180
+ """File name of the standard output stream file."""
181
+ # TODO: untested, might not work!
182
+ array_idx_str = f".{array_idx}" if array_idx is not None else ""
183
+ return f"js_{js_idx}.sh.o{job_ID}{array_idx_str}"
184
+
185
+ def get_stderr_filename(
186
+ self, js_idx: int, job_ID: str, array_idx: int | None = None
187
+ ) -> str:
188
+ """File name of the standard error stream file."""
189
+ # TODO: untested, might not work!
190
+ array_idx_str = f".{array_idx}" if array_idx is not None else ""
191
+ return f"js_{js_idx}.sh.e{job_ID}{array_idx_str}"
192
+
177
193
  def __format_std_stream_file_option_lines(
178
- self, is_array: bool, sub_idx: int
194
+ self, is_array: bool, sub_idx: int, js_idx: int, combine_std: bool
179
195
  ) -> Iterator[str]:
180
- # note: we can't modify the file names
181
- yield f"{self.js_cmd} -o ./artifacts/submissions/{sub_idx}"
182
- yield f"{self.js_cmd} -e ./artifacts/submissions/{sub_idx}"
196
+ # note: if we modify the file names, there is, I believe, no way to include the
197
+ # job ID; so we don't modify the file names:
198
+ base = f"./artifacts/submissions/{sub_idx}/js_std/{js_idx}"
199
+ yield f"{self.js_cmd} -o {base}"
200
+ if combine_std:
201
+ yield f"{self.js_cmd} -j y" # redirect stderr to stdout
202
+ else:
203
+ yield f"{self.js_cmd} -e {base}"
183
204
 
184
205
  @override
185
206
  def format_options(
186
- self, resources: ElementResources, num_elements: int, is_array: bool, sub_idx: int
207
+ self,
208
+ resources: ElementResources,
209
+ num_elements: int,
210
+ is_array: bool,
211
+ sub_idx: int,
212
+ js_idx: int,
187
213
  ) -> str:
188
214
  """
189
215
  Format the options to the jobscript command.
@@ -194,7 +220,11 @@ class SGEPosix(QueuedScheduler):
194
220
  if is_array:
195
221
  opts.append(self.__format_array_request(num_elements))
196
222
 
197
- opts.extend(self.__format_std_stream_file_option_lines(is_array, sub_idx))
223
+ opts.extend(
224
+ self.__format_std_stream_file_option_lines(
225
+ is_array, sub_idx, js_idx, resources.combine_jobscript_std
226
+ )
227
+ )
198
228
 
199
229
  for opt_k, opt_v in self.options.items():
200
230
  if opt_v is None:
@@ -264,9 +294,9 @@ class SGEPosix(QueuedScheduler):
264
294
 
265
295
  def get_job_statuses(
266
296
  self,
267
- ) -> Mapping[str, Mapping[int | None, JobscriptElementState]]:
268
- """Get information about all of this user's jobscripts that currently listed by
269
- the scheduler."""
297
+ ) -> Mapping[str, JobscriptElementState | Mapping[int, JobscriptElementState]]:
298
+ """Get information about all of this user's jobscripts that are currently listed
299
+ by the scheduler."""
270
300
  cmd = [*self.show_cmd, "-u", "$USER", "-g", "d"] # "-g d": separate arrays items
271
301
  stdout, stderr = run_cmd(cmd, logger=self._app.submission_logger)
272
302
  if stderr:
@@ -277,7 +307,7 @@ class SGEPosix(QueuedScheduler):
277
307
  elif not stdout:
278
308
  return {}
279
309
 
280
- info: dict[str, dict[int | None, JobscriptElementState]] = {}
310
+ info: dict[str, dict[int, JobscriptElementState] | JobscriptElementState] = {}
281
311
  lines = stdout.split("\n")
282
312
  # assuming a job name with spaces means we can't split on spaces to get
283
313
  # anywhere beyond the job name, so get the column index of the state heading
@@ -300,13 +330,19 @@ class SGEPosix(QueuedScheduler):
300
330
  else None
301
331
  )
302
332
 
303
- info.setdefault(base_job_ID, {})[arr_idx] = state
333
+ if arr_idx is not None:
334
+ entry = cast(
335
+ dict[int, JobscriptElementState], info.setdefault(base_job_ID, {})
336
+ )
337
+ entry[arr_idx] = state
338
+ else:
339
+ info[base_job_ID] = state
304
340
  return info
305
341
 
306
342
  @override
307
343
  def get_job_state_info(
308
- self, *, js_refs: Sequence[str] | None = None, num_js_elements: int = 0
309
- ) -> Mapping[str, Mapping[int | None, JobscriptElementState]]:
344
+ self, *, js_refs: Sequence[str] | None = None
345
+ ) -> Mapping[str, JobscriptElementState | Mapping[int, JobscriptElementState]]:
310
346
  """Query the scheduler to get the states of all of this user's jobs, optionally
311
347
  filtering by specified job IDs.
312
348
 
@@ -324,7 +360,6 @@ class SGEPosix(QueuedScheduler):
324
360
  self,
325
361
  js_refs: list[str],
326
362
  jobscripts: list[Jobscript] | None = None,
327
- num_js_elements: int = 0, # Ignored!
328
363
  ):
329
364
  """
330
365
  Cancel submitted jobs.
@@ -5,7 +5,7 @@ An interface to SLURM.
5
5
  from __future__ import annotations
6
6
  import subprocess
7
7
  import time
8
- from typing import TYPE_CHECKING
8
+ from typing import cast, TYPE_CHECKING
9
9
  from typing_extensions import override
10
10
  from hpcflow.sdk.typing import hydrate
11
11
  from hpcflow.sdk.core.enums import ParallelMode
@@ -344,17 +344,37 @@ class SlurmPosix(QueuedScheduler):
344
344
  max_str = f"%{resources.max_array_items}" if resources.max_array_items else ""
345
345
  return f"{self.js_cmd} {self.array_switch} 1-{num_elements}{max_str}"
346
346
 
347
+ def get_stdout_filename(
348
+ self, js_idx: int, job_ID: str, array_idx: int | None = None
349
+ ) -> str:
350
+ """File name of the standard output stream file."""
351
+ array_idx_str = f".{array_idx}" if array_idx is not None else ""
352
+ return f"js_{js_idx}.sh_{job_ID}{array_idx_str}.out"
353
+
354
+ def get_stderr_filename(
355
+ self, js_idx: int, job_ID: str, array_idx: int | None = None
356
+ ) -> str:
357
+ """File name of the standard error stream file."""
358
+ array_idx_str = f".{array_idx}" if array_idx is not None else ""
359
+ return f"js_{js_idx}.sh_{job_ID}{array_idx_str}.err"
360
+
347
361
  def __format_std_stream_file_option_lines(
348
- self, is_array: bool, sub_idx: int
362
+ self, is_array: bool, sub_idx: int, js_idx: int, combine_std: bool
349
363
  ) -> Iterator[str]:
350
364
  pattern = R"%x_%A.%a" if is_array else R"%x_%j"
351
- base = f"./artifacts/submissions/{sub_idx}/{pattern}"
352
- yield f"{self.js_cmd} -o {base}.out"
353
- yield f"{self.js_cmd} -e {base}.err"
365
+ base = f"./artifacts/submissions/{sub_idx}/js_std/{js_idx}/{pattern}"
366
+ yield f"{self.js_cmd} --output {base}.out"
367
+ if not combine_std:
368
+ yield f"{self.js_cmd} --error {base}.err"
354
369
 
355
370
  @override
356
371
  def format_options(
357
- self, resources: ElementResources, num_elements: int, is_array: bool, sub_idx: int
372
+ self,
373
+ resources: ElementResources,
374
+ num_elements: int,
375
+ is_array: bool,
376
+ sub_idx: int,
377
+ js_idx: int,
358
378
  ) -> str:
359
379
  """
360
380
  Format the options to the scheduler.
@@ -365,7 +385,11 @@ class SlurmPosix(QueuedScheduler):
365
385
  if is_array:
366
386
  opts.append(self.__format_array_request(num_elements, resources))
367
387
 
368
- opts.extend(self.__format_std_stream_file_option_lines(is_array, sub_idx))
388
+ opts.extend(
389
+ self.__format_std_stream_file_option_lines(
390
+ is_array, sub_idx, js_idx, resources.combine_jobscript_std
391
+ )
392
+ )
369
393
 
370
394
  for opt_k, opt_v in self.options.items():
371
395
  if isinstance(opt_v, list):
@@ -468,9 +492,9 @@ class SlurmPosix(QueuedScheduler):
468
492
 
469
493
  def __parse_job_states(
470
494
  self, stdout: str
471
- ) -> dict[str, dict[int | None, JobscriptElementState]]:
495
+ ) -> dict[str, JobscriptElementState | dict[int, JobscriptElementState]]:
472
496
  """Parse output from Slurm `squeue` command with a simple format."""
473
- info: dict[str, dict[int | None, JobscriptElementState]] = {}
497
+ info: dict[str, JobscriptElementState | dict[int, JobscriptElementState]] = {}
474
498
  for ln in stdout.split("\n"):
475
499
  if not ln:
476
500
  continue
@@ -478,9 +502,14 @@ class SlurmPosix(QueuedScheduler):
478
502
  base_job_ID, arr_idx = self._parse_job_IDs(job_id)
479
503
  state = self.state_lookup.get(job_state, JobscriptElementState.errored)
480
504
 
481
- entry = info.setdefault(base_job_ID, {})
482
- for arr_idx_i in arr_idx or ():
483
- entry[arr_idx_i] = state
505
+ if arr_idx is not None:
506
+ entry = cast(
507
+ dict[int, JobscriptElementState], info.setdefault(base_job_ID, {})
508
+ )
509
+ for arr_idx_i in arr_idx:
510
+ entry[arr_idx_i] = state
511
+ else:
512
+ info[base_job_ID] = state
484
513
 
485
514
  return info
486
515
 
@@ -490,7 +519,7 @@ class SlurmPosix(QueuedScheduler):
490
519
  *self.show_cmd,
491
520
  "--noheader",
492
521
  "--format",
493
- R"%40i %30T",
522
+ R"%200i %30T", # job ID (<base_job_id>_<index> for array job) and job state
494
523
  "--jobs",
495
524
  ",".join(job_IDs),
496
525
  ]
@@ -515,8 +544,8 @@ class SlurmPosix(QueuedScheduler):
515
544
 
516
545
  @override
517
546
  def get_job_state_info(
518
- self, *, js_refs: Sequence[str] | None = None, num_js_elements: int = 0
519
- ) -> Mapping[str, Mapping[int | None, JobscriptElementState]]:
547
+ self, *, js_refs: Sequence[str] | None = None
548
+ ) -> Mapping[str, JobscriptElementState | Mapping[int, JobscriptElementState]]:
520
549
  """Query the scheduler to get the states of all of this user's jobs, optionally
521
550
  filtering by specified job IDs.
522
551
 
@@ -555,7 +584,6 @@ class SlurmPosix(QueuedScheduler):
555
584
  self,
556
585
  js_refs: list[str],
557
586
  jobscripts: list[Jobscript] | None = None,
558
- num_js_elements: int = 0, # Ignored!
559
587
  ):
560
588
  """
561
589
  Cancel submitted jobs.
@@ -1,6 +1,7 @@
1
1
  """
2
2
  Helper for running a subprocess.
3
3
  """
4
+
4
5
  from __future__ import annotations
5
6
  import subprocess
6
7
  from typing import TYPE_CHECKING
@@ -10,11 +11,15 @@ if TYPE_CHECKING:
10
11
  from logging import Logger
11
12
 
12
13
 
13
- def run_cmd(cmd: str | Sequence[str], logger: Logger | None = None) -> tuple[str, str]:
14
+ def run_cmd(
15
+ cmd: str | Sequence[str], logger: Logger | None = None, **kwargs
16
+ ) -> tuple[str, str]:
14
17
  """Execute a command and return stdout, stderr as strings."""
15
18
  if logger:
16
19
  logger.debug(f"running shell command: {cmd}")
17
- proc = subprocess.run(args=cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
20
+ proc = subprocess.run(
21
+ args=cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, **kwargs
22
+ )
18
23
  stdout = proc.stdout.decode()
19
24
  stderr = proc.stderr.decode()
20
25
  return stdout, stderr
@@ -13,6 +13,8 @@ if TYPE_CHECKING:
13
13
  from typing import Any, ClassVar
14
14
  from ..types import JobscriptHeaderArgs, VersionInfo
15
15
 
16
+ from hpcflow.sdk.utils.hashing import get_hash
17
+
16
18
 
17
19
  @hydrate
18
20
  class Shell(ABC):
@@ -30,28 +32,53 @@ class Shell(ABC):
30
32
  Arguments to pass to the shell.
31
33
  """
32
34
 
35
+ #: Default for executable name.
36
+ DEFAULT_EXE: ClassVar[str] = "/bin/bash"
33
37
  #: File extension for jobscripts.
34
38
  JS_EXT: ClassVar[str]
35
- #: Default for executable name.
36
- DEFAULT_EXE: ClassVar[str]
39
+ #: Basic indent.
40
+ JS_INDENT: ClassVar[str]
37
41
  #: Indent for environment setup.
38
42
  JS_ENV_SETUP_INDENT: ClassVar[str]
39
43
  #: Template for the jobscript shebang line.
40
44
  JS_SHEBANG: ClassVar[str]
45
+ #: Template for the jobscript functions file.
46
+ JS_FUNCS: ClassVar[str]
41
47
  #: Template for the common part of the jobscript header.
42
48
  JS_HEADER: ClassVar[str]
43
49
  #: Template for the jobscript header when scheduled.
44
50
  JS_SCHEDULER_HEADER: ClassVar[str]
45
51
  #: Template for the jobscript header when directly executed.
46
52
  JS_DIRECT_HEADER: ClassVar[str]
47
- #: Template for the jobscript body.
53
+ #: Template for enabling writing of the app log.
54
+ JS_RUN_LOG_PATH_ENABLE: ClassVar[str]
55
+ #: Template for disabling writing of the app log.
56
+ JS_RUN_LOG_PATH_DISABLE: ClassVar[str]
57
+ #: Template for the run execution command.
58
+ JS_RUN_CMD: ClassVar[str]
59
+ #: Template for the execution command for multiple combined runs.
60
+ JS_RUN_CMD_COMBINED: ClassVar[str]
61
+ #: Template for setting up run environment variables and executing the run.
62
+ JS_RUN: ClassVar[str]
63
+ #: Template for the action-run processing loop in a jobscript.
64
+ JS_ACT_MULTI: ClassVar[str]
65
+ #: Template for the single-action-run execution in a jobscript.
66
+ JS_ACT_SINGLE: ClassVar[str]
67
+ #: Template for setting up environment variables and running one or more action-runs.
48
68
  JS_MAIN: ClassVar[str]
49
- #: Template for the array handling code in a jobscript.
50
- JS_ELEMENT_ARRAY: ClassVar[str]
69
+ #: Template for a jobscript-block header.
70
+ JS_BLOCK_HEADER: ClassVar[str]
71
+ #: Template for single-element execution.
72
+ JS_ELEMENT_SINGLE: ClassVar[str]
51
73
  #: Template for the element processing loop in a jobscript.
52
- JS_ELEMENT_LOOP: ClassVar[str]
53
- #: Basic indent.
54
- JS_INDENT: ClassVar[str]
74
+ JS_ELEMENT_MULTI_LOOP: ClassVar[str]
75
+ #: Template for the array handling code in a jobscript.
76
+ JS_ELEMENT_MULTI_ARRAY: ClassVar[str]
77
+ #: Template for the jobscript block loop in a jobscript.
78
+ JS_BLOCK_LOOP: ClassVar[str]
79
+ #: Template for the jobscript footer.
80
+ JS_FOOTER: ClassVar[str]
81
+
55
82
  __slots__ = ("_executable", "os_args")
56
83
 
57
84
  def __init__(
@@ -67,6 +94,9 @@ class Shell(ABC):
67
94
  return False
68
95
  return self._executable == other._executable and self.os_args == other.os_args
69
96
 
97
+ def __hash__(self):
98
+ return get_hash((self._executable, self.os_args))
99
+
70
100
  @property
71
101
  def executable(self) -> list[str]:
72
102
  """
@@ -85,6 +115,10 @@ class Shell(ABC):
85
115
  """Get the command for submitting a non-scheduled jobscript."""
86
116
  return self.executable + [js_path]
87
117
 
118
+ def get_command_file_launch_command(self, cmd_file_path: str) -> list[str]:
119
+ """Get the command for launching the commands file for a given run."""
120
+ return self.executable + [cmd_file_path]
121
+
88
122
  @abstractmethod
89
123
  def get_version_info(self, exclude_os: bool = False) -> VersionInfo:
90
124
  """Get shell and operating system information."""
@@ -141,32 +175,46 @@ class Shell(ABC):
141
175
  workflow_app_alias: str,
142
176
  param_name: str,
143
177
  shell_var_name: str,
144
- EAR_ID: int,
145
178
  cmd_idx: int,
146
179
  stderr: bool,
147
- ):
180
+ app_name: str,
181
+ ) -> str:
182
+ """
183
+ Produce code to save a parameter's value into the workflow persistent store.
148
184
  """
149
- Format instructions to save a parameter.
185
+
186
+ @abstractmethod
187
+ def format_stream_assignment(self, shell_var_name: str, command: str) -> str:
188
+ """
189
+ Format a stream assignment.
150
190
  """
151
191
 
152
192
  @abstractmethod
153
- def wrap_in_subshell(self, commands: str, abortable: bool) -> str:
193
+ def format_env_var_get(self, var: str) -> str:
194
+ """
195
+ Format retrieval of a shell environment variable.
154
196
  """
155
- Format commands to run within a child scope.
156
197
 
157
- This assumes `commands` ends in a newline.
198
+ @abstractmethod
199
+ def format_array(self, lst: list) -> str:
200
+ """
201
+ Format construction of a shell array.
158
202
  """
159
203
 
160
204
  @abstractmethod
161
- def format_loop_check(
162
- self, workflow_app_alias: str, loop_name: str, run_ID: int
163
- ) -> str:
205
+ def format_array_get_item(self, arr_name: str, index: int | str) -> str:
164
206
  """
165
- Format a loop check.
207
+ Format retrieval of a shell array item at a specified index.
166
208
  """
167
209
 
168
210
  @abstractmethod
169
- def format_stream_assignment(self, shell_var_name: str, command: str) -> str:
211
+ def format_source_functions_file(self, app_name: str, commands: str) -> str:
170
212
  """
171
- Format a stream assignment.
213
+ Format sourcing (i.e. invocation) of the jobscript functions file.
214
+ """
215
+
216
+ @abstractmethod
217
+ def format_commands_file(self, app_name: str, commands: str) -> str:
218
+ """
219
+ Format the commands file.
172
220
  """