hpcflow-new2 0.2.0a50__py3-none-any.whl → 0.2.0a52__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpcflow/_version.py +1 -1
- hpcflow/sdk/__init__.py +1 -1
- hpcflow/sdk/api.py +1 -1
- hpcflow/sdk/app.py +20 -11
- hpcflow/sdk/cli.py +34 -59
- hpcflow/sdk/core/__init__.py +13 -1
- hpcflow/sdk/core/actions.py +235 -126
- hpcflow/sdk/core/command_files.py +32 -24
- hpcflow/sdk/core/element.py +110 -114
- hpcflow/sdk/core/errors.py +57 -0
- hpcflow/sdk/core/loop.py +18 -34
- hpcflow/sdk/core/parameters.py +5 -3
- hpcflow/sdk/core/task.py +135 -131
- hpcflow/sdk/core/task_schema.py +11 -4
- hpcflow/sdk/core/utils.py +110 -2
- hpcflow/sdk/core/workflow.py +964 -676
- hpcflow/sdk/data/template_components/environments.yaml +0 -44
- hpcflow/sdk/data/template_components/task_schemas.yaml +52 -10
- hpcflow/sdk/persistence/__init__.py +21 -33
- hpcflow/sdk/persistence/base.py +1340 -458
- hpcflow/sdk/persistence/json.py +424 -546
- hpcflow/sdk/persistence/pending.py +563 -0
- hpcflow/sdk/persistence/store_resource.py +131 -0
- hpcflow/sdk/persistence/utils.py +57 -0
- hpcflow/sdk/persistence/zarr.py +852 -841
- hpcflow/sdk/submission/jobscript.py +133 -112
- hpcflow/sdk/submission/shells/bash.py +62 -16
- hpcflow/sdk/submission/shells/powershell.py +87 -16
- hpcflow/sdk/submission/submission.py +59 -35
- hpcflow/tests/unit/test_element.py +4 -9
- hpcflow/tests/unit/test_persistence.py +218 -0
- hpcflow/tests/unit/test_task.py +11 -12
- hpcflow/tests/unit/test_utils.py +82 -0
- hpcflow/tests/unit/test_workflow.py +3 -1
- {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/METADATA +3 -1
- {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/RECORD +38 -34
- {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/WHEEL +0 -0
- {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/entry_points.txt +0 -0
hpcflow/sdk/core/workflow.py
CHANGED
@@ -3,14 +3,27 @@ from contextlib import contextmanager
 import copy
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
+
 from pathlib import Path
-
-
+import random
+import string
+import time
+from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union
+from fsspec.implementations.local import LocalFileSystem
+from fsspec.implementations.zip import ZipFileSystem
 
 import numpy as np
+from fsspec.core import url_to_fs
 
 from hpcflow.sdk import app
-from hpcflow.sdk.core
+from hpcflow.sdk.core import (
+    ALL_TEMPLATE_FORMATS,
+    DEFAULT_TEMPLATE_FORMAT,
+    ABORT_EXIT_CODE,
+)
+from hpcflow.sdk.persistence import store_cls_from_str_NEW, DEFAULT_STORE_FORMAT
+from hpcflow.sdk.persistence.base import TEMPLATE_COMP_TYPES, AnySEAR
+from hpcflow.sdk.persistence.utils import ask_pw_on_auth_exc, infer_store
 from hpcflow.sdk.submission.jobscript import (
     generate_EAR_resource_map,
     group_resource_map_into_jobscripts,
@@ -28,22 +41,10 @@ from .utils import (
     replace_items,
 )
 from hpcflow.sdk.core.errors import (
-    InvalidInputSourceTaskReference,
     LoopAlreadyExistsError,
     SubmissionFailure,
-    WorkflowBatchUpdateFailedError,
-    WorkflowNotFoundError,
     WorkflowSubmissionFailure,
 )
-from hpcflow.sdk.persistence import (
-    store_cls_from_path,
-    store_cls_from_str,
-    temporary_workflow_rename,
-    DEFAULT_STORE_FORMAT,
-)
-
-DEFAULT_TEMPLATE_FORMAT = "yaml"
-ALL_TEMPLATE_FORMATS = ("yaml", "json")
 
 
 class _DummyPersistentWorkflow:
@@ -63,7 +64,7 @@ class _DummyPersistentWorkflow:
         return self._data_ref[-1]
 
     def get_parameter_data(self, data_idx):
-        return
+        return self._parameters[self._data_ref.index(data_idx)]
 
     def make_persistent(self, workflow: app.Workflow):
         for dat_i, source_i in zip(self._parameters, self._sources):
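
The `get_parameter_data` fix above replaces a bare `return` with a lookup that treats `_data_ref` and `_parameters` as parallel lists. A minimal standalone sketch (with made-up data) shows why the index round-trip recovers the right value:

    # `data_ref` records persistent data indices; `parameters` holds the
    # corresponding values, appended in the same order.
    data_ref = [10, 11, 12]
    parameters = ["a", "b", "c"]

    def get_parameter_data(data_idx):
        # list.index finds the position at which data_idx was recorded,
        # which is also the position of its value in `parameters`:
        return parameters[data_ref.index(data_idx)]

    assert get_parameter_data(11) == "b"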
@@ -265,32 +266,75 @@ class WorkflowTemplate(JSONLike):
         self.loops.append(loop)
 
 
-
-    """
+def resolve_fsspec(path: PathLike, **kwargs) -> Tuple[Any, str, str]:
+    """
+    Parameters
+    ----------
+    kwargs
+        This can include a `password` key, for connections via SSH.
+
+    """
+
+    path = str(path)
+    if path.endswith(".zip"):
+        # `url_to_fs` does not seem to work for zip combos e.g. `zip::ssh://`, so we
+        # construct a `ZipFileSystem` ourselves and assume it is signified only by the
+        # file extension:
+        fs, pw = ask_pw_on_auth_exc(
+            ZipFileSystem,
+            fo=path,
+            mode="r",
+            target_options=kwargs or {},
+            add_pw_to="target_options",
+        )
+        path = ""
 
-
+    else:
+        (fs, path), pw = ask_pw_on_auth_exc(url_to_fs, str(path), **kwargs)
+        path = str(Path(path).as_posix())
+        if isinstance(fs, LocalFileSystem):
+            path = str(Path(path).resolve())
+
+    return fs, path, pw
 
+
+class Workflow:
+    _app_attr = "app"
     _default_ts_fmt = r"%Y-%m-%d %H:%M:%S.%f"
     _default_ts_name_fmt = r"%Y-%m-%d_%H%M%S"
+    _input_files_dir_name = "input_files"
+    _exec_dir_name = "execute"
+
+    def __init__(
+        self,
+        path: Union[str, Path],
+        store_fmt: Optional[str] = None,
+        fs_kwargs: Optional[Dict] = None,
+    ):
+        """
+        Parameters
+        ----------
 
-
-        self.path = Path(path).resolve()
-        if not self.path.is_dir():
-            raise WorkflowNotFoundError(f"No workflow found at path: {self.path}")
+        path :
 
-
-
-
-
+        """
+
+        fs_path = str(path)
+        fs, path, _ = resolve_fsspec(fs_path or "", **(fs_kwargs or {}))
+        store_fmt = store_fmt or infer_store(fs_path, fs)
+        store_cls = store_cls_from_str_NEW(store_fmt)
 
+        self.path = path
+
+        self._creation_info = None
+        self._fs_path = None
         self._template = None
         self._template_components = None
         self._tasks = None
         self._loops = None
         self._submissions = None
 
-        self._store =
-
+        self._store = store_cls(self.app, self, self.path, fs)
         self._in_batch_mode = False  # flag to track when processing batch updates
 
         # store indices of updates during batch update, so we can revert on failure:
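
The new module-level `resolve_fsspec` dispatches on file extension: `.zip` paths are opened directly with a `ZipFileSystem` (chained URLs such as `zip::ssh://` are noted not to work with `url_to_fs`), while everything else is resolved via `fsspec.core.url_to_fs`, with local paths made absolute. A standalone sketch of the same dispatch, omitting the hpcflow-specific password-retry wrapper (`ask_pw_on_auth_exc`):

    from pathlib import Path

    from fsspec.core import url_to_fs
    from fsspec.implementations.local import LocalFileSystem
    from fsspec.implementations.zip import ZipFileSystem

    def resolve(path: str, **kwargs):
        if path.endswith(".zip"):
            # zip archives are detected by extension only; the in-archive
            # path is then the archive root (""):
            fs = ZipFileSystem(fo=path, mode="r", target_options=kwargs or {})
            return fs, ""
        fs, fs_path = url_to_fs(path, **kwargs)  # local paths, ssh://, etc.
        fs_path = str(Path(fs_path).as_posix())
        if isinstance(fs, LocalFileSystem):
            fs_path = str(Path(fs_path).resolve())
        return fs, fs_path

    fs, p = resolve(".")  # -> (LocalFileSystem, absolute current directory)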
@@ -300,54 +344,16 @@ class Workflow:
     def name(self):
         """The workflow name may be different from the template name, as it includes the
         creation date-timestamp if generated."""
-
-
-    def _get_empty_pending(self) -> Dict:
-        return {
-            "template_components": {k: [] for k in self.app._template_component_types},
-            "tasks": [],  # list of int
-            "loops": [],  # list of int
-            "submissions": [],  # list of int
-        }
-
-    def _accept_pending(self) -> None:
-        self._reset_pending()
-
-    def _reset_pending(self) -> None:
-        self._pending = self._get_empty_pending()
-
-    def _reject_pending(self) -> None:
-        """Revert pending changes to the in-memory representation of the workflow.
-
-        This deletes new tasks, new template component data, new loops, and new
-        submissions. Element additions to existing (non-pending) tasks are separately
-        rejected/accepted by the WorkflowTask object.
-
-        """
-        for task_idx in self._pending["tasks"][::-1]:
-            # iterate in reverse so the index references are correct
-            self.tasks._remove_object(task_idx)
-            self.template.tasks.pop(task_idx)
-
-        for comp_type, comp_indices in self._pending["template_components"].items():
-            for comp_idx in comp_indices[::-1]:
-                # iterate in reverse so the index references are correct
-                self.template_components[comp_type]._remove_object(comp_idx)
-
-        for loop_idx in self._pending["loops"][::-1]:
-            # iterate in reverse so the index references are correct
-            self.loops._remove_object(loop_idx)
-            self.template.loops.pop(loop_idx)
-
-        for sub_idx in self._pending["submissions"][::-1]:
-            # iterate in reverse so the index references are correct
-            self._submissions.pop(sub_idx)
-
-        self._reset_pending()
+        # TODO: this won't work for zip stores
+        return str(Path(self.fs_path).parts[-1])
 
     @property
     def store_format(self):
-        return self._store.
+        return self._store._name
+
+    @property
+    def num_tasks(self) -> int:
+        return len(self.tasks)
 
     @classmethod
     def from_template(
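
With `name` now derived from the store path rather than held separately, the property simply takes the final path component (with a noted zip-store caveat). For example:

    from pathlib import Path

    fs_path = "/home/user/workflows/my_wf_2023-01-01_120000"  # hypothetical
    assert Path(fs_path).parts[-1] == "my_wf_2023-01-01_120000"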
@@ -440,7 +446,7 @@
             The datetime format to use when generating the workflow name, where it
             includes a timestamp.
         """
-        template =
+        template = cls.app.WorkflowTemplate.from_YAML_file(YAML_path)
         return cls.from_template(
             template,
             path,
@@ -488,7 +494,7 @@
             The datetime format to use when generating the workflow name, where it
             includes a timestamp.
         """
-        template =
+        template = cls.app.WorkflowTemplate.from_YAML_string(YAML_str)
        return cls.from_template(
             template,
             path,
@@ -713,193 +719,151 @@
             ts_name_fmt,
         )
 
-
-
-
-
+    def _add_empty_task(
+        self,
+        task: app.Task,
+        new_index: Optional[int] = None,
+    ) -> app.WorkflowTask:
+        if new_index is None:
+            new_index = self.num_tasks
 
-
-            yield
-        else:
-            try:
-                self._in_batch_mode = True
-                yield
+        insert_ID = self.num_added_tasks
 
-
-
+        # make a copy with persistent schema inputs:
+        task_c, _ = task.to_persistent(self, insert_ID)
 
-
-
+        # add to the WorkflowTemplate:
+        self.template._add_empty_task(task_c, new_index, insert_ID)
 
-
-
+        # create and insert a new WorkflowTask:
+        self.tasks.add_object(
+            self.app.WorkflowTask.new_empty_task(self, task_c, new_index),
+            index=new_index,
+        )
 
-
-
+        # update persistent store:
+        task_js, temp_comps_js = task_c.to_json_like()
+        self._store.add_template_components(temp_comps_js)
+        self._store.add_task(new_index, task_js)
 
-
+        # update in-memory workflow template components:
+        temp_comps = self.app.template_components_from_json_like(temp_comps_js)
+        for comp_type, comps in temp_comps.items():
+            for comp in comps:
+                comp._set_hash()
+                if comp not in self.template_components[comp_type]:
+                    idx = self.template_components[comp_type].add_object(comp)
+                    self._pending["template_components"][comp_type].append(idx)
 
-
-                # creation failed, so no need to keep the newly generated workflow:
-                self._store.delete_no_confirm()
-                self._store.reinstate_replaced_dir()
+        self._pending["tasks"].append(new_index)
 
-
+        return self.tasks[new_index]
 
-
-
-
-            if is_diff:
-                raise WorkflowBatchUpdateFailedError(
-                    f"Workflow modified on disk since it was loaded!"
-                )
+    def _add_task(self, task: app.Task, new_index: Optional[int] = None) -> None:
+        new_wk_task = self._add_empty_task(task=task, new_index=new_index)
+        new_wk_task._add_elements(element_sets=task.element_sets)  # TODO
 
-
-
+    def add_task(self, task: app.Task, new_index: Optional[int] = None) -> None:
+        with self._store.cached_load():
+            with self.batch_update():
+                self._add_task(task, new_index=new_index)
 
-
-
+    def add_task_after(self, new_task: app.Task, task_ref: app.Task = None) -> None:
+        """Add a new task after the specified task.
 
-
-
-
-
-                self._in_batch_mode = False
+        Parameters
+        ----------
+        task_ref
+            If not given, the new task will be added at the end of the workflow.
 
-    @classmethod
-    def _write_empty_workflow(
-        cls,
-        template: app.WorkflowTemplate,
-        path: Optional[PathLike] = None,
-        name: Optional[str] = None,
-        overwrite: Optional[bool] = False,
-        store: Optional[str] = DEFAULT_STORE_FORMAT,
-        ts_fmt: Optional[str] = None,
-        ts_name_fmt: Optional[str] = None,
-    ) -> app.Workflow:
         """
+        new_index = task_ref.index + 1 if task_ref else None
+        self.add_task(new_task, new_index)
+        # TODO: add new downstream elements?
+
+    def add_task_before(self, new_task: app.Task, task_ref: app.Task = None) -> None:
+        """Add a new task before the specified task.
+
         Parameters
         ----------
-
-            if not specified.
-        name
-            The name of the workflow. If specified, the workflow directory will be `path`
-            joined with `name`. If not specified the WorkflowTemplate name will be used,
-            in combination with a date-timestamp.
-        overwrite
-            If True and the workflow directory (`path` + `name`) already exists, the
-            existing directory will be overwritten.
-        store
-            The persistent store to use for this workflow.
-        ts_fmt
-            The datetime format to use for storing datetimes. Datetimes are always stored
-            in UTC (because Numpy does not store time zone info), so this should not
-            include a time zone name.
-        ts_name_fmt
-            The datetime format to use when generating the workflow name, where it
-            includes a timestamp.
-        """
+        task_ref
+            If not given, the new task will be added at the beginning of the workflow.
 
-
+        """
+        new_index = task_ref.index if task_ref else 0
+        self.add_task(new_task, new_index)
+        # TODO: add new downstream elements?
 
-
-
+    def _add_empty_loop(self, loop: app.Loop) -> app.WorkflowLoop:
+        """Add a new loop (zeroth iterations only) to the workflow."""
 
-
-        ts_fmt = ts_fmt or cls._default_ts_fmt
+        new_index = self.num_loops
 
-
-
-        workflow_path = path.joinpath(name)
+        # don't modify passed object:
+        loop_c = copy.deepcopy(loop)
 
-
-
-        if overwrite:
-            replaced_dir = temporary_workflow_rename(workflow_path)
-        else:
-            raise ValueError(f"Path already exists: {workflow_path}.")
+        # add to the WorkflowTemplate:
+        self.template._add_empty_loop(loop_c)
 
-        #
-
-
-
-
+        # create and insert a new WorkflowLoop:
+        self.loops.add_object(
+            self.app.WorkflowLoop.new_empty_loop(
+                index=new_index,
+                workflow=self,
+                template=loop_c,
+            )
+        )
+        wk_loop = self.loops[new_index]
 
-
-        template_js["tasks"] = []
-        template_js["loops"] = []
+        loop_js, _ = loop_c.to_json_like()
 
-
-
-
-
-            "ts_name_fmt": ts_name_fmt,
-        }
+        # all these element iterations will be initialised for the new loop:
+        iter_IDs = [
+            i.id_ for i in self.get_element_iterations_of_tasks(loop_c.task_insert_IDs)
+        ]
 
-
-
-
-
-
-            replaced_dir=replaced_dir,
-            creation_info=creation_info,
+        # update persistent store:
+        self._store.add_loop(
+            loop_template=loop_js,
+            iterable_parameters=wk_loop.iterable_parameters,
+            iter_IDs=iter_IDs,
         )
-        wk = cls(workflow_path)
 
-
-        wk_dummy.make_persistent(wk)
+        self._pending["loops"].append(new_index)
 
-        return
+        return wk_loop
 
-
-
-        if not
-
-
+    def _add_loop(self, loop: app.Loop, parent_loop_indices: Dict = None) -> None:
+        new_wk_loop = self._add_empty_loop(loop)
+        if loop.num_iterations is not None:
+            # fixed number of iterations, so add remaining N > 0 iterations:
+            for _ in range(loop.num_iterations - 1):
+                new_wk_loop.add_iteration(parent_loop_indices=parent_loop_indices)
+
+    def add_loop(self, loop: app.Loop, parent_loop_indices: Dict = None) -> None:
+        """Add a loop to a subset of workflow tasks."""
+        with self._store.cached_load():
+            with self.batch_update():
+                self._add_loop(loop, parent_loop_indices)
 
     @property
-    def
-        if not self.
-            self.
-        return self.
+    def fs_path(self):
+        if not self._fs_path:
+            self._fs_path = self._store.get_fs_path()
+        return self._fs_path
 
     @property
     def creation_info(self):
         if not self._creation_info:
-
-
-            info["create_time"]
-
-
-
+            info = self._store.get_creation_info()
+            info["create_time"] = (
+                datetime.strptime(info["create_time"], info["ts_fmt"])
+                .replace(tzinfo=timezone.utc)
+                .astimezone()
+            )
+            self._creation_info = info
         return self._creation_info
 
-    @property
-    def num_tasks(self) -> int:
-        return len(self.tasks)
-
-    @property
-    def num_added_tasks(self) -> int:
-        with self._store.cached_load():
-            return self._store.get_num_added_tasks()
-
-    @property
-    def num_elements(self) -> int:
-        return sum(task.num_elements for task in self.tasks)
-
-    @property
-    def num_element_iterations(self) -> int:
-        return sum(task.num_element_iterations for task in self.tasks)
-
-    @property
-    def num_loops(self) -> int:
-        return len(self.loops)
-
-    @property
-    def num_submissions(self) -> int:
-        return len(self.submissions)
-
     @property
     def template_components(self) -> Dict:
         if self._template_components is None:
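
The `creation_info` property now converts the stored UTC timestamp back to an aware local datetime. The conversion chain is standard-library only and can be verified in isolation:

    from datetime import datetime, timezone

    ts_fmt = r"%Y-%m-%d %H:%M:%S.%f"
    stored = "2023-01-02 03:04:05.000000"  # hypothetical stored UTC value

    # parse as naive, mark as UTC, then convert to the local time zone:
    create_time = (
        datetime.strptime(stored, ts_fmt)
        .replace(tzinfo=timezone.utc)
        .astimezone()
    )
    print(create_time.isoformat())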
@@ -913,6 +877,11 @@
         if self._template is None:
             with self._store.cached_load():
                 temp_js = self._store.get_template()
+
+                # TODO: insert_ID and id_ are the same thing:
+                for task in temp_js["tasks"]:
+                    task.pop("id_", None)
+
                 template = self.app.WorkflowTemplate.from_json_like(
                     temp_js, self.template_components
                 )
@@ -925,19 +894,18 @@
     def tasks(self) -> app.WorkflowTaskList:
         if self._tasks is None:
             with self._store.cached_load():
-
+                all_tasks = self._store.get_tasks()
                 wk_tasks = []
-                for
+                for i in all_tasks:
                     wk_task = self.app.WorkflowTask(
                         workflow=self,
-                        template=self.template.tasks[
-                        index=
-
-                        num_element_iterations=i["num_element_iterations"],
-                        num_EARs=i["num_EARs"],
+                        template=self.template.tasks[i.index],
+                        index=i.index,
+                        element_IDs=i.element_IDs,
                     )
                     wk_tasks.append(wk_task)
                 self._tasks = self.app.WorkflowTaskList(wk_tasks)
+
         return self._tasks
 
     @property
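
Note the shift from dict-style records (`i["num_EARs"]`) to attribute access (`i.index`, `i.element_IDs`): the store now returns typed task records (cf. the `AnySTask`/`AnySEAR` types in the new imports). A minimal, hypothetical illustration of the record shape this code assumes:

    from dataclasses import dataclass
    from typing import List

    @dataclass
    class StoreTask:
        # hypothetical minimal record; the real classes live in
        # hpcflow/sdk/persistence/base.py
        id_: int
        index: int
        element_IDs: List[int]

    rec = StoreTask(id_=0, index=0, element_IDs=[5, 8, 13])
    assert rec.element_IDs.index(8) == 1  # the lookup used by get_*_from_IDs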
@@ -945,12 +913,13 @@
         if self._loops is None:
             with self._store.cached_load():
                 wk_loops = []
-                for idx, loop_dat in
+                for idx, loop_dat in self._store.get_loops().items():
                     wk_loop = self.app.WorkflowLoop(
                         index=idx,
                         workflow=self,
                         template=self.template.loops[idx],
-
+                        num_added_iterations=loop_dat["num_added_iterations"],
+                        iterable_parameters=loop_dat["iterable_parameters"],
                     )
                     wk_loops.append(wk_loop)
                 self._loops = self.app.WorkflowLoopList(wk_loops)
@@ -961,134 +930,466 @@
         if self._submissions is None:
             with self._store.cached_load():
                 subs = []
-                for idx, sub_dat in
-                    sub_js = {"index": idx,
+                for idx, sub_dat in self._store.get_submissions().items():
+                    sub_js = {"index": idx, **sub_dat}
                     sub = self.app.Submission.from_json_like(sub_js)
+                    sub.workflow = self
                     subs.append(sub)
                 self._submissions = subs
         return self._submissions
 
     @property
-    def
-
-        return self.path / "artifacts"
+    def num_added_tasks(self) -> int:
+        return self._store._get_num_total_added_tasks()
 
-
-
-        return self.artifacts_path / "submissions"
+    def get_store_EARs(self, id_lst: Iterable[int]) -> List[AnySEAR]:
+        return self._store.get_EARs(id_lst)
 
-
-
-
+    def get_store_element_iterations(
+        self, id_lst: Iterable[int]
+    ) -> List[AnySElementIter]:
+        return self._store.get_element_iterations(id_lst)
 
-    def
-
-        for element in task.elements:
-            yield element
+    def get_store_elements(self, id_lst: Iterable[int]) -> List[AnySElement]:
+        return self._store.get_elements(id_lst)
 
-    def
-
-        if path is None:
-            path = self.path.parent / Path(self.path.stem + "_copy" + self.path.suffix)
-        if path.exists():
-            raise ValueError(f"Path already exists: {path}.")
-        self._store.copy(path=path)
-        return self.app.Workflow(path=path)
+    def get_store_tasks(self, id_lst: Iterable[int]) -> List[AnySTask]:
+        return self._store.get_tasks_by_IDs(id_lst)
 
-    def
-        self.
+    def get_element_iteration_IDs_from_EAR_IDs(self, id_lst: Iterable[int]) -> List[int]:
+        return [i.elem_iter_ID for i in self.get_store_EARs(id_lst)]
 
-    def
-        self.
+    def get_element_IDs_from_EAR_IDs(self, id_lst: Iterable[int]) -> List[int]:
+        iter_IDs = self.get_element_iteration_IDs_from_EAR_IDs(id_lst)
+        return [i.element_ID for i in self.get_store_element_iterations(iter_IDs)]
 
-    def
-
+    def get_task_IDs_from_element_IDs(self, id_lst: Iterable[int]) -> List[int]:
+        return [i.task_ID for i in self.get_store_elements(id_lst)]
 
-    def
-
-
-        JS_parallelism: Optional[bool] = None,
-        print_stdout: Optional[bool] = False,
-    ) -> Tuple[List[Exception], Dict[int, int]]:
-        """Submit outstanding EARs for execution."""
+    def get_EAR_IDs_of_tasks(self, id_lst: int) -> List[int]:
+        """Get EAR IDs belonging to multiple tasks"""
+        return [i.id_ for i in self.get_EARs_of_tasks(id_lst)]
 
-
-
-
-
-
-
-
+    def get_EARs_of_tasks(self, id_lst: Iterable[int]) -> List[app.ElementActionRun]:
+        """Get EARs belonging to multiple tasks"""
+        EARs = []
+        for i in id_lst:
+            task = self.tasks.get(insert_ID=i)
+            for elem in task.elements[:]:
+                for iter_ in elem.iterations:
+                    for run in iter_.action_runs:
+                        EARs.append(run)
+        return EARs
 
-
-        self
+    def get_element_iterations_of_tasks(
+        self, id_lst: Iterable[int]
+    ) -> List[app.ElementIteration]:
+        """Get element iterations belonging to multiple tasks"""
+        iters = []
+        for i in id_lst:
+            task = self.tasks.get(insert_ID=i)
+            for elem in task.elements[:]:
+                for iter_i in elem.iterations:
+                    iters.append(iter_i)
+        return iters
+
+    def get_elements_from_IDs(self, id_lst: Iterable[int]) -> List[app.Element]:
+        """Return element objects from a list of IDs."""
+
+        store_elems = self._store.get_elements(id_lst)
+
+        task_IDs = [i.task_ID for i in store_elems]
+        store_tasks = self._store.get_tasks_by_IDs(task_IDs)
+
+        index_paths = []
+        for el, tk in zip(store_elems, store_tasks):
+            elem_idx = tk.element_IDs.index(el.id_)
+            index_paths.append(
+                {
+                    "elem_idx": elem_idx,
+                    "task_idx": tk.index,
+                }
+            )
 
-
-
-
+        objs = []
+        for idx_dat in index_paths:
+            task = self.tasks[idx_dat["task_idx"]]
+            elem = task.elements[idx_dat["elem_idx"]]
+            objs.append(elem)
 
-
-
-
-
+        return objs
+
+    def get_element_iterations_from_IDs(
+        self, id_lst: Iterable[int]
+    ) -> List[app.ElementIteration]:
+        """Return element iteration objects from a list of IDs."""
+
+        store_iters = self._store.get_element_iterations(id_lst)
+
+        elem_IDs = [i.element_ID for i in store_iters]
+        store_elems = self._store.get_elements(elem_IDs)
+
+        task_IDs = [i.task_ID for i in store_elems]
+        store_tasks = self._store.get_tasks_by_IDs(task_IDs)
+
+        index_paths = []
+        for it, el, tk in zip(store_iters, store_elems, store_tasks):
+            iter_idx = el.iteration_IDs.index(it.id_)
+            elem_idx = tk.element_IDs.index(el.id_)
+            index_paths.append(
+                {
+                    "iter_idx": iter_idx,
+                    "elem_idx": elem_idx,
+                    "task_idx": tk.index,
+                }
+            )
+
+        objs = []
+        for idx_dat in index_paths:
+            task = self.tasks[idx_dat["task_idx"]]
+            elem = task.elements[idx_dat["elem_idx"]]
+            iter_ = elem.iterations[idx_dat["iter_idx"]]
+            objs.append(iter_)
+
+        return objs
+
+    def get_EARs_from_IDs(self, id_lst: Iterable[int]) -> List[app.ElementActionRun]:
+        """Return element action run objects from a list of IDs."""
+
+        store_EARs = self._store.get_EARs(id_lst)
+
+        elem_iter_IDs = [i.elem_iter_ID for i in store_EARs]
+        store_iters = self._store.get_element_iterations(elem_iter_IDs)
+
+        elem_IDs = [i.element_ID for i in store_iters]
+        store_elems = self._store.get_elements(elem_IDs)
+
+        task_IDs = [i.task_ID for i in store_elems]
+        store_tasks = self._store.get_tasks_by_IDs(task_IDs)
+
+        index_paths = []
+        for rn, it, el, tk in zip(store_EARs, store_iters, store_elems, store_tasks):
+            act_idx = rn.action_idx
+            run_idx = it.EAR_IDs[act_idx].index(rn.id_)
+            iter_idx = el.iteration_IDs.index(it.id_)
+            elem_idx = tk.element_IDs.index(el.id_)
+            index_paths.append(
+                {
+                    "run_idx": run_idx,
+                    "action_idx": act_idx,
+                    "iter_idx": iter_idx,
+                    "elem_idx": elem_idx,
+                    "task_idx": tk.index,
+                }
+            )
+
+        objs = []
+        for idx_dat in index_paths:
+            task = self.tasks[idx_dat["task_idx"]]
+            elem = task.elements[idx_dat["elem_idx"]]
+            iter_ = elem.iterations[idx_dat["iter_idx"]]
+            run = iter_.actions[idx_dat["action_idx"]].runs[idx_dat["run_idx"]]
+            objs.append(run)
+
+        return objs
+
+    def get_all_elements(self) -> List[app.Element]:
+        return self.get_elements_from_IDs(range(self.num_elements))
+
+    def get_all_element_iterations(self) -> List[app.ElementIteration]:
+        return self.get_element_iterations_from_IDs(range(self.num_element_iterations))
+
+    def get_all_EARs(self) -> List[app.ElementActionRun]:
+        return self.get_EARs_from_IDs(range(self.num_EARs))
+
+    @contextmanager
+    def batch_update(self, is_workflow_creation: bool = False) -> Iterator[None]:
+        """A context manager that batches up structural changes to the workflow and
+        commits them to disk all together when the context manager exits."""
+
+        if self._in_batch_mode:
+            yield
+        else:
             try:
-
-
-                    ignore_errors=ignore_errors,
-                    print_stdout=print_stdout,
+                self.app.persistence_logger.info(
+                    f"entering batch update (is_workflow_creation={is_workflow_creation!r})"
                 )
-
-
-                exceptions.append(exc)
+                self._in_batch_mode = True
+                yield
 
-
+            except Exception as err:
+                self.app.persistence_logger.error("batch update exception!")
+                self._in_batch_mode = False
+                self._store._pending.reset()
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+                for task in self.tasks:
+                    task._reset_pending_element_IDs()
+                    task.template._reset_pending_element_sets()
+
+                for loop in self.loops:
+                    loop._reset_pending_num_added_iters()
+
+                self._reject_pending()
+
+                if is_workflow_creation:
+                    # creation failed, so no need to keep the newly generated workflow:
+                    self._store.delete_no_confirm()
+                    self._store.reinstate_replaced_dir()
+
+                raise err
+
+            else:
+                if self._store._pending:
+                    # is_diff = self._store.is_modified_on_disk()
+                    # if is_diff:
+                    #     raise WorkflowBatchUpdateFailedError(
+                    #         f"Workflow modified on disk since it was loaded!"
+                    #     )
+
+                    for task in self.tasks:
+                        task._accept_pending_element_IDs()
+                        task.template._accept_pending_element_sets()
+
+                    for loop in self.loops:
+                        loop._accept_pending_num_added_iters()
+
+                    if is_workflow_creation:
+                        self._store.remove_replaced_dir()
+
+                    # TODO: handle errors in commit pending?
+                    self._store._pending.commit_all()
+
+                self._accept_pending()
+                self.app.persistence_logger.info("exiting batch update")
+                self._in_batch_mode = False
+
+    @classmethod
+    def temporary_rename(cls, path: str, fs) -> List[str]:
+        """Rename an existing same-path workflow (directory) so we can restore it if
+        workflow creation fails.
+
+        Renaming will occur until the successfully completed. This means multiple new
+        paths may be created, where only the final path should be considered the
+        successfully renamed workflow. Other paths will be deleted."""
+
+        all_replaced = []
+
+        @cls.app.perm_error_retry()
+        def _temp_rename(path: str, fs) -> str:
+            temp_ext = "".join(random.choices(string.ascii_letters, k=10))
+            replaced = str(Path(f"{path}.{temp_ext}").as_posix())
+            cls.app.persistence_logger.debug(
+                f"temporary_rename: _temp_rename: {path!r} --> {replaced!r}."
+            )
+            all_replaced.append(replaced)
+            try:
+                fs.rename(path, replaced, recursive=True)
+            except TypeError:
+                # `SFTPFileSystem.rename` has no `recursive` argument:
+                fs.rename(path, replaced)
+            return replaced
+
+        @cls.app.perm_error_retry()
+        def _remove_path(path: str, fs) -> None:
+            cls.app.persistence_logger.debug(f"temporary_rename: _remove_path: {path!r}.")
+            while fs.exists(path):
+                fs.rm(path, recursive=True)
+                time.sleep(0.5)
+
+        _temp_rename(path, fs)
+
+        for i in all_replaced[:-1]:
+            _remove_path(i, fs)
+
+        return all_replaced[-1]
+
+    @classmethod
+    def _write_empty_workflow(
+        cls,
+        template: app.WorkflowTemplate,
+        path: Optional[PathLike] = None,
+        name: Optional[str] = None,
+        overwrite: Optional[bool] = False,
+        store: Optional[str] = DEFAULT_STORE_FORMAT,
+        ts_fmt: Optional[str] = None,
+        ts_name_fmt: Optional[str] = None,
+        fs_kwargs: Optional[Dict] = None,
+    ) -> app.Workflow:
+        """
+        Parameters
+        ----------
+        path
+            The directory in which the workflow will be generated. The current directory
+            if not specified.
+
+        """
+        ts = datetime.now()
+
+        # store all times in UTC, since NumPy doesn't support time zone info:
+        ts_utc = ts.astimezone(tz=timezone.utc)
+
+        ts_name_fmt = ts_name_fmt or cls._default_ts_name_fmt
+        ts_fmt = ts_fmt or cls._default_ts_fmt
+
+        name = name or f"{template.name}_{ts.strftime(ts_name_fmt)}"
+
+        fs_path = f"{path or '.'}/{name}"
+        fs_kwargs = fs_kwargs or {}
+        fs, path, pw = resolve_fsspec(path or "", **fs_kwargs)
+        wk_path = f"{path}/{name}"
+
+        replaced_wk = None
+        if fs.exists(wk_path):
+            cls.app.logger.debug("workflow path exists")
+            if overwrite:
+                cls.app.logger.debug("renaming existing workflow path")
+                replaced_wk = cls.temporary_rename(wk_path, fs)
+            else:
+                raise ValueError(
+                    f"Path already exists: {wk_path} on file system " f"{fs!r}."
                 )
 
-
-
-
+        # make template-level inputs/resources think they are persistent:
+        wk_dummy = _DummyPersistentWorkflow()
+        param_src = {"type": "workflow_resources"}
+        for res_i in template.resources:
+            res_i.make_persistent(wk_dummy, param_src)
 
-
+        template_js, template_sh = template.to_json_like(exclude=["tasks", "loops"])
+        template_js["tasks"] = []
+        template_js["loops"] = []
 
-
-
-
-
-
-
-
+        creation_info = {
+            "app_info": cls.app.get_info(),
+            "create_time": ts_utc.strftime(ts_fmt),
+            "ts_fmt": ts_fmt,
+            "ts_name_fmt": ts_name_fmt,
+        }
+
+        store_cls = store_cls_from_str_NEW(store)
+        store_cls.write_empty_workflow(
+            app=cls.app,
+            template_js=template_js,
+            template_components_js=template_sh,
+            wk_path=wk_path,
+            fs=fs,
+            fs_path=fs_path,
+            replaced_wk=replaced_wk,
+            creation_info=creation_info,
         )
-        EAR_indices = sub_obj.prepare_EAR_submission_idx_update()
-        if not EAR_indices:
-            print(
-                f"There are no pending element action runs, so a new submission was not "
-                f"added."
-            )
-            return
 
-
-
-        self._submissions.append(sub_obj)
-        self._pending["submissions"].append(new_idx)
-        with self._store.cached_load():
-            with self.batch_update():
-                self._store.add_submission(sub_obj_js)
+        fs_kwargs = {"password": pw, **fs_kwargs}
+        wk = cls(fs_path, store_fmt=store, fs_kwargs=fs_kwargs)
 
-
+        # actually make template inputs/resources persistent, now the workflow exists:
+        wk_dummy.make_persistent(wk)
+
+        return wk
+
+    def to_zip(self) -> str:
+        return self._store.to_zip()
+
+    def copy(self, path=None) -> str:
+        """Copy the workflow to a new path and return the copied workflow path."""
+        return self._store.copy(path)
+
+    def delete(self):
+        self._store.delete()
+
+    def _delete_no_confirm(self):
+        self._store.delete_no_confirm()
+
+    def get_parameters(
+        self, id_lst: Iterable[int], **kwargs: Dict
+    ) -> List[AnySParameter]:
+        return self._store.get_parameters(id_lst, **kwargs)
+
+    def get_parameter_sources(self, id_lst: Iterable[int]) -> List[Dict]:
+        return self._store.get_parameter_sources(id_lst)
+
+    def get_parameter_set_statuses(self, id_lst: Iterable[int]) -> List[bool]:
+        return self._store.get_parameter_set_statuses(id_lst)
+
+    def get_parameter(self, index: int, **kwargs: Dict) -> AnySParameter:
+        return self.get_parameters([index], **kwargs)[0]
+
+    def get_parameter_data(self, index: int, **kwargs: Dict) -> Any:
+        return (
+            self.get_parameter(index, **kwargs).data
+            or self.get_parameter(index, **kwargs).file
+        )
+
+    def get_parameter_source(self, index: int) -> Dict:
+        return self.get_parameter_sources([index])[0]
+
+    def is_parameter_set(self, index: int) -> bool:
+        return self.get_parameter_set_statuses([index])[0]
+
+    def get_all_parameters(self, **kwargs: Dict) -> List[AnySParameter]:
+        """Retrieve all store parameters."""
+        num_params = self._store._get_num_total_parameters()
+        id_lst = list(range(num_params))
+        return self._store.get_parameters(id_lst, **kwargs)
+
+    def get_all_parameter_data(self, **kwargs: Dict) -> Dict[int, Any]:
+        """Retrieve all workflow parameter data."""
+        params = self.get_all_parameters(**kwargs)
+        return {i.id_: (i.data or i.file) for i in params}
+
+    def check_parameters_exist(
+        self, id_lst: Union[int, List[int]]
+    ) -> Union[bool, List[bool]]:
+        is_multi = True
+        if isinstance(id_lst, int):
+            is_multi = False
+            id_lst = [id_lst]
+        exists = self._store.check_parameters_exist(id_lst)
+        if not is_multi:
+            exists = exists[0]
+        return exists
+
+    def _add_unset_parameter_data(self, source: Dict) -> int:
+        # TODO: use this for unset files as well
+        return self._store.add_unset_parameter(source)
+
+    def _add_parameter_data(self, data, source: Dict) -> int:
+        return self._store.add_set_parameter(data, source)
+
+    def _add_file(
+        self,
+        store_contents: bool,
+        is_input: bool,
+        source: Dict,
+        path=None,
+        contents=None,
+        filename: str = None,
+    ) -> int:
+        return self._store.add_file(
+            store_contents=store_contents,
+            is_input=is_input,
+            source=source,
+            path=path,
+            contents=contents,
+            filename=filename,
+        )
+
+    def _set_file(
+        self,
+        param_id: int,
+        store_contents: bool,
+        is_input: bool,
+        path=None,
+        contents=None,
+        filename: str = None,
+    ) -> int:
+        self._store.set_file(
+            param_id=param_id,
+            store_contents=store_contents,
+            is_input=is_input,
+            path=path,
+            contents=contents,
+            filename=filename,
+        )
 
     def get_task_unique_names(
         self, map_to_insert_ID: bool = False
@@ -1116,358 +1417,313 @@ class Workflow:
|
|
1116
1417
|
|
1117
1418
|
return uniq_names[new_index]
|
1118
1419
|
|
1119
|
-
def
|
1120
|
-
|
1121
|
-
|
1122
|
-
|
1123
|
-
|
1124
|
-
|
1125
|
-
|
1420
|
+
def _get_empty_pending(self) -> Dict:
|
1421
|
+
return {
|
1422
|
+
"template_components": {k: [] for k in TEMPLATE_COMP_TYPES},
|
1423
|
+
"tasks": [], # list of int
|
1424
|
+
"loops": [], # list of int
|
1425
|
+
"submissions": [], # list of int
|
1426
|
+
}
|
1126
1427
|
|
1127
|
-
|
1428
|
+
def _accept_pending(self) -> None:
|
1429
|
+
self._reset_pending()
|
1128
1430
|
|
1129
|
-
|
1130
|
-
|
1431
|
+
def _reset_pending(self) -> None:
|
1432
|
+
self._pending = self._get_empty_pending()
|
1131
1433
|
|
1132
|
-
|
1133
|
-
|
1434
|
+
def _reject_pending(self) -> None:
|
1435
|
+
"""Revert pending changes to the in-memory representation of the workflow.
|
1134
1436
|
|
1135
|
-
|
1136
|
-
|
1137
|
-
|
1138
|
-
index=new_index,
|
1139
|
-
)
|
1437
|
+
This deletes new tasks, new template component data, new loops, and new
|
1438
|
+
submissions. Element additions to existing (non-pending) tasks are separately
|
1439
|
+
rejected/accepted by the WorkflowTask object.
|
1140
1440
|
|
1141
|
-
|
1142
|
-
|
1143
|
-
|
1144
|
-
|
1441
|
+
"""
|
1442
|
+
for task_idx in self._pending["tasks"][::-1]:
|
1443
|
+
# iterate in reverse so the index references are correct
|
1444
|
+
self.tasks._remove_object(task_idx)
|
1445
|
+
self.template.tasks.pop(task_idx)
|
1145
1446
|
|
1146
|
-
|
1147
|
-
|
1148
|
-
|
1149
|
-
|
1150
|
-
comp._set_hash()
|
1151
|
-
if comp not in self.template_components[comp_type]:
|
1152
|
-
idx = self.template_components[comp_type].add_object(comp)
|
1153
|
-
self._pending["template_components"][comp_type].append(idx)
|
1447
|
+
for comp_type, comp_indices in self._pending["template_components"].items():
|
1448
|
+
for comp_idx in comp_indices[::-1]:
|
1449
|
+
# iterate in reverse so the index references are correct
|
1450
|
+
self.template_components[comp_type]._remove_object(comp_idx)
|
1154
1451
|
|
1155
|
-
self._pending["
|
1452
|
+
for loop_idx in self._pending["loops"][::-1]:
|
1453
|
+
# iterate in reverse so the index references are correct
|
1454
|
+
self.loops._remove_object(loop_idx)
|
1455
|
+
self.template.loops.pop(loop_idx)
|
1156
1456
|
|
1157
|
-
|
1457
|
+
for sub_idx in self._pending["submissions"][::-1]:
|
1458
|
+
# iterate in reverse so the index references are correct
|
1459
|
+
self._submissions.pop(sub_idx)
|
1158
1460
|
|
1159
|
-
|
1160
|
-
"""Add a new loop (zeroth iterations only) to the workflow."""
|
1461
|
+
self._reset_pending()
|
1161
1462
|
|
1162
|
-
|
1463
|
+
@property
|
1464
|
+
def num_tasks(self):
|
1465
|
+
return self._store._get_num_total_tasks()
|
1163
1466
|
|
1164
|
-
|
1165
|
-
|
1467
|
+
@property
|
1468
|
+
def num_submissions(self):
|
1469
|
+
return self._store._get_num_total_submissions()
|
1166
1470
|
|
1167
|
-
|
1168
|
-
|
1471
|
+
@property
|
1472
|
+
def num_elements(self):
|
1473
|
+
return self._store._get_num_total_elements()
|
1169
1474
|
|
1170
|
-
|
1171
|
-
|
1172
|
-
|
1173
|
-
index=new_index,
|
1174
|
-
workflow=self,
|
1175
|
-
template=loop_c,
|
1176
|
-
)
|
1177
|
-
)
|
1178
|
-
wk_loop = self.loops[new_index]
|
1475
|
+
@property
|
1476
|
+
def num_element_iterations(self):
|
1477
|
+
return self._store._get_num_total_elem_iters()
|
1179
1478
|
|
1180
|
-
|
1181
|
-
|
1182
|
-
|
1183
|
-
self._store.add_loop(
|
1184
|
-
task_indices=task_indices,
|
1185
|
-
loop_js=loop_js,
|
1186
|
-
iterable_parameters=wk_loop.iterable_parameters,
|
1187
|
-
)
|
1479
|
+
@property
|
1480
|
+
def num_EARs(self):
|
1481
|
+
return self._store._get_num_total_EARs()
|
1188
1482
|
|
1189
|
-
|
1483
|
+
@property
|
1484
|
+
def num_loops(self) -> int:
|
1485
|
+
return self._store._get_num_total_loops()
|
1190
1486
|
|
1191
|
-
|
1487
|
+
@property
|
1488
|
+
def artifacts_path(self):
|
1489
|
+
# TODO: allow customisation of artifacts path at submission and resources level
|
1490
|
+
return Path(self.path) / "artifacts"
|
1192
1491
|
|
1193
|
-
|
1194
|
-
|
1195
|
-
|
1196
|
-
# fixed number of iterations, so add remaining N > 0 iterations:
|
1197
|
-
for _ in range(loop.num_iterations - 1):
|
1198
|
-
new_wk_loop.add_iteration(parent_loop_indices=parent_loop_indices)
|
1492
|
+
@property
|
1493
|
+
def input_files_path(self):
|
1494
|
+
return self.artifacts_path / self._input_files_dir_name
|
1199
1495
|
|
1200
|
-
|
1201
|
-
|
1496
|
+
@property
|
1497
|
+
def submissions_path(self):
|
1498
|
+
return self.artifacts_path / "submissions"
|
1499
|
+
|
1500
|
+
@property
|
1501
|
+
def task_artifacts_path(self):
|
1502
|
+
return self.artifacts_path / "tasks"
|
1503
|
+
|
1504
|
+
@property
|
1505
|
+
def execution_path(self):
|
1506
|
+
return Path(self.path) / self._exec_dir_name
|
1507
|
+
|
1508
|
+
def get_task_elements(self, task: app.Task, selection: slice) -> List[app.Element]:
|
1509
|
+
return [
|
1510
|
+
self.app.Element(task=task, **{k: v for k, v in i.items() if k != "task_ID"})
|
1511
|
+
for i in self._store.get_task_elements(task.insert_ID, selection)
|
1512
|
+
]
|
1513
|
+
|
1514
|
+
def set_EAR_submission_index(self, EAR_ID: int, sub_idx: int) -> None:
|
1515
|
+
"""Set the submission index of an EAR."""
|
1202
1516
|
with self._store.cached_load():
|
1203
1517
|
with self.batch_update():
|
1204
|
-
self.
|
1205
|
-
|
1206
|
-
def _add_task(self, task: app.Task, new_index: Optional[int] = None) -> None:
|
1207
|
-
new_wk_task = self._add_empty_task(task=task, new_index=new_index)
|
1208
|
-
new_wk_task._add_elements(element_sets=task.element_sets)
|
1518
|
+
self._store.set_EAR_submission_index(EAR_ID, sub_idx)
|
1209
1519
|
|
1210
|
-
def
|
1520
|
+
def set_EAR_start(self, EAR_ID: int) -> None:
|
1521
|
+
"""Set the start time on an EAR."""
|
1522
|
+
self.app.logger.debug(f"Setting start for EAR ID {EAR_ID!r}")
|
1211
1523
|
with self._store.cached_load():
|
1212
1524
|
with self.batch_update():
|
1213
|
-
self.
|
1525
|
+
self._store.set_EAR_start(EAR_ID)
|
1214
1526
|
|
1215
|
-
def
|
1216
|
-
"""
|
1527
|
+
def set_EAR_end(self, EAR_ID: int, exit_code: int) -> None:
|
1528
|
+
"""Set the end time and exit code on an EAR.
|
1217
1529
|
|
1218
|
-
|
1219
|
-
|
1220
|
-
task_ref
|
1221
|
-
If not given, the new task will be added at the end of the workflow.
|
1530
|
+
If the exit code is non-zero, also set all downstream dependent EARs to be
|
1531
|
+
skipped. Also save any generated input/output files.
|
1222
1532
|
|
1223
1533
|
"""
|
1224
|
-
|
1225
|
-
|
1226
|
-
|
1227
|
-
|
1228
|
-
|
1229
|
-
|
1534
|
+
self.app.logger.debug(
|
1535
|
+
f"Setting end for EAR ID {EAR_ID!r} with exit code {exit_code!r}."
|
1536
|
+
)
|
1537
|
+
with self._store.cached_load():
|
1538
|
+
EAR = self.get_EARs_from_IDs([EAR_ID])[0]
|
1539
|
+
with self.batch_update():
|
1540
|
+
success = exit_code == 0 # TODO more sophisticated success heuristics
|
1541
|
+
if EAR.action.abortable and exit_code == ABORT_EXIT_CODE:
|
1542
|
+
# the point of aborting an EAR is to continue with the workflow:
|
1543
|
+
success = True
|
1544
|
+
self._store.set_EAR_end(EAR_ID, exit_code, success)
|
1545
|
+
|
1546
|
+
for IFG_i in EAR.action.input_file_generators:
|
1547
|
+
inp_file = IFG_i.input_file
|
1548
|
+
self.app.logger.debug(
|
1549
|
+
f"Saving EAR input file: {inp_file.label} for EAR ID {EAR_ID!r}."
|
1550
|
+
)
|
1551
|
+
param_id = EAR.data_idx[f"input_files.{inp_file.label}"]
|
1552
|
+
self._set_file(
|
1553
|
+
param_id=param_id,
|
1554
|
+
store_contents=True, # TODO: make optional according to IFG
|
1555
|
+
is_input=False,
|
1556
|
+
path=Path(inp_file.value()).resolve(),
|
1557
|
+
)
|
1230
1558
|
|
1231
|
-
|
1232
|
-
|
1233
|
-
|
1234
|
-
|
1559
|
+
for OFP_i in EAR.action.output_file_parsers:
|
1560
|
+
for out_file_j in OFP_i.output_files:
|
1561
|
+
if out_file_j.label not in OFP_i.save_files:
|
1562
|
+
continue
|
1563
|
+
self.app.logger.debug(
|
1564
|
+
f"Saving EAR output file: {out_file_j.label} for EAR ID "
|
1565
|
+
f"{EAR_ID!r}."
|
1566
|
+
)
|
1567
|
+
param_id = EAR.data_idx[f"output_files.{out_file_j.label}"]
|
1568
|
+
self._set_file(
|
1569
|
+
param_id=param_id,
|
1570
|
+
store_contents=True, # TODO: make optional according to OFP
|
1571
|
+
is_input=False,
|
1572
|
+
path=Path(out_file_j.value()).resolve(),
|
1573
|
+
)
|
1235
1574
|
|
1236
|
-
|
1237
|
-
|
1238
|
-
|
1239
|
-
|
1575
|
+
if exit_code != 0:
|
1576
|
+
for EAR_dep_ID in EAR.get_dependent_EARs(as_objects=False):
|
1577
|
+
# TODO: this needs to be recursive?
|
1578
|
+
self.app.logger.debug(
|
1579
|
+
f"Setting EAR ID {EAR_dep_ID!r} to skip because it depends on"
|
1580
|
+
f" EAR ID {EAR_ID!r}, which exited with a non-zero exit code:"
|
1581
|
+
f" {exit_code!r}."
|
1582
|
+
)
|
1583
|
+
self._store.set_EAR_skip(EAR_dep_ID)
|
1240
1584
|
|
1241
|
-
def
|
1242
|
-
|
1585
|
+
def set_EAR_skip(self, EAR_ID: int) -> None:
|
1586
|
+
"""Record that an EAR is to be skipped due to an upstream failure."""
|
1587
|
+
with self._store.cached_load():
|
1588
|
+
with self.batch_update():
|
1589
|
+
self._store.set_EAR_skip(EAR_ID)
|
1243
1590
|
|
1244
|
-
def
|
1245
|
-
|
1591
|
+
def get_EAR_skipped(self, EAR_ID: int) -> None:
|
1592
|
+
"""Check if an EAR is to be skipped."""
|
1593
|
+
with self._store.cached_load():
|
1594
|
+
return self._store.get_EAR_skipped(EAR_ID)
|
1246
1595
|
|
1247
|
-
def
|
1248
|
-
|
1596
|
+
def set_parameter_value(self, param_id: int, value: Any) -> None:
|
1597
|
+
with self._store.cached_load():
|
1598
|
+
with self.batch_update():
|
1599
|
+
self._store.set_parameter_value(param_id, value)
|
1249
1600
|
|
1250
|
-
def
|
1251
|
-
|
1601
|
+
def elements(self) -> Iterator[app.Element]:
|
1602
|
+
for task in self.tasks:
|
1603
|
+
for element in task.elements[:]:
|
1604
|
+
yield element
|
1252
1605
|
|
1253
|
-
def
|
1254
|
-
|
1255
|
-
|
1256
|
-
|
1606
|
+
def get_iteration_task_pathway(self):
|
1607
|
+
pathway = []
|
1608
|
+
for task in self.tasks:
|
1609
|
+
loop_idx = {}
|
1610
|
+
pathway.append((task.insert_ID, loop_idx))
|
1257
1611
|
|
1258
|
-
|
1259
|
-
|
1612
|
+
for loop in self.loops: # TODO: order by depth (inner loops first?)
|
1613
|
+
task_subset = loop.task_insert_IDs
|
1614
|
+
subset_idx = [idx for idx, i in enumerate(pathway) if i[0] in task_subset]
|
1615
|
+
looped_pathway = []
|
1616
|
+
for iter_i in range(loop.num_added_iterations):
|
1617
|
+
for j in subset_idx:
|
1618
|
+
item_j = copy.deepcopy(pathway[j])
|
1619
|
+
item_j[1][loop.name] = iter_i
|
1620
|
+
looped_pathway.append(item_j)
|
1260
1621
|
|
1261
|
-
|
1262
|
-
|
1622
|
+
# replaced pathway `sub_idx` items with `looped_pathway` items:
|
1623
|
+
pathway = replace_items(
|
1624
|
+
pathway, subset_idx[0], subset_idx[-1] + 1, looped_pathway
|
1625
|
+
)
|
1263
1626
|
|
1264
|
-
|
1265
|
-
self, input_source: app.InputSource, new_task_name: str
|
1266
|
-
) -> None:
|
1267
|
-
"""Normalise the input source task reference and convert a source to a local type
|
1268
|
-
if required."""
|
1627
|
+
return pathway
|
1269
1628
|
|
1270
|
-
|
1629
|
+
def _submit(
|
1630
|
+
self,
|
1631
|
+
ignore_errors: Optional[bool] = False,
|
1632
|
+
JS_parallelism: Optional[bool] = None,
|
1633
|
+
print_stdout: Optional[bool] = False,
|
1634
|
+
) -> Tuple[List[Exception], Dict[int, int]]:
|
1635
|
+
"""Submit outstanding EARs for execution."""
|
1271
1636
|
|
1272
|
-
if
|
1273
|
-
|
1274
|
-
|
1275
|
-
|
1276
|
-
|
1277
|
-
|
1278
|
-
|
1279
|
-
else:
|
1280
|
-
warn(
|
1281
|
-
f"Changing input source {input_source.to_string()!r} to a local "
|
1282
|
-
f"type, since the input source task reference refers to its own "
|
1283
|
-
f"task."
|
1284
|
-
)
|
1285
|
-
# TODO: add an InputSource source_type setter to reset
|
1286
|
-
# task_ref/source_type?
|
1287
|
-
input_source.source_type = self.app.InputSourceType.LOCAL
|
1288
|
-
input_source.task_ref = None
|
1289
|
-
input_source.task_source_type = None
|
1290
|
-
else:
|
1291
|
-
try:
|
1292
|
-
uniq_names_cur = self.get_task_unique_names(map_to_insert_ID=True)
|
1293
|
-
input_source.task_ref = uniq_names_cur[input_source.task_ref]
|
1294
|
-
except KeyError:
|
1295
|
-
raise InvalidInputSourceTaskReference(
|
1296
|
-
f"Input source {input_source.to_string()!r} refers to a missing "
|
1297
|
-
f"or inaccessible task: {input_source.task_ref!r}."
|
1298
|
-
)
|
1637
|
+
# generate a new submission if there are no pending submissions:
|
1638
|
+
pending = [i for i in self.submissions if i.needs_submit]
|
1639
|
+
if not pending:
|
1640
|
+
new_sub = self._add_submission(JS_parallelism=JS_parallelism)
|
1641
|
+
if not new_sub:
|
1642
|
+
raise ValueError("No pending element action runs to submit!")
|
1643
|
+
pending = [new_sub]
|
1299
1644
|
|
1300
|
-
|
1301
|
-
|
1302
|
-
|
1303
|
-
for i in self._store.get_task_elements(task.index, task.insert_ID, selection)
|
1304
|
-
]
|
1645
|
+
self.submissions_path.mkdir(exist_ok=True, parents=True)
|
1646
|
+
self.execution_path.mkdir(exist_ok=True, parents=True)
|
1647
|
+
self.task_artifacts_path.mkdir(exist_ok=True, parents=True)
|
1305
1648
|
|
1306
|
-
|
1307
|
-
|
1308
|
-
|
1309
|
-
for i in self._store.get_task_elements_islice(
|
1310
|
-
task.index, task.insert_ID, selection
|
1311
|
-
):
|
1312
|
-
yield self.app.Element(task=task, **i)
|
1313
|
-
|
1314
|
-
def get_EARs_from_IDs(self, indices: List[EAR_ID]) -> List[app.ElementActionRun]:
|
1315
|
-
"""Return element action run objects from a list of five-tuples, representing the
|
1316
|
-
task insert ID, element index, iteration index, action index, and run index,
|
1317
|
-
respectively.
|
1318
|
-
"""
|
1319
|
-
objs = []
|
1320
|
-
for _EAR_ID in indices:
|
1321
|
-
task = self.tasks.get(insert_ID=_EAR_ID.task_insert_ID)
|
1322
|
-
elem_iters = task.elements[_EAR_ID.element_idx].iterations
|
1323
|
-
for i in elem_iters:
|
1324
|
-
if i.index == _EAR_ID.iteration_idx:
|
1325
|
-
iter_i = i
|
1326
|
-
break
|
1327
|
-
EAR_i = iter_i.actions[_EAR_ID.action_idx].runs[_EAR_ID.run_idx]
|
1328
|
-
objs.append(EAR_i)
|
1329
|
-
return objs
|
1649
|
+
# for direct execution the submission must be persistent at submit-time, because
|
1650
|
+
# it will be read by a new instance of the app:
|
1651
|
+
self._store._pending.commit_all()
|
1330
1652
|
|
1331      | -
1332      | -
1333      | -
1334      | -
1335      | -
1336      | -
1337      | -
1338      | -
1339      | -
1340      | -
1341      | -
1342      | -                         .
1343      | -                     )
1344      | -             objs.append(iter_i)
1345      | -         return objs
     1653 | +         # submit all pending submissions:
     1654 | +         exceptions = []
     1655 | +         submitted_js = {}
     1656 | +         for sub in pending:
     1657 | +             try:
     1658 | +                 sub_js_idx = sub.submit(
     1659 | +                     ignore_errors=ignore_errors,
     1660 | +                     print_stdout=print_stdout,
     1661 | +                 )
     1662 | +                 submitted_js[sub.index] = sub_js_idx
     1663 | +             except SubmissionFailure as exc:
     1664 | +                 exceptions.append(exc)
1346 1665 |
1347      | -
1348      | -         """Return element objects from a list of two-tuples, representing the task insert
1349      | -         ID, and element index, respectively."""
1350      | -         return [
1351      | -             self.tasks.get(insert_ID=idx.task_insert_ID).elements[idx.element_idx]
1352      | -             for idx in indices
1353      | -         ]
     1666 | +         return exceptions, submitted_js
1354 1667 |
1355      | -     def
     1668 | +     def submit(
1356 1669 |         self,
1357      | -
1358      | -
1359      | -
1360      | -
     1670 | +         ignore_errors: Optional[bool] = False,
     1671 | +         JS_parallelism: Optional[bool] = None,
     1672 | +         print_stdout: Optional[bool] = False,
     1673 | +     ) -> Dict[int, int]:
1361 1674 |         with self._store.cached_load():
1362 1675 |             with self.batch_update():
1363      | -
     1676 | +                 # commit updates before raising exception:
     1677 | +                 exceptions, submitted_js = self._submit(
     1678 | +                     ignore_errors=ignore_errors,
     1679 | +                     JS_parallelism=JS_parallelism,
     1680 | +                     print_stdout=print_stdout,
     1681 | +                 )
1364 1682 |
1365      | -
1366      | -
1367      | -
1368      | -         jobscript_idx: int,
1369      | -         JS_element_idx: int,
1370      | -         JS_action_idx: int,
1371      | -     ) -> None:
1372      | -         """Set the start time on an EAR."""
1373      | -         with self._store.cached_load():
1374      | -             with self.batch_update():
1375      | -                 jobscript = self.submissions[submission_idx].jobscripts[jobscript_idx]
1376      | -                 (t_iD, _, i_idx, a_idx, r_idx, _) = jobscript.get_EAR_ID_array()[
1377      | -                     JS_action_idx, JS_element_idx
1378      | -                 ].item()
1379      | -                 self._store.set_EAR_start(t_iD, i_idx, a_idx, r_idx)
     1683 | +         if exceptions:
     1684 | +             msg = "\n" + "\n\n".join([i.message for i in exceptions])
     1685 | +             raise WorkflowSubmissionFailure(msg)
1380 1686 |
1381      | -
1382      | -         self,
1383      | -         submission_idx: int,
1384      | -         jobscript_idx: int,
1385      | -         JS_element_idx: int,
1386      | -         JS_action_idx: int,
1387      | -     ) -> None:
1388      | -         """Set the end time on an EAR."""
1389      | -         with self._store.cached_load():
1390      | -             with self.batch_update():
1391      | -                 jobscript = self.submissions[submission_idx].jobscripts[jobscript_idx]
1392      | -                 (t_iD, _, i_idx, a_idx, r_idx, _) = jobscript.get_EAR_ID_array()[
1393      | -                     JS_action_idx, JS_element_idx
1394      | -                 ].item()
1395      | -                 self._store.set_EAR_end(t_iD, i_idx, a_idx, r_idx)
     1687 | +         return submitted_js
1396 1688 |
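The reworked public `submit` above wraps `_submit` in a batch update, gathers per-submission `SubmissionFailure`s, and raises a single `WorkflowSubmissionFailure` afterwards, so store updates are committed even when a submission fails. A hedged usage sketch, assuming `wf` is an already-loaded workflow object and that `WorkflowSubmissionFailure` is importable from the app's errors module (both assumptions; only the `submit` signature comes from the diff):

    try:
        submitted = wf.submit(JS_parallelism=True, print_stdout=True)
    except WorkflowSubmissionFailure as exc:
        # batch updates were committed before the raise, so the new
        # submission record is persistent and could be retried:
        print(f"submission failed: {exc}")
    else:
        # per the signature above, a mapping keyed by submission index:
        for sub_idx, js in submitted.items():
            print(f"submission {sub_idx}: submitted jobscripts {js}")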
1397      | -     def
1398      | -         self,
1399      | -         submission_idx: int,
1400      | -         jobscript_idx: int,
1401      | -         JS_element_idx: int,
1402      | -         JS_action_idx: int,
1403      | -     ):
     1689 | +     def add_submission(self, JS_parallelism: Optional[bool] = None) -> app.Submission:
1404 1690 |         with self._store.cached_load():
1405      | -
1406      | -
1407      | -             EAR_id = EAR_ID(*id_args)
1408      | -             EAR = self.get_EARs_from_IDs([EAR_id])[0]
1409      | -
1410      | -             return jobscript, EAR
     1691 | +             with self.batch_update():
     1692 | +                 return self._add_submission(JS_parallelism)
1411 1693 |
1412      | -     def
1413      | -         self
1414      | -
1415      | -
1416      | -
1417      | -
1418      | -
1419      | -
1420      | -
1421      | -
1422      | -
     1694 | +     def _add_submission(self, JS_parallelism: Optional[bool] = None) -> app.Submission:
     1695 | +         new_idx = self.num_submissions
     1696 | +         _ = self.submissions  # TODO: just to ensure `submissions` is loaded
     1697 | +         sub_obj = self.app.Submission(
     1698 | +             index=new_idx,
     1699 | +             workflow=self,
     1700 | +             jobscripts=self.resolve_jobscripts(),
     1701 | +             JS_parallelism=JS_parallelism,
     1702 | +         )
     1703 | +         all_EAR_ID = [i for js in sub_obj.jobscripts for i in js.EAR_ID.flatten()]
     1704 | +         # EAR_indices = sub_obj.prepare_EAR_submission_idx_update()
     1705 | +         if not all_EAR_ID:
     1706 | +             print(
     1707 | +                 f"There are no pending element action runs, so a new submission was not "
     1708 | +                 f"added."
1423 1709 |             )
1424      | -
1425      | -             for param_name, shell_var_name in shell_vars:
1426      | -                 commands += jobscript.shell.format_save_parameter(
1427      | -                     workflow_app_alias=jobscript.workflow_app_alias,
1428      | -                     param_name=param_name,
1429      | -                     shell_var_name=shell_var_name,
1430      | -                 )
1431      | -             commands = jobscript.shell.wrap_in_subshell(commands)
1432      | -             cmd_file_name = jobscript.get_commands_file_name(JS_action_idx)
1433      | -             with Path(cmd_file_name).open("wt", newline="\n") as fp:
1434      | -                 # (assuming we have CD'd correctly to the element run directory)
1435      | -                 fp.write(commands)
     1710 | +             return
1436 1711 |
1437      | -     def save_parameter(
1438      | -         self,
1439      | -         name,
1440      | -         value,
1441      | -         submission_idx: int,
1442      | -         jobscript_idx: int,
1443      | -         JS_element_idx: int,
1444      | -         JS_action_idx: int,
1445      | -     ):
1446 1712 |         with self._store.cached_load():
1447 1713 |             with self.batch_update():
1448      | -
1449      | -
1450      | -                 )
1451      | -                 data_idx = EAR.data_idx[name]
1452      | -                 self._store.set_parameter(data_idx, value)
     1714 | +                 for i in all_EAR_ID:
     1715 | +                     self._store.set_EAR_submission_index(EAR_ID=i, sub_idx=new_idx)
1453 1716 |
1454      | -
1455      | -
1456      | -
1457      | -
1458      | -
1459      | -         JS_element_idx: int,
1460      | -         JS_action_idx: int,
1461      | -     ):
1462      | -         """Save multiple parameters to a given EAR."""
     1717 | +         # self.set_EAR_submission_indices(sub_idx=new_idx, EAR_indices=EAR_indices)
     1718 | +
     1719 | +         sub_obj_js, _ = sub_obj.to_json_like()
     1720 | +         self._submissions.append(sub_obj)
     1721 | +         self._pending["submissions"].append(new_idx)
1463 1722 |         with self._store.cached_load():
1464 1723 |             with self.batch_update():
1465      | -
1466      | -
1467      | -
1468      | -                 for name, value in values.items():
1469      | -                     data_idx = EAR.data_idx[name]
1470      | -                     self._store.set_parameter(data_idx, value)
     1724 | +                 self._store.add_submission(new_idx, sub_obj_js)
     1725 | +
     1726 | +         return self.submissions[new_idx]
1471 1727 |
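`_add_submission` gathers every element action run (EAR) ID by flattening each jobscript's `EAR_ID` array (new line 1703); later hunks show this array is `int32` and initialised to -1 where no run exists. A self-contained sketch of that collection step, with assumed shapes:

    import numpy as np

    # one (num_actions, num_elements) array per jobscript; -1 marks "no run":
    js_EAR_ID = [
        np.array([[3, 4], [5, -1]], dtype=np.int32),
        np.array([[7, 8]], dtype=np.int32),
    ]
    all_EAR_ID = [int(i) for arr in js_EAR_ID for i in arr.flatten()]
    print(all_EAR_ID)  # [3, 4, 5, -1, 7, 8]

    # the diff does not show -1 filtering at this point, but a
    # sentinel-aware variant would be:
    valid = [i for i in all_EAR_ID if i >= 0]
    print(valid)  # [3, 4, 5, 7, 8]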
1472 1728 |     def resolve_jobscripts(self) -> List[app.Jobscript]:
1473 1729 |         js, element_deps = self._resolve_singular_jobscripts()
@@ -1492,6 +1748,8 @@ class Workflow:
1492 1748 |         -------
1493 1749 |         submission_jobscripts
1494 1750 |         all_element_deps
     1751 | +             For a given jobscript index, for a given jobscript element index within that
     1752 | +             jobscript, this is a list of the EAR ID dependencies of that element.
1495 1753 |
1496 1754 |         """
1497 1755 |
@@ -1520,8 +1778,8 @@ class Workflow:
1520 1778 |             len(task_actions),
1521 1779 |             len(task_elements[task.insert_ID]),
1522 1780 |         )
1523      | -
1524      | -
     1781 | +         EAR_ID_arr = np.empty(EAR_idx_arr_shape, dtype=np.int32)
     1782 | +         EAR_ID_arr[:] = -1
1525 1783 |
1526 1784 |         new_js_idx = len(submission_jobscripts)
1527 1785 |
@@ -1530,8 +1788,8 @@ class Workflow:
1530 1788 |             "task_loop_idx": [loop_idx_i],
1531 1789 |             "task_actions": task_actions,  # map jobscript actions to task actions
1532 1790 |             "task_elements": task_elements,  # map jobscript elements to task elements
1533      | -             "EARs": {},  # keys are (task insert ID, elem_idx, EAR_idx)
1534      | -             "
     1791 | +             # "EARs": {},  # keys are (task insert ID, elem_idx, EAR_idx)
     1792 | +             "EAR_ID": EAR_ID_arr,
1535 1793 |             "resources": res[js_dat["resources"]],
1536 1794 |             "resource_hash": res_hash[js_dat["resources"]],
1537 1795 |             "dependencies": {},
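After this hunk, each per-jobscript dict carries a dense `EAR_ID` array in place of the old tuple-keyed `EARs` dict. An illustrative sketch of that dict's assumed shape (placeholder values; only the keys shown in the hunk are taken from the diff):

    import numpy as np

    EAR_ID_arr = np.full((2, 3), -1, dtype=np.int32)  # (num actions, num elements)
    js_i = {
        "task_loop_idx": [0],
        "task_actions": [],    # map jobscript actions to task actions
        "task_elements": {},   # map jobscript elements to task elements
        "EAR_ID": EAR_ID_arr,  # replaces the old tuple-keyed "EARs" dict
        "resources": None,     # placeholder for illustration
        "resource_hash": None,
        "dependencies": {},
    }
    print(js_i["EAR_ID"].shape)  # (2, 3)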
@@ -1540,43 +1798,51 @@ class Workflow:
1540 1798 |             js_elem_idx = task_elements[task.insert_ID].index((elem_idx))
1541 1799 |             all_EAR_IDs = []
1542 1800 |             for act_idx in act_indices:
1543      | -
1544      | -
1545      | -                 )
     1801 | +                 EAR_ID_i = EAR_map[act_idx, elem_idx].item()
     1802 | +                 # EAR_idx, run_idx, iter_idx = (
     1803 | +                 #     i.item() for i in EAR_map[act_idx, elem_idx]
     1804 | +                 # )
1546 1805 |                 # construct EAR_ID object so we can retrieve the EAR objects and
1547 1806 |                 # so their dependencies:
1548      | -                 EAR_id = EAR_ID(
1549      | -
1550      | -
1551      | -
1552      | -
1553      | -
1554      | -
1555      | -                 )
1556      | -                 all_EAR_IDs.append(
1557      | -                 js_i["EARs"][(task.insert_ID, elem_idx, EAR_idx)] = (
1558      | -
1559      | -
1560      | -
1561      | -                 )
     1807 | +                 # EAR_id = EAR_ID(
     1808 | +                 #     task_insert_ID=task.insert_ID,
     1809 | +                 #     element_idx=elem_idx,
     1810 | +                 #     iteration_idx=iter_idx,
     1811 | +                 #     action_idx=act_idx,
     1812 | +                 #     run_idx=run_idx,
     1813 | +                 #     EAR_idx=EAR_idx,
     1814 | +                 # )
     1815 | +                 all_EAR_IDs.append(EAR_ID_i)
     1816 | +                 # js_i["EARs"][(task.insert_ID, elem_idx, EAR_idx)] = (
     1817 | +                 #     iter_idx,
     1818 | +                 #     act_idx,
     1819 | +                 #     run_idx,
     1820 | +                 # )
1562 1821 |
1563 1822 |                 js_act_idx = task_actions.index([task.insert_ID, act_idx, 0])
1564      | -                 js_i["EAR_idx"][js_act_idx][js_elem_idx] = EAR_idx
     1823 | +                 # js_i["EAR_idx"][js_act_idx][js_elem_idx] = EAR_idx
     1824 | +                 js_i["EAR_ID"][js_act_idx][js_elem_idx] = EAR_ID_i
1565 1825 |
1566 1826 |             # get indices of EARs that this element depends on:
1567 1827 |             EAR_objs = self.get_EARs_from_IDs(all_EAR_IDs)
1568 1828 |             EAR_deps = [i.get_EAR_dependencies() for i in EAR_objs]
1569 1829 |             EAR_deps_flat = [j for i in EAR_deps for j in i]
1570 1830 |
     1831 | +             # print(f"{EAR_deps=}")
     1832 | +             # print(f"{EAR_deps_flat=}")
     1833 | +
1571 1834 |             # represent EAR dependencies of this jobscripts using the same key
1572 1835 |             # format as in the "EARs" dict, to allow for quick lookup when
1573 1836 |             # resolving dependencies between jobscripts; also, no need to include
1574 1837 |             # EAR dependencies that are in this jobscript:
     1838 | +             # EAR_deps_EAR_idx = [
     1839 | +             #     (i.task_insert_ID, i.element_idx, i.EAR_idx)
     1840 | +             #     for i in EAR_deps_flat
     1841 | +             #     if (i.task_insert_ID, i.element_idx, i.EAR_idx)
     1842 | +             #     not in js_i["EARs"]
     1843 | +             # ]
1575 1844 |             EAR_deps_EAR_idx = [
1576      | -
1577      | -                 for i in EAR_deps_flat
1578      | -                 if (i.task_insert_ID, i.element_idx, i.EAR_idx)
1579      | -                 not in js_i["EARs"]
     1845 | +                 i for i in EAR_deps_flat if i not in js_i["EAR_ID"]
1580 1846 |             ]
1581 1847 |             if EAR_deps_EAR_idx:
1582 1848 |                 if new_js_idx not in all_element_deps:
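The simplified dependency filter (new line 1845) relies on scalar membership testing against the 2-D `EAR_ID` NumPy array: for a scalar `x`, `x in arr` is equivalent to `(arr == x).any()`. A short self-contained sketch with assumed values:

    import numpy as np

    EAR_ID = np.array([[10, 11], [12, -1]], dtype=np.int32)  # EARs in this jobscript
    EAR_deps_flat = [11, 42, 12, 99]  # EAR IDs this element depends on (assumed)

    # keep only dependencies satisfied outside this jobscript:
    external = [i for i in EAR_deps_flat if i not in EAR_ID]
    print(external)  # [42, 99]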
@@ -1588,43 +1854,65 @@ class Workflow:
1588 1854 |
1589 1855 |         return submission_jobscripts, all_element_deps
1590 1856 |
1591      | -     def
1592      | -
1593      | -
1594      | -
1595      | -
1596      | -
1597      | -
1598      | -
1599      | -
1600      | -
1601      | -
1602      | -
1603      | -
1604      | -
1605      | -
1606      | -
1607      | -
1608      | -
1609      | -
1610      | -         )
     1857 | +     def write_commands(
     1858 | +         self,
     1859 | +         submission_idx: int,
     1860 | +         jobscript_idx: int,
     1861 | +         JS_action_idx: int,
     1862 | +         EAR_ID: int,
     1863 | +     ) -> None:
     1864 | +         """Write run-time commands for a given EAR."""
     1865 | +         with self._store.cached_load():
     1866 | +             jobscript = self.submissions[submission_idx].jobscripts[jobscript_idx]
     1867 | +             EAR = self.get_EARs_from_IDs([EAR_ID])[0]
     1868 | +             commands, shell_vars = EAR.compose_commands(jobscript)
     1869 | +             for param_name, shell_var_name in shell_vars:
     1870 | +                 commands += jobscript.shell.format_save_parameter(
     1871 | +                     workflow_app_alias=jobscript.workflow_app_alias,
     1872 | +                     param_name=param_name,
     1873 | +                     shell_var_name=shell_var_name,
     1874 | +                     EAR_ID=EAR_ID,
     1875 | +                 )
     1876 | +             commands = jobscript.shell.wrap_in_subshell(commands, EAR.action.abortable)
     1877 | +             cmd_file_name = jobscript.get_commands_file_name(JS_action_idx)
     1878 | +             with Path(cmd_file_name).open("wt", newline="\n") as fp:
     1879 | +                 # (assuming we have CD'd correctly to the element run directory)
     1880 | +                 fp.write(commands)
1611 1881 |
1612      | -
     1882 | +     def save_parameter(
     1883 | +         self,
     1884 | +         name: str,
     1885 | +         value: Any,
     1886 | +         EAR_ID: int,
     1887 | +     ):
     1888 | +         with self._store.cached_load():
     1889 | +             with self.batch_update():
     1890 | +                 EAR = self.get_EARs_from_IDs([EAR_ID])[0]
     1891 | +                 param_id = EAR.data_idx[name]
     1892 | +                 self.set_parameter_value(param_id, value)
1613 1893 |
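The new `write_commands` opens the commands file with `newline="\n"` so the file keeps LF line endings even when written on Windows, since a POSIX shell will later source it. A minimal sketch of just that write step (file name and contents are hypothetical; hpcflow derives the real name per jobscript action):

    from pathlib import Path

    commands = 'echo "running action"\n'
    cmd_file_name = "js_0_act_0.sh"  # hypothetical commands file name
    with Path(cmd_file_name).open("wt", newline="\n") as fp:
        # "wt" plus newline="\n" disables platform newline translation:
        fp.write(commands)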
1614 1894 |     def show_all_EAR_statuses(self):
1615 1895 |         print(
1616 1896 |             f"{'task':8s} {'element':8s} {'iteration':8s} {'action':8s} "
1617      | -             f"{'run':8s} {'
     1897 | +             f"{'run':8s} {'sub.':8s} {'exitcode':8s} {'success':8s} {'skip':8s}"
1618 1898 |         )
1619 1899 |         for task in self.tasks:
1620      | -             for element in task.elements:
     1900 | +             for element in task.elements[:]:
1621 1901 |                 for iter_idx, iteration in enumerate(element.iterations):
1622 1902 |                     for act_idx, action_runs in iteration.actions.items():
1623 1903 |                         for run_idx, EAR in enumerate(action_runs.runs):
     1904 | +                             suc = EAR.success if EAR.success is not None else "-"
     1905 | +                             if EAR.exit_code is not None:
     1906 | +                                 exc = f"{EAR.exit_code:^8d}"
     1907 | +                             else:
     1908 | +                                 exc = f"{'-':^8}"
1624 1909 |                             print(
1625 1910 |                                 f"{task.insert_ID:^8d} {element.index:^8d} "
1626 1911 |                                 f"{iter_idx:^8d} {act_idx:^8d} {run_idx:^8d} "
1627 1912 |                                 f"{EAR.submission_status.name.lower():^8s}"
     1913 | +                                 f"{exc}"
     1914 | +                                 f"{suc:^8}"
     1915 | +                                 f"{EAR.skip:^8}"
1628 1916 |                             )
1629 1917 |
1630 1918 |
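The status table built above leans on Python's format-spec mini-language: `:^8` centres a field in eight columns, and a bool formatted with a width spec goes through `int.__format__`, so it renders as 1/0 rather than True/False. A small sketch:

    suc = True
    exc = f"{0:^8d}"        # centred exit code, e.g. '   0    '
    row = f"{12:^8d}{exc}{suc:^8}"
    print(row)              # the bool prints as '   1    ', not 'True'
    assert len(row) == 24   # three fixed-width, eight-column fields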