hpcflow-new2 0.2.0a50__py3-none-any.whl → 0.2.0a52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. hpcflow/_version.py +1 -1
  2. hpcflow/sdk/__init__.py +1 -1
  3. hpcflow/sdk/api.py +1 -1
  4. hpcflow/sdk/app.py +20 -11
  5. hpcflow/sdk/cli.py +34 -59
  6. hpcflow/sdk/core/__init__.py +13 -1
  7. hpcflow/sdk/core/actions.py +235 -126
  8. hpcflow/sdk/core/command_files.py +32 -24
  9. hpcflow/sdk/core/element.py +110 -114
  10. hpcflow/sdk/core/errors.py +57 -0
  11. hpcflow/sdk/core/loop.py +18 -34
  12. hpcflow/sdk/core/parameters.py +5 -3
  13. hpcflow/sdk/core/task.py +135 -131
  14. hpcflow/sdk/core/task_schema.py +11 -4
  15. hpcflow/sdk/core/utils.py +110 -2
  16. hpcflow/sdk/core/workflow.py +964 -676
  17. hpcflow/sdk/data/template_components/environments.yaml +0 -44
  18. hpcflow/sdk/data/template_components/task_schemas.yaml +52 -10
  19. hpcflow/sdk/persistence/__init__.py +21 -33
  20. hpcflow/sdk/persistence/base.py +1340 -458
  21. hpcflow/sdk/persistence/json.py +424 -546
  22. hpcflow/sdk/persistence/pending.py +563 -0
  23. hpcflow/sdk/persistence/store_resource.py +131 -0
  24. hpcflow/sdk/persistence/utils.py +57 -0
  25. hpcflow/sdk/persistence/zarr.py +852 -841
  26. hpcflow/sdk/submission/jobscript.py +133 -112
  27. hpcflow/sdk/submission/shells/bash.py +62 -16
  28. hpcflow/sdk/submission/shells/powershell.py +87 -16
  29. hpcflow/sdk/submission/submission.py +59 -35
  30. hpcflow/tests/unit/test_element.py +4 -9
  31. hpcflow/tests/unit/test_persistence.py +218 -0
  32. hpcflow/tests/unit/test_task.py +11 -12
  33. hpcflow/tests/unit/test_utils.py +82 -0
  34. hpcflow/tests/unit/test_workflow.py +3 -1
  35. {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/METADATA +3 -1
  36. {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/RECORD +38 -34
  37. {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/WHEEL +0 -0
  38. {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/entry_points.txt +0 -0
@@ -3,14 +3,27 @@ from contextlib import contextmanager
3
3
  import copy
4
4
  from dataclasses import dataclass, field
5
5
  from datetime import datetime, timezone
6
+
6
7
  from pathlib import Path
7
- from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
8
- from warnings import warn
8
+ import random
9
+ import string
10
+ import time
11
+ from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union
12
+ from fsspec.implementations.local import LocalFileSystem
13
+ from fsspec.implementations.zip import ZipFileSystem
9
14
 
10
15
  import numpy as np
16
+ from fsspec.core import url_to_fs
11
17
 
12
18
  from hpcflow.sdk import app
13
- from hpcflow.sdk.core.actions import EAR_ID
19
+ from hpcflow.sdk.core import (
20
+ ALL_TEMPLATE_FORMATS,
21
+ DEFAULT_TEMPLATE_FORMAT,
22
+ ABORT_EXIT_CODE,
23
+ )
24
+ from hpcflow.sdk.persistence import store_cls_from_str_NEW, DEFAULT_STORE_FORMAT
25
+ from hpcflow.sdk.persistence.base import TEMPLATE_COMP_TYPES, AnySEAR
26
+ from hpcflow.sdk.persistence.utils import ask_pw_on_auth_exc, infer_store
14
27
  from hpcflow.sdk.submission.jobscript import (
15
28
  generate_EAR_resource_map,
16
29
  group_resource_map_into_jobscripts,
@@ -28,22 +41,10 @@ from .utils import (
28
41
  replace_items,
29
42
  )
30
43
  from hpcflow.sdk.core.errors import (
31
- InvalidInputSourceTaskReference,
32
44
  LoopAlreadyExistsError,
33
45
  SubmissionFailure,
34
- WorkflowBatchUpdateFailedError,
35
- WorkflowNotFoundError,
36
46
  WorkflowSubmissionFailure,
37
47
  )
38
- from hpcflow.sdk.persistence import (
39
- store_cls_from_path,
40
- store_cls_from_str,
41
- temporary_workflow_rename,
42
- DEFAULT_STORE_FORMAT,
43
- )
44
-
45
- DEFAULT_TEMPLATE_FORMAT = "yaml"
46
- ALL_TEMPLATE_FORMATS = ("yaml", "json")
47
48
 
48
49
 
49
50
  class _DummyPersistentWorkflow:
@@ -63,7 +64,7 @@ class _DummyPersistentWorkflow:
63
64
  return self._data_ref[-1]
64
65
 
65
66
  def get_parameter_data(self, data_idx):
66
- return (True, self._parameters[self._data_ref.index(data_idx)])
67
+ return self._parameters[self._data_ref.index(data_idx)]
67
68
 
68
69
  def make_persistent(self, workflow: app.Workflow):
69
70
  for dat_i, source_i in zip(self._parameters, self._sources):
@@ -265,32 +266,75 @@ class WorkflowTemplate(JSONLike):
265
266
  self.loops.append(loop)
266
267
 
267
268
 
268
- class Workflow:
269
- """Class to represent a persistent {app_name} workflow."""
269
+ def resolve_fsspec(path: PathLike, **kwargs) -> Tuple[Any, str, str]:
270
+ """
271
+ Parameters
272
+ ----------
273
+ kwargs
274
+ This can include a `password` key, for connections via SSH.
275
+
276
+ """
277
+
278
+ path = str(path)
279
+ if path.endswith(".zip"):
280
+ # `url_to_fs` does not seem to work for zip combos e.g. `zip::ssh://`, so we
281
+ # construct a `ZipFileSystem` ourselves and assume it is signified only by the
282
+ # file extension:
283
+ fs, pw = ask_pw_on_auth_exc(
284
+ ZipFileSystem,
285
+ fo=path,
286
+ mode="r",
287
+ target_options=kwargs or {},
288
+ add_pw_to="target_options",
289
+ )
290
+ path = ""
270
291
 
271
- _app_attr = "app"
292
+ else:
293
+ (fs, path), pw = ask_pw_on_auth_exc(url_to_fs, str(path), **kwargs)
294
+ path = str(Path(path).as_posix())
295
+ if isinstance(fs, LocalFileSystem):
296
+ path = str(Path(path).resolve())
297
+
298
+ return fs, path, pw
272
299
 
300
+
301
+ class Workflow:
302
+ _app_attr = "app"
273
303
  _default_ts_fmt = r"%Y-%m-%d %H:%M:%S.%f"
274
304
  _default_ts_name_fmt = r"%Y-%m-%d_%H%M%S"
305
+ _input_files_dir_name = "input_files"
306
+ _exec_dir_name = "execute"
307
+
308
+ def __init__(
309
+ self,
310
+ path: Union[str, Path],
311
+ store_fmt: Optional[str] = None,
312
+ fs_kwargs: Optional[Dict] = None,
313
+ ):
314
+ """
315
+ Parameters
316
+ ----------
275
317
 
276
- def __init__(self, path: PathLike) -> None:
277
- self.path = Path(path).resolve()
278
- if not self.path.is_dir():
279
- raise WorkflowNotFoundError(f"No workflow found at path: {self.path}")
318
+ path :
280
319
 
281
- # assigned on first access to corresponding properties:
282
- self._ts_fmt = None
283
- self._ts_name_fmt = None
284
- self._creation_info = None
320
+ """
321
+
322
+ fs_path = str(path)
323
+ fs, path, _ = resolve_fsspec(fs_path or "", **(fs_kwargs or {}))
324
+ store_fmt = store_fmt or infer_store(fs_path, fs)
325
+ store_cls = store_cls_from_str_NEW(store_fmt)
285
326
 
327
+ self.path = path
328
+
329
+ self._creation_info = None
330
+ self._fs_path = None
286
331
  self._template = None
287
332
  self._template_components = None
288
333
  self._tasks = None
289
334
  self._loops = None
290
335
  self._submissions = None
291
336
 
292
- self._store = store_cls_from_path(self.path)(self)
293
-
337
+ self._store = store_cls(self.app, self, self.path, fs)
294
338
  self._in_batch_mode = False # flag to track when processing batch updates
295
339
 
296
340
  # store indices of updates during batch update, so we can revert on failure:
@@ -300,54 +344,16 @@ class Workflow:
300
344
  def name(self):
301
345
  """The workflow name may be different from the template name, as it includes the
302
346
  creation date-timestamp if generated."""
303
- return self.path.parts[-1]
304
-
305
- def _get_empty_pending(self) -> Dict:
306
- return {
307
- "template_components": {k: [] for k in self.app._template_component_types},
308
- "tasks": [], # list of int
309
- "loops": [], # list of int
310
- "submissions": [], # list of int
311
- }
312
-
313
- def _accept_pending(self) -> None:
314
- self._reset_pending()
315
-
316
- def _reset_pending(self) -> None:
317
- self._pending = self._get_empty_pending()
318
-
319
- def _reject_pending(self) -> None:
320
- """Revert pending changes to the in-memory representation of the workflow.
321
-
322
- This deletes new tasks, new template component data, new loops, and new
323
- submissions. Element additions to existing (non-pending) tasks are separately
324
- rejected/accepted by the WorkflowTask object.
325
-
326
- """
327
- for task_idx in self._pending["tasks"][::-1]:
328
- # iterate in reverse so the index references are correct
329
- self.tasks._remove_object(task_idx)
330
- self.template.tasks.pop(task_idx)
331
-
332
- for comp_type, comp_indices in self._pending["template_components"].items():
333
- for comp_idx in comp_indices[::-1]:
334
- # iterate in reverse so the index references are correct
335
- self.template_components[comp_type]._remove_object(comp_idx)
336
-
337
- for loop_idx in self._pending["loops"][::-1]:
338
- # iterate in reverse so the index references are correct
339
- self.loops._remove_object(loop_idx)
340
- self.template.loops.pop(loop_idx)
341
-
342
- for sub_idx in self._pending["submissions"][::-1]:
343
- # iterate in reverse so the index references are correct
344
- self._submissions.pop(sub_idx)
345
-
346
- self._reset_pending()
347
+ # TODO: this won't work for zip stores
348
+ return str(Path(self.fs_path).parts[-1])
347
349
 
348
350
  @property
349
351
  def store_format(self):
350
- return self._store.store_name
352
+ return self._store._name
353
+
354
+ @property
355
+ def num_tasks(self) -> int:
356
+ return len(self.tasks)
351
357
 
352
358
  @classmethod
353
359
  def from_template(
@@ -440,7 +446,7 @@ class Workflow:
440
446
  The datetime format to use when generating the workflow name, where it
441
447
  includes a timestamp.
442
448
  """
443
- template = self.app.WorkflowTemplate.from_YAML_file(YAML_path)
449
+ template = cls.app.WorkflowTemplate.from_YAML_file(YAML_path)
444
450
  return cls.from_template(
445
451
  template,
446
452
  path,
@@ -488,7 +494,7 @@ class Workflow:
488
494
  The datetime format to use when generating the workflow name, where it
489
495
  includes a timestamp.
490
496
  """
491
- template = self.app.WorkflowTemplate.from_YAML_string(YAML_str)
497
+ template = cls.app.WorkflowTemplate.from_YAML_string(YAML_str)
492
498
  return cls.from_template(
493
499
  template,
494
500
  path,
@@ -713,193 +719,151 @@ class Workflow:
713
719
  ts_name_fmt,
714
720
  )
715
721
 
716
- @contextmanager
717
- def batch_update(self, is_workflow_creation: bool = False) -> Iterator[None]:
718
- """A context manager that batches up structural changes to the workflow and
719
- commits them to disk all together when the context manager exits."""
722
+ def _add_empty_task(
723
+ self,
724
+ task: app.Task,
725
+ new_index: Optional[int] = None,
726
+ ) -> app.WorkflowTask:
727
+ if new_index is None:
728
+ new_index = self.num_tasks
720
729
 
721
- if self._in_batch_mode:
722
- yield
723
- else:
724
- try:
725
- self._in_batch_mode = True
726
- yield
730
+ insert_ID = self.num_added_tasks
727
731
 
728
- except Exception as err:
729
- print("batch update exception!")
732
+ # make a copy with persistent schema inputs:
733
+ task_c, _ = task.to_persistent(self, insert_ID)
730
734
 
731
- self._in_batch_mode = False
732
- self._store.reject_pending()
735
+ # add to the WorkflowTemplate:
736
+ self.template._add_empty_task(task_c, new_index, insert_ID)
733
737
 
734
- for task in self.tasks:
735
- task._reset_pending_elements()
738
+ # create and insert a new WorkflowTask:
739
+ self.tasks.add_object(
740
+ self.app.WorkflowTask.new_empty_task(self, task_c, new_index),
741
+ index=new_index,
742
+ )
736
743
 
737
- for loop in self.loops:
738
- loop._reset_pending_num_added_iters()
744
+ # update persistent store:
745
+ task_js, temp_comps_js = task_c.to_json_like()
746
+ self._store.add_template_components(temp_comps_js)
747
+ self._store.add_task(new_index, task_js)
739
748
 
740
- self._reject_pending()
749
+ # update in-memory workflow template components:
750
+ temp_comps = self.app.template_components_from_json_like(temp_comps_js)
751
+ for comp_type, comps in temp_comps.items():
752
+ for comp in comps:
753
+ comp._set_hash()
754
+ if comp not in self.template_components[comp_type]:
755
+ idx = self.template_components[comp_type].add_object(comp)
756
+ self._pending["template_components"][comp_type].append(idx)
741
757
 
742
- if is_workflow_creation:
743
- # creation failed, so no need to keep the newly generated workflow:
744
- self._store.delete_no_confirm()
745
- self._store.reinstate_replaced_dir()
758
+ self._pending["tasks"].append(new_index)
746
759
 
747
- raise err
760
+ return self.tasks[new_index]
748
761
 
749
- else:
750
- if self._store.has_pending:
751
- is_diff = self._store.is_modified_on_disk()
752
- if is_diff:
753
- raise WorkflowBatchUpdateFailedError(
754
- f"Workflow modified on disk since it was loaded!"
755
- )
762
+ def _add_task(self, task: app.Task, new_index: Optional[int] = None) -> None:
763
+ new_wk_task = self._add_empty_task(task=task, new_index=new_index)
764
+ new_wk_task._add_elements(element_sets=task.element_sets) # TODO
756
765
 
757
- for task in self.tasks:
758
- task._accept_pending_elements()
766
+ def add_task(self, task: app.Task, new_index: Optional[int] = None) -> None:
767
+ with self._store.cached_load():
768
+ with self.batch_update():
769
+ self._add_task(task, new_index=new_index)
759
770
 
760
- for loop in self.loops:
761
- loop._accept_pending_num_added_iters()
771
+ def add_task_after(self, new_task: app.Task, task_ref: app.Task = None) -> None:
772
+ """Add a new task after the specified task.
762
773
 
763
- self._store.remove_replaced_dir()
764
- # TODO: handle errors in commit pending?
765
- self._store.commit_pending()
766
- self._accept_pending()
767
- self._in_batch_mode = False
774
+ Parameters
775
+ ----------
776
+ task_ref
777
+ If not given, the new task will be added at the end of the workflow.
768
778
 
769
- @classmethod
770
- def _write_empty_workflow(
771
- cls,
772
- template: app.WorkflowTemplate,
773
- path: Optional[PathLike] = None,
774
- name: Optional[str] = None,
775
- overwrite: Optional[bool] = False,
776
- store: Optional[str] = DEFAULT_STORE_FORMAT,
777
- ts_fmt: Optional[str] = None,
778
- ts_name_fmt: Optional[str] = None,
779
- ) -> app.Workflow:
780
779
  """
780
+ new_index = task_ref.index + 1 if task_ref else None
781
+ self.add_task(new_task, new_index)
782
+ # TODO: add new downstream elements?
783
+
784
+ def add_task_before(self, new_task: app.Task, task_ref: app.Task = None) -> None:
785
+ """Add a new task before the specified task.
786
+
781
787
  Parameters
782
788
  ----------
783
- path
784
- The directory in which the workflow will be generated. The current directory
785
- if not specified.
786
- name
787
- The name of the workflow. If specified, the workflow directory will be `path`
788
- joined with `name`. If not specified the WorkflowTemplate name will be used,
789
- in combination with a date-timestamp.
790
- overwrite
791
- If True and the workflow directory (`path` + `name`) already exists, the
792
- existing directory will be overwritten.
793
- store
794
- The persistent store to use for this workflow.
795
- ts_fmt
796
- The datetime format to use for storing datetimes. Datetimes are always stored
797
- in UTC (because Numpy does not store time zone info), so this should not
798
- include a time zone name.
799
- ts_name_fmt
800
- The datetime format to use when generating the workflow name, where it
801
- includes a timestamp.
802
- """
789
+ task_ref
790
+ If not given, the new task will be added at the beginning of the workflow.
803
791
 
804
- ts = datetime.now()
792
+ """
793
+ new_index = task_ref.index if task_ref else 0
794
+ self.add_task(new_task, new_index)
795
+ # TODO: add new downstream elements?
805
796
 
806
- # store all times in UTC, since Numpy doesn't support time zone info:
807
- ts_utc = ts.astimezone(tz=timezone.utc)
797
+ def _add_empty_loop(self, loop: app.Loop) -> app.WorkflowLoop:
798
+ """Add a new loop (zeroth iterations only) to the workflow."""
808
799
 
809
- ts_name_fmt = ts_name_fmt or cls._default_ts_name_fmt
810
- ts_fmt = ts_fmt or cls._default_ts_fmt
800
+ new_index = self.num_loops
811
801
 
812
- path = Path(path or "").resolve()
813
- name = name or f"{template.name}_{ts.strftime(ts_name_fmt)}"
814
- workflow_path = path.joinpath(name)
802
+ # don't modify passed object:
803
+ loop_c = copy.deepcopy(loop)
815
804
 
816
- replaced_dir = None
817
- if workflow_path.exists():
818
- if overwrite:
819
- replaced_dir = temporary_workflow_rename(workflow_path)
820
- else:
821
- raise ValueError(f"Path already exists: {workflow_path}.")
805
+ # add to the WorkflowTemplate:
806
+ self.template._add_empty_loop(loop_c)
822
807
 
823
- # make template-level inputs/resources think they are persistent:
824
- wk_dummy = _DummyPersistentWorkflow()
825
- param_src = {"type": "workflow_resources"}
826
- for res_i in template.resources:
827
- res_i.make_persistent(wk_dummy, param_src)
808
+ # create and insert a new WorkflowLoop:
809
+ self.loops.add_object(
810
+ self.app.WorkflowLoop.new_empty_loop(
811
+ index=new_index,
812
+ workflow=self,
813
+ template=loop_c,
814
+ )
815
+ )
816
+ wk_loop = self.loops[new_index]
828
817
 
829
- template_js, template_sh = template.to_json_like(exclude=["tasks", "loops"])
830
- template_js["tasks"] = []
831
- template_js["loops"] = []
818
+ loop_js, _ = loop_c.to_json_like()
832
819
 
833
- creation_info = {
834
- "app_info": cls.app.get_info(),
835
- "create_time": ts_utc.strftime(ts_fmt),
836
- "ts_fmt": ts_fmt,
837
- "ts_name_fmt": ts_name_fmt,
838
- }
820
+ # all these element iterations will be initialised for the new loop:
821
+ iter_IDs = [
822
+ i.id_ for i in self.get_element_iterations_of_tasks(loop_c.task_insert_IDs)
823
+ ]
839
824
 
840
- store_cls = store_cls_from_str(store)
841
- store_cls.write_empty_workflow(
842
- template_js=template_js,
843
- template_components_js=template_sh,
844
- workflow_path=workflow_path,
845
- replaced_dir=replaced_dir,
846
- creation_info=creation_info,
825
+ # update persistent store:
826
+ self._store.add_loop(
827
+ loop_template=loop_js,
828
+ iterable_parameters=wk_loop.iterable_parameters,
829
+ iter_IDs=iter_IDs,
847
830
  )
848
- wk = cls(workflow_path)
849
831
 
850
- # actually make template inputs/resources persistent, now the workflow exists:
851
- wk_dummy.make_persistent(wk)
832
+ self._pending["loops"].append(new_index)
852
833
 
853
- return wk
834
+ return wk_loop
854
835
 
855
- @property
856
- def ts_fmt(self):
857
- if not self._ts_fmt:
858
- self._ts_fmt = self._store.get_creation_info()["ts_fmt"]
859
- return self._ts_fmt
836
+ def _add_loop(self, loop: app.Loop, parent_loop_indices: Dict = None) -> None:
837
+ new_wk_loop = self._add_empty_loop(loop)
838
+ if loop.num_iterations is not None:
839
+ # fixed number of iterations, so add remaining N > 0 iterations:
840
+ for _ in range(loop.num_iterations - 1):
841
+ new_wk_loop.add_iteration(parent_loop_indices=parent_loop_indices)
842
+
843
+ def add_loop(self, loop: app.Loop, parent_loop_indices: Dict = None) -> None:
844
+ """Add a loop to a subset of workflow tasks."""
845
+ with self._store.cached_load():
846
+ with self.batch_update():
847
+ self._add_loop(loop, parent_loop_indices)
860
848
 
861
849
  @property
862
- def ts_name_fmt(self):
863
- if not self._ts_name_fmt:
864
- self._ts_name_fmt = self._store.get_creation_info()["ts_name_fmt"]
865
- return self._ts_name_fmt
850
+ def fs_path(self):
851
+ if not self._fs_path:
852
+ self._fs_path = self._store.get_fs_path()
853
+ return self._fs_path
866
854
 
867
855
  @property
868
856
  def creation_info(self):
869
857
  if not self._creation_info:
870
- with self._store.cached_load():
871
- info = self._store.get_creation_info()
872
- info["create_time"] = datetime.strptime(
873
- info["create_time"], info["ts_fmt"]
874
- ).replace(tzinfo=timezone.utc)
875
- self._creation_info = info
858
+ info = self._store.get_creation_info()
859
+ info["create_time"] = (
860
+ datetime.strptime(info["create_time"], info["ts_fmt"])
861
+ .replace(tzinfo=timezone.utc)
862
+ .astimezone()
863
+ )
864
+ self._creation_info = info
876
865
  return self._creation_info
877
866
 
878
- @property
879
- def num_tasks(self) -> int:
880
- return len(self.tasks)
881
-
882
- @property
883
- def num_added_tasks(self) -> int:
884
- with self._store.cached_load():
885
- return self._store.get_num_added_tasks()
886
-
887
- @property
888
- def num_elements(self) -> int:
889
- return sum(task.num_elements for task in self.tasks)
890
-
891
- @property
892
- def num_element_iterations(self) -> int:
893
- return sum(task.num_element_iterations for task in self.tasks)
894
-
895
- @property
896
- def num_loops(self) -> int:
897
- return len(self.loops)
898
-
899
- @property
900
- def num_submissions(self) -> int:
901
- return len(self.submissions)
902
-
903
867
  @property
904
868
  def template_components(self) -> Dict:
905
869
  if self._template_components is None:
@@ -913,6 +877,11 @@ class Workflow:
913
877
  if self._template is None:
914
878
  with self._store.cached_load():
915
879
  temp_js = self._store.get_template()
880
+
881
+ # TODO: insert_ID and id_ are the same thing:
882
+ for task in temp_js["tasks"]:
883
+ task.pop("id_", None)
884
+
916
885
  template = self.app.WorkflowTemplate.from_json_like(
917
886
  temp_js, self.template_components
918
887
  )
@@ -925,19 +894,18 @@ class Workflow:
925
894
  def tasks(self) -> app.WorkflowTaskList:
926
895
  if self._tasks is None:
927
896
  with self._store.cached_load():
928
- tasks_meta = self._store.get_all_tasks_metadata()
897
+ all_tasks = self._store.get_tasks()
929
898
  wk_tasks = []
930
- for idx, i in enumerate(tasks_meta):
899
+ for i in all_tasks:
931
900
  wk_task = self.app.WorkflowTask(
932
901
  workflow=self,
933
- template=self.template.tasks[idx],
934
- index=idx,
935
- num_elements=i["num_elements"],
936
- num_element_iterations=i["num_element_iterations"],
937
- num_EARs=i["num_EARs"],
902
+ template=self.template.tasks[i.index],
903
+ index=i.index,
904
+ element_IDs=i.element_IDs,
938
905
  )
939
906
  wk_tasks.append(wk_task)
940
907
  self._tasks = self.app.WorkflowTaskList(wk_tasks)
908
+
941
909
  return self._tasks
942
910
 
943
911
  @property
@@ -945,12 +913,13 @@ class Workflow:
945
913
  if self._loops is None:
946
914
  with self._store.cached_load():
947
915
  wk_loops = []
948
- for idx, loop_dat in enumerate(self._store.get_loops()):
916
+ for idx, loop_dat in self._store.get_loops().items():
949
917
  wk_loop = self.app.WorkflowLoop(
950
918
  index=idx,
951
919
  workflow=self,
952
920
  template=self.template.loops[idx],
953
- **loop_dat,
921
+ num_added_iterations=loop_dat["num_added_iterations"],
922
+ iterable_parameters=loop_dat["iterable_parameters"],
954
923
  )
955
924
  wk_loops.append(wk_loop)
956
925
  self._loops = self.app.WorkflowLoopList(wk_loops)
@@ -961,134 +930,466 @@ class Workflow:
961
930
  if self._submissions is None:
962
931
  with self._store.cached_load():
963
932
  subs = []
964
- for idx, sub_dat in enumerate(self._store.get_submissions()):
965
- sub_js = {"index": idx, "workflow": self, **sub_dat}
933
+ for idx, sub_dat in self._store.get_submissions().items():
934
+ sub_js = {"index": idx, **sub_dat}
966
935
  sub = self.app.Submission.from_json_like(sub_js)
936
+ sub.workflow = self
967
937
  subs.append(sub)
968
938
  self._submissions = subs
969
939
  return self._submissions
970
940
 
971
941
  @property
972
- def artifacts_path(self):
973
- # TODO: allow customisation of artifacts path at submission and resources level
974
- return self.path / "artifacts"
942
+ def num_added_tasks(self) -> int:
943
+ return self._store._get_num_total_added_tasks()
975
944
 
976
- @property
977
- def submissions_path(self):
978
- return self.artifacts_path / "submissions"
945
+ def get_store_EARs(self, id_lst: Iterable[int]) -> List[AnySEAR]:
946
+ return self._store.get_EARs(id_lst)
979
947
 
980
- @property
981
- def task_artifacts_path(self):
982
- return self.artifacts_path / "tasks"
948
+ def get_store_element_iterations(
949
+ self, id_lst: Iterable[int]
950
+ ) -> List[AnySElementIter]:
951
+ return self._store.get_element_iterations(id_lst)
983
952
 
984
- def elements(self) -> Iterator[app.Element]:
985
- for task in self.tasks:
986
- for element in task.elements:
987
- yield element
953
+ def get_store_elements(self, id_lst: Iterable[int]) -> List[AnySElement]:
954
+ return self._store.get_elements(id_lst)
988
955
 
989
- def copy(self, path=None) -> app.Workflow:
990
- """Copy the workflow to a new path and return the copied workflow."""
991
- if path is None:
992
- path = self.path.parent / Path(self.path.stem + "_copy" + self.path.suffix)
993
- if path.exists():
994
- raise ValueError(f"Path already exists: {path}.")
995
- self._store.copy(path=path)
996
- return self.app.Workflow(path=path)
956
+ def get_store_tasks(self, id_lst: Iterable[int]) -> List[AnySTask]:
957
+ return self._store.get_tasks_by_IDs(id_lst)
997
958
 
998
- def delete(self):
999
- self._store.delete()
959
+ def get_element_iteration_IDs_from_EAR_IDs(self, id_lst: Iterable[int]) -> List[int]:
960
+ return [i.elem_iter_ID for i in self.get_store_EARs(id_lst)]
1000
961
 
1001
- def _delete_no_confirm(self):
1002
- self._store.delete_no_confirm()
962
+ def get_element_IDs_from_EAR_IDs(self, id_lst: Iterable[int]) -> List[int]:
963
+ iter_IDs = self.get_element_iteration_IDs_from_EAR_IDs(id_lst)
964
+ return [i.element_ID for i in self.get_store_element_iterations(iter_IDs)]
1003
965
 
1004
- def rename(self, new_name: str):
1005
- raise NotImplementedError
966
+ def get_task_IDs_from_element_IDs(self, id_lst: Iterable[int]) -> List[int]:
967
+ return [i.task_ID for i in self.get_store_elements(id_lst)]
1006
968
 
1007
- def _submit(
1008
- self,
1009
- ignore_errors: Optional[bool] = False,
1010
- JS_parallelism: Optional[bool] = None,
1011
- print_stdout: Optional[bool] = False,
1012
- ) -> Tuple[List[Exception], Dict[int, int]]:
1013
- """Submit outstanding EARs for execution."""
969
+ def get_EAR_IDs_of_tasks(self, id_lst: int) -> List[int]:
970
+ """Get EAR IDs belonging to multiple tasks"""
971
+ return [i.id_ for i in self.get_EARs_of_tasks(id_lst)]
1014
972
 
1015
- # generate a new submission if there are no pending submissions:
1016
- pending = [i for i in self.submissions if i.needs_submit]
1017
- if not pending:
1018
- new_sub = self.add_submission(JS_parallelism=JS_parallelism)
1019
- if not new_sub:
1020
- raise ValueError("No pending element action runs to submit!")
1021
- pending = [new_sub]
973
+ def get_EARs_of_tasks(self, id_lst: Iterable[int]) -> List[app.ElementActionRun]:
974
+ """Get EARs belonging to multiple tasks"""
975
+ EARs = []
976
+ for i in id_lst:
977
+ task = self.tasks.get(insert_ID=i)
978
+ for elem in task.elements[:]:
979
+ for iter_ in elem.iterations:
980
+ for run in iter_.action_runs:
981
+ EARs.append(run)
982
+ return EARs
1022
983
 
1023
- self.submissions_path.mkdir(exist_ok=True, parents=True)
1024
- self.task_artifacts_path.mkdir(exist_ok=True, parents=True)
984
+ def get_element_iterations_of_tasks(
985
+ self, id_lst: Iterable[int]
986
+ ) -> List[app.ElementIteration]:
987
+ """Get element iterations belonging to multiple tasks"""
988
+ iters = []
989
+ for i in id_lst:
990
+ task = self.tasks.get(insert_ID=i)
991
+ for elem in task.elements[:]:
992
+ for iter_i in elem.iterations:
993
+ iters.append(iter_i)
994
+ return iters
995
+
996
+ def get_elements_from_IDs(self, id_lst: Iterable[int]) -> List[app.Element]:
997
+ """Return element objects from a list of IDs."""
998
+
999
+ store_elems = self._store.get_elements(id_lst)
1000
+
1001
+ task_IDs = [i.task_ID for i in store_elems]
1002
+ store_tasks = self._store.get_tasks_by_IDs(task_IDs)
1003
+
1004
+ index_paths = []
1005
+ for el, tk in zip(store_elems, store_tasks):
1006
+ elem_idx = tk.element_IDs.index(el.id_)
1007
+ index_paths.append(
1008
+ {
1009
+ "elem_idx": elem_idx,
1010
+ "task_idx": tk.index,
1011
+ }
1012
+ )
1025
1013
 
1026
- # for direct execution the submission must be persistent at submit-time, because
1027
- # it will be read by a new instance of the app:
1028
- self._store.commit_pending()
1014
+ objs = []
1015
+ for idx_dat in index_paths:
1016
+ task = self.tasks[idx_dat["task_idx"]]
1017
+ elem = task.elements[idx_dat["elem_idx"]]
1018
+ objs.append(elem)
1029
1019
 
1030
- # submit all pending submissions:
1031
- exceptions = []
1032
- submitted_js = {}
1033
- for sub in pending:
1020
+ return objs
1021
+
1022
+ def get_element_iterations_from_IDs(
1023
+ self, id_lst: Iterable[int]
1024
+ ) -> List[app.ElementIteration]:
1025
+ """Return element iteration objects from a list of IDs."""
1026
+
1027
+ store_iters = self._store.get_element_iterations(id_lst)
1028
+
1029
+ elem_IDs = [i.element_ID for i in store_iters]
1030
+ store_elems = self._store.get_elements(elem_IDs)
1031
+
1032
+ task_IDs = [i.task_ID for i in store_elems]
1033
+ store_tasks = self._store.get_tasks_by_IDs(task_IDs)
1034
+
1035
+ index_paths = []
1036
+ for it, el, tk in zip(store_iters, store_elems, store_tasks):
1037
+ iter_idx = el.iteration_IDs.index(it.id_)
1038
+ elem_idx = tk.element_IDs.index(el.id_)
1039
+ index_paths.append(
1040
+ {
1041
+ "iter_idx": iter_idx,
1042
+ "elem_idx": elem_idx,
1043
+ "task_idx": tk.index,
1044
+ }
1045
+ )
1046
+
1047
+ objs = []
1048
+ for idx_dat in index_paths:
1049
+ task = self.tasks[idx_dat["task_idx"]]
1050
+ elem = task.elements[idx_dat["elem_idx"]]
1051
+ iter_ = elem.iterations[idx_dat["iter_idx"]]
1052
+ objs.append(iter_)
1053
+
1054
+ return objs
1055
+
1056
+ def get_EARs_from_IDs(self, id_lst: Iterable[int]) -> List[app.ElementActionRun]:
1057
+ """Return element action run objects from a list of IDs."""
1058
+
1059
+ store_EARs = self._store.get_EARs(id_lst)
1060
+
1061
+ elem_iter_IDs = [i.elem_iter_ID for i in store_EARs]
1062
+ store_iters = self._store.get_element_iterations(elem_iter_IDs)
1063
+
1064
+ elem_IDs = [i.element_ID for i in store_iters]
1065
+ store_elems = self._store.get_elements(elem_IDs)
1066
+
1067
+ task_IDs = [i.task_ID for i in store_elems]
1068
+ store_tasks = self._store.get_tasks_by_IDs(task_IDs)
1069
+
1070
+ index_paths = []
1071
+ for rn, it, el, tk in zip(store_EARs, store_iters, store_elems, store_tasks):
1072
+ act_idx = rn.action_idx
1073
+ run_idx = it.EAR_IDs[act_idx].index(rn.id_)
1074
+ iter_idx = el.iteration_IDs.index(it.id_)
1075
+ elem_idx = tk.element_IDs.index(el.id_)
1076
+ index_paths.append(
1077
+ {
1078
+ "run_idx": run_idx,
1079
+ "action_idx": act_idx,
1080
+ "iter_idx": iter_idx,
1081
+ "elem_idx": elem_idx,
1082
+ "task_idx": tk.index,
1083
+ }
1084
+ )
1085
+
1086
+ objs = []
1087
+ for idx_dat in index_paths:
1088
+ task = self.tasks[idx_dat["task_idx"]]
1089
+ elem = task.elements[idx_dat["elem_idx"]]
1090
+ iter_ = elem.iterations[idx_dat["iter_idx"]]
1091
+ run = iter_.actions[idx_dat["action_idx"]].runs[idx_dat["run_idx"]]
1092
+ objs.append(run)
1093
+
1094
+ return objs
1095
+
1096
+ def get_all_elements(self) -> List[app.Element]:
1097
+ return self.get_elements_from_IDs(range(self.num_elements))
1098
+
1099
+ def get_all_element_iterations(self) -> List[app.ElementIteration]:
1100
+ return self.get_element_iterations_from_IDs(range(self.num_element_iterations))
1101
+
1102
+ def get_all_EARs(self) -> List[app.ElementActionRun]:
1103
+ return self.get_EARs_from_IDs(range(self.num_EARs))
1104
+
1105
@contextmanager
def batch_update(self, is_workflow_creation: bool = False) -> Iterator[None]:
    """A context manager that batches up structural changes to the workflow and
    commits them to disk all together when the context manager exits.

    Nested use is a no-op: only the outermost context commits or rolls back.

    Parameters
    ----------
    is_workflow_creation
        If True, a failure deletes the newly generated workflow and reinstates any
        replaced directory; on success the replaced directory is removed.

    """
    if self._in_batch_mode:
        # an enclosing batch update owns the commit/rollback
        yield
        return

    try:
        self.app.persistence_logger.info(
            f"entering batch update (is_workflow_creation={is_workflow_creation!r})"
        )
        self._in_batch_mode = True
        yield

    except Exception as err:
        self.app.persistence_logger.error("batch update exception!")
        self._in_batch_mode = False
        self._store._pending.reset()

        # roll back pending in-memory state on tasks, loops and the workflow:
        for wk_task in self.tasks:
            wk_task._reset_pending_element_IDs()
            wk_task.template._reset_pending_element_sets()

        for wk_loop in self.loops:
            wk_loop._reset_pending_num_added_iters()

        self._reject_pending()

        if is_workflow_creation:
            # creation failed, so no need to keep the newly generated workflow:
            self._store.delete_no_confirm()
            self._store.reinstate_replaced_dir()

        raise err

    # only reached when the body completed without an exception (the handler above
    # always re-raises):
    if self._store._pending:
        # is_diff = self._store.is_modified_on_disk()
        # if is_diff:
        #     raise WorkflowBatchUpdateFailedError(
        #         f"Workflow modified on disk since it was loaded!"
        #     )

        for wk_task in self.tasks:
            wk_task._accept_pending_element_IDs()
            wk_task.template._accept_pending_element_sets()

        for wk_loop in self.loops:
            wk_loop._accept_pending_num_added_iters()

        if is_workflow_creation:
            self._store.remove_replaced_dir()

        # TODO: handle errors in commit pending?
        self._store._pending.commit_all()

        self._accept_pending()

    self.app.persistence_logger.info("exiting batch update")
    self._in_batch_mode = False
1165
+
1166
@classmethod
def temporary_rename(cls, path: str, fs) -> str:
    """Rename an existing same-path workflow (directory) so we can restore it if
    workflow creation fails.

    Renaming is attempted (via the app's `perm_error_retry` decorator) until it
    completes successfully. This means multiple new paths may be created, where only
    the final path should be considered the successfully renamed workflow. Other
    paths will be deleted.

    Parameters
    ----------
    path
        Path of the existing workflow directory on the file system `fs`.
    fs
        File system object providing `rename`, `exists` and `rm`.

    Returns
    -------
    The path the workflow was finally renamed to.

    """

    # one entry per rename attempt; the last entry is the attempt that succeeded
    all_replaced = []

    @cls.app.perm_error_retry()
    def _temp_rename(path: str, fs) -> str:
        temp_ext = "".join(random.choices(string.ascii_letters, k=10))
        replaced = Path(f"{path}.{temp_ext}").as_posix()
        cls.app.persistence_logger.debug(
            f"temporary_rename: _temp_rename: {path!r} --> {replaced!r}."
        )
        all_replaced.append(replaced)
        try:
            fs.rename(path, replaced, recursive=True)
        except TypeError:
            # `SFTPFileSystem.rename` has no `recursive` argument:
            fs.rename(path, replaced)
        return replaced

    @cls.app.perm_error_retry()
    def _remove_path(path: str, fs) -> None:
        cls.app.persistence_logger.debug(f"temporary_rename: _remove_path: {path!r}.")
        while fs.exists(path):
            fs.rm(path, recursive=True)
            time.sleep(0.5)

    _temp_rename(path, fs)

    # clean up anything left behind by earlier, unsuccessful attempts:
    for stale_path in all_replaced[:-1]:
        _remove_path(stale_path, fs)

    return all_replaced[-1]
1205
+
1206
@classmethod
def _write_empty_workflow(
    cls,
    template: app.WorkflowTemplate,
    path: Optional[PathLike] = None,
    name: Optional[str] = None,
    overwrite: Optional[bool] = False,
    store: Optional[str] = DEFAULT_STORE_FORMAT,
    ts_fmt: Optional[str] = None,
    ts_name_fmt: Optional[str] = None,
    fs_kwargs: Optional[Dict] = None,
) -> app.Workflow:
    """Write a workflow with no tasks or loops to the store and return it.

    Parameters
    ----------
    template
        Workflow template; its tasks and loops are excluded from what is written.
    path
        The directory in which the workflow will be generated. The current directory
        if not specified.
    name
        Workflow directory name. Defaults to the template name plus a timestamp
        formatted with `ts_name_fmt`.
    overwrite
        If True, an existing workflow at the same path is temporarily renamed;
        otherwise a `ValueError` is raised when the path exists.
    store
        Store format string used to select the store class.
    ts_fmt, ts_name_fmt
        Timestamp formats; class defaults are used when not given.
    fs_kwargs
        Keyword arguments forwarded to `resolve_fsspec` and to the workflow.

    """
    now_local = datetime.now()

    # store all times in UTC, since NumPy doesn't support time zone info:
    now_utc = now_local.astimezone(tz=timezone.utc)

    if not ts_name_fmt:
        ts_name_fmt = cls._default_ts_name_fmt
    if not ts_fmt:
        ts_fmt = cls._default_ts_fmt
    if not name:
        name = f"{template.name}_{now_local.strftime(ts_name_fmt)}"

    fs_path = f"{path or '.'}/{name}"
    if not fs_kwargs:
        fs_kwargs = {}
    fs, path, pw = resolve_fsspec(path or "", **fs_kwargs)
    wk_path = f"{path}/{name}"

    replaced_wk = None
    if fs.exists(wk_path):
        cls.app.logger.debug("workflow path exists")
        if not overwrite:
            raise ValueError(
                f"Path already exists: {wk_path} on file system {fs!r}."
            )
        cls.app.logger.debug("renaming existing workflow path")
        replaced_wk = cls.temporary_rename(wk_path, fs)

    # make template-level inputs/resources think they are persistent:
    wk_dummy = _DummyPersistentWorkflow()
    param_src = {"type": "workflow_resources"}
    for resource in template.resources:
        resource.make_persistent(wk_dummy, param_src)

    template_js, template_sh = template.to_json_like(exclude=["tasks", "loops"])
    template_js["tasks"] = []
    template_js["loops"] = []

    creation_info = {
        "app_info": cls.app.get_info(),
        "create_time": now_utc.strftime(ts_fmt),
        "ts_fmt": ts_fmt,
        "ts_name_fmt": ts_name_fmt,
    }

    store_cls = store_cls_from_str_NEW(store)
    store_cls.write_empty_workflow(
        app=cls.app,
        template_js=template_js,
        template_components_js=template_sh,
        wk_path=wk_path,
        fs=fs,
        fs_path=fs_path,
        replaced_wk=replaced_wk,
        creation_info=creation_info,
    )

    fs_kwargs = {"password": pw, **fs_kwargs}
    wk = cls(fs_path, store_fmt=store, fs_kwargs=fs_kwargs)

    # actually make template inputs/resources persistent, now the workflow exists:
    wk_dummy.make_persistent(wk)

    return wk
1288
+
1289
def to_zip(self) -> str:
    """Call the store's `to_zip` and return whatever it returns."""
    zipped = self._store.to_zip()
    return zipped
1291
+
1292
def copy(self, path=None) -> str:
    """Copy the workflow to a new path and return the copied workflow path.

    The copy itself is performed by the store's `copy` method.
    """
    copied_path = self._store.copy(path)
    return copied_path
1295
+
1296
def delete(self):
    """Delete the workflow by calling the store's `delete` method."""
    store = self._store
    store.delete()
1298
+
1299
def _delete_no_confirm(self):
    """Delete the workflow by calling the store's `delete_no_confirm` method."""
    store = self._store
    store.delete_no_confirm()
1301
+
1302
def get_parameters(
    self, id_lst: Iterable[int], **kwargs: Dict
) -> List[AnySParameter]:
    """Retrieve the store parameters with the given IDs.

    Extra keyword arguments are forwarded unchanged to the store.
    """
    store = self._store
    return store.get_parameters(id_lst, **kwargs)
1306
+
1307
def get_parameter_sources(self, id_lst: Iterable[int]) -> List[Dict]:
    """Retrieve the sources of the parameters with the given IDs from the store."""
    store = self._store
    return store.get_parameter_sources(id_lst)
1309
+
1310
def get_parameter_set_statuses(self, id_lst: Iterable[int]) -> List[bool]:
    """Retrieve the set-status of each parameter with the given IDs from the store."""
    store = self._store
    return store.get_parameter_set_statuses(id_lst)
1312
+
1313
def get_parameter(self, index: int, **kwargs: Dict) -> AnySParameter:
    """Retrieve a single store parameter by ID, via `get_parameters`."""
    (param,) = self.get_parameters([index], **kwargs)[:1]
    return param
1315
+
1316
def get_parameter_data(self, index: int, **kwargs: Dict) -> Any:
    """Return the data of a single parameter, or its file if it has no data.

    The parameter is fetched once. The `file` attribute is returned only when `data`
    is None, so falsy data (e.g. `0`, `False`, an empty list) is returned as-is,
    and data without a well-defined truth value (e.g. a multi-element NumPy array)
    does not raise.

    Parameters
    ----------
    index
        ID of the parameter to retrieve.
    **kwargs
        Forwarded to `get_parameter`.

    """
    param = self.get_parameter(index, **kwargs)
    if param.data is not None:
        return param.data
    return param.file
1321
+
1322
def get_parameter_source(self, index: int) -> Dict:
    """Retrieve the source of a single parameter, via `get_parameter_sources`."""
    sources = self.get_parameter_sources([index])
    return sources[0]
1324
+
1325
def is_parameter_set(self, index: int) -> bool:
    """Return the set-status of a single parameter, via
    `get_parameter_set_statuses`."""
    statuses = self.get_parameter_set_statuses([index])
    return statuses[0]
1327
+
1328
def get_all_parameters(self, **kwargs: Dict) -> List[AnySParameter]:
    """Retrieve all store parameters.

    IDs ``0 .. N - 1`` are requested, where N is the store's total parameter count.
    """
    store = self._store
    all_param_IDs = list(range(store._get_num_total_parameters()))
    return store.get_parameters(all_param_IDs, **kwargs)
1333
+
1334
def get_all_parameter_data(self, **kwargs: Dict) -> Dict[int, Any]:
    """Retrieve all workflow parameter data.

    Returns a dict mapping each parameter's ID to its data, or to its file when the
    data is None. Falsy data (e.g. `0` or an empty list) is kept rather than being
    replaced by the file, and data with no well-defined truth value (e.g. a
    multi-element NumPy array) does not raise.

    Parameters
    ----------
    **kwargs
        Forwarded to `get_all_parameters`.

    """
    return {
        param.id_: (param.data if param.data is not None else param.file)
        for param in self.get_all_parameters(**kwargs)
    }
1338
+
1339
def check_parameters_exist(
    self, id_lst: Union[int, List[int]]
) -> Union[bool, List[bool]]:
    """Check with the store whether parameters with the given IDs exist.

    A single integer ID gives a single result; otherwise the store's list of
    results is returned.
    """
    is_single = isinstance(id_lst, int)
    ids = [id_lst] if is_single else id_lst
    found = self._store.check_parameters_exist(ids)
    return found[0] if is_single else found
1350
+
1351
def _add_unset_parameter_data(self, source: Dict) -> int:
    """Add an unset parameter with the given source to the store and return the
    store's result."""
    # TODO: use this for unset files as well
    store = self._store
    return store.add_unset_parameter(source)
1354
+
1355
def _add_parameter_data(self, data, source: Dict) -> int:
    """Add a set parameter (data plus source) to the store and return the store's
    result."""
    store = self._store
    return store.add_set_parameter(data, source)
1357
+
1358
def _add_file(
    self,
    store_contents: bool,
    is_input: bool,
    source: Dict,
    path=None,
    contents=None,
    filename: str = None,
) -> int:
    """Add a file to the store and return the store's result.

    All arguments are forwarded by keyword to the store's `add_file`.
    """
    file_kwargs = {
        "store_contents": store_contents,
        "is_input": is_input,
        "source": source,
        "path": path,
        "contents": contents,
        "filename": filename,
    }
    return self._store.add_file(**file_kwargs)
1375
+
1376
def _set_file(
    self,
    param_id: int,
    store_contents: bool,
    is_input: bool,
    path=None,
    contents=None,
    filename: str = None,
) -> None:
    """Set the file associated with an existing parameter in the store.

    All arguments are forwarded by keyword to the store's `set_file`. Nothing is
    returned (the return annotation previously claimed `int`, but the method has
    never returned a value).
    """
    self._store.set_file(
        param_id=param_id,
        store_contents=store_contents,
        is_input=is_input,
        path=path,
        contents=contents,
        filename=filename,
    )
1092
1393
 
1093
1394
  def get_task_unique_names(
1094
1395
  self, map_to_insert_ID: bool = False
@@ -1116,358 +1417,313 @@ class Workflow:
1116
1417
 
1117
1418
  return uniq_names[new_index]
1118
1419
 
1119
- def _add_empty_task(
1120
- self,
1121
- task: app.Task,
1122
- new_index: Optional[int] = None,
1123
- ) -> app.WorkflowTask:
1124
- if new_index is None:
1125
- new_index = self.num_tasks
1420
def _get_empty_pending(self) -> Dict:
    """Return a fresh, empty record of pending in-memory changes.

    The record has an empty list per template component type, and empty lists of
    pending task, loop and submission indices.
    """
    template_components = {}
    for comp_type in TEMPLATE_COMP_TYPES:
        template_components[comp_type] = []

    empty = {"template_components": template_components}
    empty["tasks"] = []  # list of int
    empty["loops"] = []  # list of int
    empty["submissions"] = []  # list of int
    return empty
1126
1427
 
1127
- insert_ID = self.num_added_tasks
1428
def _accept_pending(self) -> None:
    """Accept pending in-memory changes by clearing the pending record."""
    # nothing to undo on acceptance; only the bookkeeping is reset
    self._reset_pending()
1128
1430
 
1129
- # make a copy with persistent schema inputs:
1130
- task_c, _ = task.to_persistent(self, insert_ID)
1431
def _reset_pending(self) -> None:
    """Replace the pending record with a new empty one from `_get_empty_pending`."""
    empty = self._get_empty_pending()
    self._pending = empty
1131
1433
 
1132
- # add to the WorkflowTemplate:
1133
- self.template._add_empty_task(task_c, new_index, insert_ID)
1434
def _reject_pending(self) -> None:
    """Revert pending changes to the in-memory representation of the workflow.

    This deletes new tasks, new template component data, new loops, and new
    submissions. Element additions to existing (non-pending) tasks are separately
    rejected/accepted by the WorkflowTask object.

    """
    pending = self._pending

    # each group is walked back-to-front so the index references stay correct as
    # objects are removed

    for task_idx in reversed(pending["tasks"]):
        self.tasks._remove_object(task_idx)
        self.template.tasks.pop(task_idx)

    for comp_type, comp_indices in pending["template_components"].items():
        for comp_idx in reversed(comp_indices):
            self.template_components[comp_type]._remove_object(comp_idx)

    for loop_idx in reversed(pending["loops"]):
        self.loops._remove_object(loop_idx)
        self.template.loops.pop(loop_idx)

    for sub_idx in reversed(pending["submissions"]):
        self._submissions.pop(sub_idx)

    self._reset_pending()
1161
1462
 
1162
- new_index = self.num_loops
1463
@property
def num_tasks(self):
    """Total number of tasks, as reported by the store."""
    store = self._store
    return store._get_num_total_tasks()
1163
1466
 
1164
- # don't modify passed object:
1165
- loop_c = copy.deepcopy(loop)
1467
@property
def num_submissions(self):
    """Total number of submissions, as reported by the store."""
    store = self._store
    return store._get_num_total_submissions()
1166
1470
 
1167
- # add to the WorkflowTemplate:
1168
- self.template._add_empty_loop(loop_c)
1471
@property
def num_elements(self):
    """Total number of elements, as reported by the store."""
    store = self._store
    return store._get_num_total_elements()
1169
1474
 
1170
- # create and insert a new WorkflowLoop:
1171
- self.loops.add_object(
1172
- self.app.WorkflowLoop.new_empty_loop(
1173
- index=new_index,
1174
- workflow=self,
1175
- template=loop_c,
1176
- )
1177
- )
1178
- wk_loop = self.loops[new_index]
1475
@property
def num_element_iterations(self):
    """Total number of element iterations, as reported by the store."""
    store = self._store
    return store._get_num_total_elem_iters()
1179
1478
 
1180
- # update persistent store:
1181
- loop_js, _ = loop_c.to_json_like()
1182
- task_indices = [self.tasks.get(insert_ID=i).index for i in loop_c.task_insert_IDs]
1183
- self._store.add_loop(
1184
- task_indices=task_indices,
1185
- loop_js=loop_js,
1186
- iterable_parameters=wk_loop.iterable_parameters,
1187
- )
1479
@property
def num_EARs(self):
    """Total number of element action runs, as reported by the store."""
    store = self._store
    return store._get_num_total_EARs()
1188
1482
 
1189
- self._pending["loops"].append(new_index)
1483
@property
def num_loops(self) -> int:
    """Total number of loops, as reported by the store."""
    store = self._store
    return store._get_num_total_loops()
1190
1486
 
1191
- return wk_loop
1487
@property
def artifacts_path(self):
    """The "artifacts" directory directly beneath the workflow path."""
    # TODO: allow customisation of artifacts path at submission and resources level
    workflow_dir = Path(self.path)
    return workflow_dir / "artifacts"
1192
1491
 
1193
- def _add_loop(self, loop: app.Loop, parent_loop_indices: Dict = None) -> None:
1194
- new_wk_loop = self._add_empty_loop(loop)
1195
- if loop.num_iterations is not None:
1196
- # fixed number of iterations, so add remaining N > 0 iterations:
1197
- for _ in range(loop.num_iterations - 1):
1198
- new_wk_loop.add_iteration(parent_loop_indices=parent_loop_indices)
1492
@property
def input_files_path(self):
    """The input-files directory (named by `_input_files_dir_name`) beneath the
    artifacts path."""
    parent_dir = self.artifacts_path
    return parent_dir / self._input_files_dir_name
1199
1495
 
1200
- def add_loop(self, loop: app.Loop, parent_loop_indices: Dict = None) -> None:
1201
- """Add a loop to a subset of workflow tasks."""
1496
@property
def submissions_path(self):
    """The "submissions" directory beneath the artifacts path."""
    parent_dir = self.artifacts_path
    return parent_dir / "submissions"
1499
+
1500
@property
def task_artifacts_path(self):
    """The "tasks" directory beneath the artifacts path."""
    parent_dir = self.artifacts_path
    return parent_dir / "tasks"
1503
+
1504
@property
def execution_path(self):
    """The execution directory (named by `_exec_dir_name`) beneath the workflow
    path."""
    workflow_dir = Path(self.path)
    return workflow_dir / self._exec_dir_name
1507
+
1508
def get_task_elements(self, task: app.Task, selection: slice) -> List[app.Element]:
    """Build `Element` objects for the selected elements of a task.

    Element data is read from the store using the task's insert ID; the "task_ID"
    entry of each record is dropped and the remaining entries are passed to the
    `Element` constructor as keyword arguments, along with `task`.
    """
    elements = []
    for elem_dat in self._store.get_task_elements(task.insert_ID, selection):
        elem_kwargs = {key: val for key, val in elem_dat.items() if key != "task_ID"}
        elements.append(self.app.Element(task=task, **elem_kwargs))
    return elements
1513
+
1514
def set_EAR_submission_index(self, EAR_ID: int, sub_idx: int) -> None:
    """Set the submission index of an EAR.

    The store update is made within a cached load and a batch update.
    """
    with self._store.cached_load(), self.batch_update():
        self._store.set_EAR_submission_index(EAR_ID, sub_idx)
1209
1519
 
1210
- def add_task(self, task: app.Task, new_index: Optional[int] = None) -> None:
1520
def set_EAR_start(self, EAR_ID: int) -> None:
    """Set the start time on an EAR.

    The store update is made within a cached load and a batch update.
    """
    self.app.logger.debug(f"Setting start for EAR ID {EAR_ID!r}")
    with self._store.cached_load(), self.batch_update():
        self._store.set_EAR_start(EAR_ID)
1214
1526
 
1215
- def add_task_after(self, new_task: app.Task, task_ref: app.Task = None) -> None:
1216
- """Add a new task after the specified task.
1527
def set_EAR_end(self, EAR_ID: int, exit_code: int) -> None:
    """Set the end time and exit code on an EAR.

    If the run was not successful, also set all downstream dependent EARs to be
    skipped. Also save any generated input/output files.

    A run is successful if its exit code is zero, or if its action is abortable and
    the exit code is `ABORT_EXIT_CODE`. An aborted run is therefore not treated as a
    failure, and its dependents are not skipped.

    Parameters
    ----------
    EAR_ID
        ID of the element action run that has ended.
    exit_code
        Exit code of the run.

    """
    self.app.logger.debug(
        f"Setting end for EAR ID {EAR_ID!r} with exit code {exit_code!r}."
    )
    with self._store.cached_load():
        EAR = self.get_EARs_from_IDs([EAR_ID])[0]
        with self.batch_update():
            success = exit_code == 0  # TODO more sophisticated success heuristics
            if EAR.action.abortable and exit_code == ABORT_EXIT_CODE:
                # the point of aborting an EAR is to continue with the workflow:
                success = True
            self._store.set_EAR_end(EAR_ID, exit_code, success)

            for IFG_i in EAR.action.input_file_generators:
                inp_file = IFG_i.input_file
                self.app.logger.debug(
                    f"Saving EAR input file: {inp_file.label} for EAR ID {EAR_ID!r}."
                )
                param_id = EAR.data_idx[f"input_files.{inp_file.label}"]
                self._set_file(
                    param_id=param_id,
                    store_contents=True,  # TODO: make optional according to IFG
                    is_input=False,
                    path=Path(inp_file.value()).resolve(),
                )

            for OFP_i in EAR.action.output_file_parsers:
                for out_file_j in OFP_i.output_files:
                    if out_file_j.label not in OFP_i.save_files:
                        continue
                    self.app.logger.debug(
                        f"Saving EAR output file: {out_file_j.label} for EAR ID "
                        f"{EAR_ID!r}."
                    )
                    param_id = EAR.data_idx[f"output_files.{out_file_j.label}"]
                    self._set_file(
                        param_id=param_id,
                        store_contents=True,  # TODO: make optional according to OFP
                        is_input=False,
                        path=Path(out_file_j.value()).resolve(),
                    )

            # gate on `success` rather than the raw exit code, so that an aborted
            # run (non-zero exit code, but successful) does not skip its dependents:
            if not success:
                for EAR_dep_ID in EAR.get_dependent_EARs(as_objects=False):
                    # TODO: this needs to be recursive?
                    self.app.logger.debug(
                        f"Setting EAR ID {EAR_dep_ID!r} to skip because it depends on"
                        f" EAR ID {EAR_ID!r}, which exited with a non-zero exit code:"
                        f" {exit_code!r}."
                    )
                    self._store.set_EAR_skip(EAR_dep_ID)
1240
1584
 
1241
- def get_parameter_data(self, index: int) -> Tuple[bool, Any]:
1242
- return self._store.get_parameter_data(index)
1585
def set_EAR_skip(self, EAR_ID: int) -> None:
    """Record that an EAR is to be skipped due to an upstream failure.

    The store update is made within a cached load and a batch update.
    """
    with self._store.cached_load(), self.batch_update():
        self._store.set_EAR_skip(EAR_ID)
1243
1590
 
1244
- def get_parameter_source(self, index: int) -> Dict:
1245
- return self._store.get_parameter_source(index)
1591
def get_EAR_skipped(self, EAR_ID: int) -> bool:
    """Check if an EAR is to be skipped.

    Returns the store's skip status for the EAR, read within a cached load. (The
    return annotation previously claimed `None`, although a value has always been
    returned.)
    """
    with self._store.cached_load():
        return self._store.get_EAR_skipped(EAR_ID)
1246
1595
 
1247
- def get_all_parameter_data(self) -> Dict[int, Any]:
1248
- return self._store.get_all_parameter_data()
1596
def set_parameter_value(self, param_id: int, value: Any) -> None:
    """Set the value of a parameter in the store, within a cached load and a batch
    update."""
    with self._store.cached_load(), self.batch_update():
        self._store.set_parameter_value(param_id, value)
1249
1600
 
1250
- def is_parameter_set(self, index: int) -> bool:
1251
- return self._store.is_parameter_set(index)
1601
def elements(self) -> Iterator[app.Element]:
    """Yield every element of every task, task by task in workflow order."""
    for wk_task in self.tasks:
        yield from wk_task.elements[:]
1252
1605
 
1253
- def check_parameters_exist(
1254
- self, indices: Union[int, List[int]]
1255
- ) -> Union[bool, List[bool]]:
1256
- return self._store.check_parameters_exist(indices)
1606
def get_iteration_task_pathway(self):
    """Return the ordered list of (task insert ID, loop indices) pairs that
    represents the tasks with all added loop iterations unrolled.

    The pathway starts as one entry per task with an empty loop-index dict. For each
    loop, the entries belonging to the loop's tasks are replaced by one copy per
    added iteration, each copy recording the iteration index under the loop's name.
    """
    pathway = [(wk_task.insert_ID, {}) for wk_task in self.tasks]

    for loop in self.loops:  # TODO: order by depth (inner loops first?)
        task_subset = loop.task_insert_IDs
        subset_idx = [
            pos for pos, (insert_ID, _) in enumerate(pathway) if insert_ID in task_subset
        ]
        looped_pathway = []
        for iter_i in range(loop.num_added_iterations):
            for pos in subset_idx:
                # deep copy so each iteration has its own loop-index dict
                item = copy.deepcopy(pathway[pos])
                item[1][loop.name] = iter_i
                looped_pathway.append(item)

        # replaced pathway `sub_idx` items with `looped_pathway` items:
        first, last = subset_idx[0], subset_idx[-1]
        pathway = replace_items(pathway, first, last + 1, looped_pathway)

    return pathway
1269
1628
 
1270
- # TODO: test thoroughly!
1629
def _submit(
    self,
    ignore_errors: Optional[bool] = False,
    JS_parallelism: Optional[bool] = None,
    print_stdout: Optional[bool] = False,
) -> Tuple[List[Exception], Dict[int, int]]:
    """Submit outstanding EARs for execution.

    Returns
    -------
    exceptions
        `SubmissionFailure` exceptions raised by individual submissions.
    submitted_js
        Maps the index of each successfully submitted submission to the value
        returned by its `submit` method.

    Raises
    ------
    ValueError
        If there are no pending submissions and a new one could not be added.

    """

    # generate a new submission if there are no pending submissions:
    pending = [sub for sub in self.submissions if sub.needs_submit]
    if not pending:
        new_sub = self._add_submission(JS_parallelism=JS_parallelism)
        if not new_sub:
            raise ValueError("No pending element action runs to submit!")
        pending = [new_sub]

    self.submissions_path.mkdir(exist_ok=True, parents=True)
    self.execution_path.mkdir(exist_ok=True, parents=True)
    self.task_artifacts_path.mkdir(exist_ok=True, parents=True)

    # for direct execution the submission must be persistent at submit-time, because
    # it will be read by a new instance of the app:
    self._store._pending.commit_all()

    # submit all pending submissions:
    exceptions = []
    submitted_js = {}
    for sub in pending:
        try:
            sub_js_idx = sub.submit(
                ignore_errors=ignore_errors,
                print_stdout=print_stdout,
            )
            submitted_js[sub.index] = sub_js_idx
        except SubmissionFailure as exc:
            exceptions.append(exc)

    return exceptions, submitted_js
1354
1667
 
1355
- def set_EAR_submission_indices(
1668
def submit(
    self,
    ignore_errors: Optional[bool] = False,
    JS_parallelism: Optional[bool] = None,
    print_stdout: Optional[bool] = False,
) -> Dict[int, int]:
    """Submit the workflow and return the result mapping from `_submit`.

    Raises
    ------
    WorkflowSubmissionFailure
        If any submission failed; raised only after the batch update has exited, so
        that updates are committed first.

    """
    with self._store.cached_load(), self.batch_update():
        # commit updates before raising exception:
        exceptions, submitted_js = self._submit(
            ignore_errors=ignore_errors,
            JS_parallelism=JS_parallelism,
            print_stdout=print_stdout,
        )

    if not exceptions:
        return submitted_js

    msg = "\n" + "\n\n".join([exc.message for exc in exceptions])
    raise WorkflowSubmissionFailure(msg)
1396
1688
 
1397
- def _from_internal_get_EAR(
1398
- self,
1399
- submission_idx: int,
1400
- jobscript_idx: int,
1401
- JS_element_idx: int,
1402
- JS_action_idx: int,
1403
- ):
1689
def add_submission(self, JS_parallelism: Optional[bool] = None) -> app.Submission:
    """Add a new submission via `_add_submission`, within a cached load and a batch
    update, and return its result."""
    with self._store.cached_load(), self.batch_update():
        return self._add_submission(JS_parallelism)
1411
1693
 
1412
- def write_commands(
1413
- self,
1414
- submission_idx: int,
1415
- jobscript_idx: int,
1416
- JS_element_idx: int,
1417
- JS_action_idx: int,
1418
- ) -> None:
1419
- """Write run-time commands for a given EAR."""
1420
- with self._store.cached_load():
1421
- jobscript, EAR = self._from_internal_get_EAR(
1422
- submission_idx, jobscript_idx, JS_element_idx, JS_action_idx
1694
def _add_submission(self, JS_parallelism: Optional[bool] = None) -> app.Submission:
    """Create a new submission from the currently resolvable jobscripts.

    If the jobscripts contain no EAR IDs, a message is printed and None is returned.
    Otherwise each EAR is assigned the new submission index, the submission is
    recorded as pending in memory and added to the store, and the new submission
    object is returned.
    """
    new_idx = self.num_submissions
    _ = self.submissions  # TODO: just to ensure `submissions` is loaded
    sub_obj = self.app.Submission(
        index=new_idx,
        workflow=self,
        jobscripts=self.resolve_jobscripts(),
        JS_parallelism=JS_parallelism,
    )

    all_EAR_ID = []
    for js in sub_obj.jobscripts:
        all_EAR_ID.extend(js.EAR_ID.flatten())
    # EAR_indices = sub_obj.prepare_EAR_submission_idx_update()
    if not all_EAR_ID:
        print(
            "There are no pending element action runs, so a new submission was not "
            "added."
        )
        return

    with self._store.cached_load(), self.batch_update():
        for EAR_ID_i in all_EAR_ID:
            self._store.set_EAR_submission_index(EAR_ID=EAR_ID_i, sub_idx=new_idx)

    # self.set_EAR_submission_indices(sub_idx=new_idx, EAR_indices=EAR_indices)

    sub_obj_js, _ = sub_obj.to_json_like()
    self._submissions.append(sub_obj)
    self._pending["submissions"].append(new_idx)
    with self._store.cached_load(), self.batch_update():
        self._store.add_submission(new_idx, sub_obj_js)

    return self.submissions[new_idx]
1471
1727
 
1472
1728
  def resolve_jobscripts(self) -> List[app.Jobscript]:
1473
1729
  js, element_deps = self._resolve_singular_jobscripts()
@@ -1492,6 +1748,8 @@ class Workflow:
1492
1748
  -------
1493
1749
  submission_jobscripts
1494
1750
  all_element_deps
1751
+ For a given jobscript index, for a given jobscript element index within that
1752
+ jobscript, this is a list of the EAR IDs that the element depends on.
1495
1753
 
1496
1754
  """
1497
1755
 
@@ -1520,8 +1778,8 @@ class Workflow:
1520
1778
  len(task_actions),
1521
1779
  len(task_elements[task.insert_ID]),
1522
1780
  )
1523
- EAR_idx_arr = np.empty(EAR_idx_arr_shape, dtype=np.int32)
1524
- EAR_idx_arr[:] = -1
1781
+ EAR_ID_arr = np.empty(EAR_idx_arr_shape, dtype=np.int32)
1782
+ EAR_ID_arr[:] = -1
1525
1783
 
1526
1784
  new_js_idx = len(submission_jobscripts)
1527
1785
 
@@ -1530,8 +1788,8 @@ class Workflow:
1530
1788
  "task_loop_idx": [loop_idx_i],
1531
1789
  "task_actions": task_actions, # map jobscript actions to task actions
1532
1790
  "task_elements": task_elements, # map jobscript elements to task elements
1533
- "EARs": {}, # keys are (task insert ID, elem_idx, EAR_idx)
1534
- "EAR_idx": EAR_idx_arr,
1791
+ # "EARs": {}, # keys are (task insert ID, elem_idx, EAR_idx)
1792
+ "EAR_ID": EAR_ID_arr,
1535
1793
  "resources": res[js_dat["resources"]],
1536
1794
  "resource_hash": res_hash[js_dat["resources"]],
1537
1795
  "dependencies": {},
@@ -1540,43 +1798,51 @@ class Workflow:
1540
1798
  js_elem_idx = task_elements[task.insert_ID].index((elem_idx))
1541
1799
  all_EAR_IDs = []
1542
1800
  for act_idx in act_indices:
1543
- EAR_idx, run_idx, iter_idx = (
1544
- i.item() for i in EAR_map[act_idx, elem_idx]
1545
- )
1801
+ EAR_ID_i = EAR_map[act_idx, elem_idx].item()
1802
+ # EAR_idx, run_idx, iter_idx = (
1803
+ # i.item() for i in EAR_map[act_idx, elem_idx]
1804
+ # )
1546
1805
  # construct EAR_ID object so we can retrieve the EAR objects and
1547
1806
  # hence their dependencies:
1548
- EAR_id = EAR_ID(
1549
- task_insert_ID=task.insert_ID,
1550
- element_idx=elem_idx,
1551
- iteration_idx=iter_idx,
1552
- action_idx=act_idx,
1553
- run_idx=run_idx,
1554
- EAR_idx=EAR_idx,
1555
- )
1556
- all_EAR_IDs.append(EAR_id)
1557
- js_i["EARs"][(task.insert_ID, elem_idx, EAR_idx)] = (
1558
- iter_idx,
1559
- act_idx,
1560
- run_idx,
1561
- )
1807
+ # EAR_id = EAR_ID(
1808
+ # task_insert_ID=task.insert_ID,
1809
+ # element_idx=elem_idx,
1810
+ # iteration_idx=iter_idx,
1811
+ # action_idx=act_idx,
1812
+ # run_idx=run_idx,
1813
+ # EAR_idx=EAR_idx,
1814
+ # )
1815
+ all_EAR_IDs.append(EAR_ID_i)
1816
+ # js_i["EARs"][(task.insert_ID, elem_idx, EAR_idx)] = (
1817
+ # iter_idx,
1818
+ # act_idx,
1819
+ # run_idx,
1820
+ # )
1562
1821
 
1563
1822
  js_act_idx = task_actions.index([task.insert_ID, act_idx, 0])
1564
- js_i["EAR_idx"][js_act_idx][js_elem_idx] = EAR_idx
1823
+ # js_i["EAR_idx"][js_act_idx][js_elem_idx] = EAR_idx
1824
+ js_i["EAR_ID"][js_act_idx][js_elem_idx] = EAR_ID_i
1565
1825
 
1566
1826
  # get indices of EARs that this element depends on:
1567
1827
  EAR_objs = self.get_EARs_from_IDs(all_EAR_IDs)
1568
1828
  EAR_deps = [i.get_EAR_dependencies() for i in EAR_objs]
1569
1829
  EAR_deps_flat = [j for i in EAR_deps for j in i]
1570
1830
 
1831
+ # print(f"{EAR_deps=}")
1832
+ # print(f"{EAR_deps_flat=}")
1833
+
1571
1834
  # represent EAR dependencies of this jobscript using the same key
1572
1835
  # format as in the "EARs" dict, to allow for quick lookup when
1573
1836
  # resolving dependencies between jobscripts; also, no need to include
1574
1837
  # EAR dependencies that are in this jobscript:
1838
+ # EAR_deps_EAR_idx = [
1839
+ # (i.task_insert_ID, i.element_idx, i.EAR_idx)
1840
+ # for i in EAR_deps_flat
1841
+ # if (i.task_insert_ID, i.element_idx, i.EAR_idx)
1842
+ # not in js_i["EARs"]
1843
+ # ]
1575
1844
  EAR_deps_EAR_idx = [
1576
- (i.task_insert_ID, i.element_idx, i.EAR_idx)
1577
- for i in EAR_deps_flat
1578
- if (i.task_insert_ID, i.element_idx, i.EAR_idx)
1579
- not in js_i["EARs"]
1845
+ i for i in EAR_deps_flat if i not in js_i["EAR_ID"]
1580
1846
  ]
1581
1847
  if EAR_deps_EAR_idx:
1582
1848
  if new_js_idx not in all_element_deps:
@@ -1588,43 +1854,65 @@ class Workflow:
1588
1854
 
1589
1855
  return submission_jobscripts, all_element_deps
1590
1856
 
1591
- def get_iteration_task_pathway(self):
1592
- pathway = []
1593
- for task in self.tasks:
1594
- loop_idx = {}
1595
- pathway.append((task.insert_ID, loop_idx))
1596
-
1597
- for loop in self.loops: # TODO: order by depth (inner loops first?)
1598
- task_subset = loop.task_insert_IDs
1599
- subset_idx = [idx for idx, i in enumerate(pathway) if i[0] in task_subset]
1600
- looped_pathway = []
1601
- for iter_i in range(loop.num_added_iterations):
1602
- for j in subset_idx:
1603
- item_j = copy.deepcopy(pathway[j])
1604
- item_j[1][loop.name] = iter_i
1605
- looped_pathway.append(item_j)
1606
-
1607
- # replaced pathway `sub_idx` items with `looped_pathway` items:
1608
- pathway = replace_items(
1609
- pathway, subset_idx[0], subset_idx[-1] + 1, looped_pathway
1610
- )
1857
def write_commands(
    self,
    submission_idx: int,
    jobscript_idx: int,
    JS_action_idx: int,
    EAR_ID: int,
) -> None:
    """Compose the run-time commands of one EAR and write them to a file.

    Parameters
    ----------
    submission_idx
        Index into `self.submissions`.
    jobscript_idx
        Index into the `jobscripts` of the selected submission.
    JS_action_idx
        Passed to the jobscript's `get_commands_file_name` to obtain the name
        of the file that is written.
    EAR_ID
        ID of the EAR whose commands are composed; also forwarded to each
        parameter-saving command.

    """
    with self._store.cached_load():
        js = self.submissions[submission_idx].jobscripts[jobscript_idx]
        run = self.get_EARs_from_IDs([EAR_ID])[0]
        script, save_vars = run.compose_commands(js)

        # one parameter-saving command is appended per
        # (parameter name, shell variable name) pair:
        for p_name, var_name in save_vars:
            script += js.shell.format_save_parameter(
                workflow_app_alias=js.workflow_app_alias,
                param_name=p_name,
                shell_var_name=var_name,
                EAR_ID=EAR_ID,
            )

        script = js.shell.wrap_in_subshell(script, run.action.abortable)

        # (assuming we have CD'd correctly to the element run directory)
        out_path = Path(js.get_commands_file_name(JS_action_idx))
        with out_path.open("wt", newline="\n") as handle:
            handle.write(script)
1611
1881
 
1612
- return pathway
1882
def save_parameter(
    self,
    name: str,
    value: Any,
    EAR_ID: int,
):
    """Set the value of the parameter that a given EAR associates with `name`.

    Parameters
    ----------
    name
        Key looked up in the EAR's `data_idx` mapping to find the parameter ID.
    value
        Value passed on to `set_parameter_value` for that parameter ID.
    EAR_ID
        ID of the EAR whose `data_idx` is consulted.

    """
    # both context managers are entered in the same order as nested `with` blocks
    with self._store.cached_load(), self.batch_update():
        (run,) = self.get_EARs_from_IDs([EAR_ID])[:1]
        self.set_parameter_value(run.data_idx[name], value)
1613
1893
 
1614
1894
  def show_all_EAR_statuses(self):
1615
1895
  print(
1616
1896
  f"{'task':8s} {'element':8s} {'iteration':8s} {'action':8s} "
1617
- f"{'run':8s} {'status':8s}"
1897
+ f"{'run':8s} {'sub.':8s} {'exitcode':8s} {'success':8s} {'skip':8s}"
1618
1898
  )
1619
1899
  for task in self.tasks:
1620
- for element in task.elements:
1900
+ for element in task.elements[:]:
1621
1901
  for iter_idx, iteration in enumerate(element.iterations):
1622
1902
  for act_idx, action_runs in iteration.actions.items():
1623
1903
  for run_idx, EAR in enumerate(action_runs.runs):
1904
+ suc = EAR.success if EAR.success is not None else "-"
1905
+ if EAR.exit_code is not None:
1906
+ exc = f"{EAR.exit_code:^8d}"
1907
+ else:
1908
+ exc = f"{'-':^8}"
1624
1909
  print(
1625
1910
  f"{task.insert_ID:^8d} {element.index:^8d} "
1626
1911
  f"{iter_idx:^8d} {act_idx:^8d} {run_idx:^8d} "
1627
1912
  f"{EAR.submission_status.name.lower():^8s}"
1913
+ f"{exc}"
1914
+ f"{suc:^8}"
1915
+ f"{EAR.skip:^8}"
1628
1916
  )
1629
1917
 
1630
1918