ddeutil-workflow 0.0.85__py3-none-any.whl → 0.0.86__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/api/routes/job.py +3 -2
- ddeutil/workflow/conf.py +5 -3
- ddeutil/workflow/errors.py +3 -0
- ddeutil/workflow/job.py +66 -42
- ddeutil/workflow/result.py +46 -55
- ddeutil/workflow/stages.py +157 -165
- ddeutil/workflow/traces.py +147 -89
- ddeutil/workflow/workflow.py +300 -360
- {ddeutil_workflow-0.0.85.dist-info → ddeutil_workflow-0.0.86.dist-info}/METADATA +2 -2
- {ddeutil_workflow-0.0.85.dist-info → ddeutil_workflow-0.0.86.dist-info}/RECORD +15 -15
- {ddeutil_workflow-0.0.85.dist-info → ddeutil_workflow-0.0.86.dist-info}/WHEEL +0 -0
- {ddeutil_workflow-0.0.85.dist-info → ddeutil_workflow-0.0.86.dist-info}/entry_points.txt +0 -0
- {ddeutil_workflow-0.0.85.dist-info → ddeutil_workflow-0.0.86.dist-info}/licenses/LICENSE +0 -0
- {ddeutil_workflow-0.0.85.dist-info → ddeutil_workflow-0.0.86.dist-info}/top_level.txt +0 -0
ddeutil/workflow/workflow.py
CHANGED
@@ -19,11 +19,12 @@ Classes:
|
|
19
19
|
Constants:
|
20
20
|
NORMAL: Normal release execution
|
21
21
|
RERUN: Re-execution of failed workflows
|
22
|
-
|
22
|
+
DRYRUN: Dryrun execution for testing workflow loop.
|
23
23
|
FORCE: Force execution regardless of conditions
|
24
24
|
"""
|
25
25
|
import copy
|
26
26
|
import time
|
27
|
+
import traceback
|
27
28
|
from concurrent.futures import (
|
28
29
|
Future,
|
29
30
|
ThreadPoolExecutor,
|
@@ -43,9 +44,15 @@ from typing_extensions import Self
|
|
43
44
|
|
44
45
|
from . import DRYRUN
|
45
46
|
from .__types import DictData
|
46
|
-
from .audits import NORMAL, RERUN, Audit, ReleaseType, get_audit
|
47
|
+
from .audits import NORMAL, RERUN, Audit, AuditData, ReleaseType, get_audit
|
47
48
|
from .conf import YamlParser, dynamic
|
48
|
-
from .errors import
|
49
|
+
from .errors import (
|
50
|
+
WorkflowCancelError,
|
51
|
+
WorkflowError,
|
52
|
+
WorkflowSkipError,
|
53
|
+
WorkflowTimeoutError,
|
54
|
+
to_dict,
|
55
|
+
)
|
49
56
|
from .event import Event
|
50
57
|
from .job import Job
|
51
58
|
from .params import Param
|
@@ -65,7 +72,6 @@ from .reusables import has_template, param2template
|
|
65
72
|
from .traces import Trace, get_trace
|
66
73
|
from .utils import (
|
67
74
|
extract_id,
|
68
|
-
gen_id,
|
69
75
|
get_dt_now,
|
70
76
|
pop_sys_extras,
|
71
77
|
)
|
@@ -167,10 +173,10 @@ class Workflow(BaseModel):
|
|
167
173
|
FileNotFoundError: If workflow configuration file not found
|
168
174
|
|
169
175
|
Example:
|
170
|
-
|
176
|
+
Case: Load from default config path
|
171
177
|
>>> workflow = Workflow.from_conf('data-pipeline')
|
172
178
|
|
173
|
-
|
179
|
+
Case: Load with custom path and extras
|
174
180
|
>>> workflow = Workflow.from_conf(
|
175
181
|
... 'data-pipeline',
|
176
182
|
... path=Path('./custom-configs'),
|
@@ -374,7 +380,6 @@ class Workflow(BaseModel):
|
|
374
380
|
if k in self.params
|
375
381
|
}
|
376
382
|
),
|
377
|
-
"jobs": {},
|
378
383
|
}
|
379
384
|
|
380
385
|
def release(
|
@@ -439,7 +444,10 @@ class Workflow(BaseModel):
|
|
439
444
|
"extras": self.extras,
|
440
445
|
}
|
441
446
|
trace: Trace = get_trace(
|
442
|
-
run_id,
|
447
|
+
run_id,
|
448
|
+
parent_run_id=parent_run_id,
|
449
|
+
extras=self.extras,
|
450
|
+
pre_process=True,
|
443
451
|
)
|
444
452
|
release: datetime = self.on.validate_dt(dt=release)
|
445
453
|
trace.info(f"[RELEASE]: Start {name!r} : {release:%Y-%m-%d %H:%M:%S}")
|
@@ -457,13 +465,21 @@ class Workflow(BaseModel):
|
|
457
465
|
)
|
458
466
|
|
459
467
|
if release_type == RERUN:
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
468
|
+
try:
|
469
|
+
previous: AuditData = audit.find_audit_with_release(
|
470
|
+
name, release=release
|
471
|
+
)
|
472
|
+
values: DictData = previous.context
|
473
|
+
except FileNotFoundError:
|
474
|
+
trace.warning(
|
475
|
+
(
|
476
|
+
f"Does not find previous audit log with release: "
|
477
|
+
f"{release:%Y%m%d%H%M%S}"
|
478
|
+
),
|
479
|
+
module="release",
|
480
|
+
)
|
466
481
|
elif release_type == DRYRUN:
|
482
|
+
# IMPORTANT: Set system extra parameter for allow dryrun mode,
|
467
483
|
self.extras.update({"__sys_release_dryrun_mode": True})
|
468
484
|
trace.debug("[RELEASE]: Mark dryrun mode to the extra params.")
|
469
485
|
elif release_type == NORMAL and audit.is_pointed(data=audit_data):
|
@@ -492,7 +508,7 @@ class Workflow(BaseModel):
|
|
492
508
|
"context": context,
|
493
509
|
"runs_metadata": (
|
494
510
|
(runs_metadata or {})
|
495
|
-
|
|
511
|
+
| context.get("info", {})
|
496
512
|
| {
|
497
513
|
"timeout": timeout,
|
498
514
|
"original_name": self.name,
|
@@ -523,7 +539,7 @@ class Workflow(BaseModel):
|
|
523
539
|
),
|
524
540
|
)
|
525
541
|
|
526
|
-
def
|
542
|
+
def process_job(
|
527
543
|
self,
|
528
544
|
job: Job,
|
529
545
|
run_id: str,
|
@@ -532,7 +548,7 @@ class Workflow(BaseModel):
|
|
532
548
|
parent_run_id: Optional[str] = None,
|
533
549
|
event: Optional[ThreadEvent] = None,
|
534
550
|
) -> tuple[Status, DictData]:
|
535
|
-
"""Job
|
551
|
+
"""Job process job with passing dynamic parameters from the main workflow
|
536
552
|
execution to the target job object via job's ID.
|
537
553
|
|
538
554
|
This execution is the minimum level of execution of this workflow
|
@@ -602,132 +618,48 @@ class Workflow(BaseModel):
|
|
602
618
|
|
603
619
|
return result.status, catch(context, status=result.status)
|
604
620
|
|
605
|
-
def
|
621
|
+
def process(
|
606
622
|
self,
|
607
|
-
|
623
|
+
job_queue: Queue[str],
|
624
|
+
run_id: str,
|
625
|
+
context: DictData,
|
608
626
|
*,
|
609
|
-
|
627
|
+
parent_run_id: Optional[str] = None,
|
610
628
|
event: Optional[ThreadEvent] = None,
|
611
629
|
timeout: float = 3600,
|
612
630
|
max_job_parallel: int = 2,
|
631
|
+
total_job: Optional[int] = None,
|
613
632
|
) -> Result:
|
614
|
-
"""
|
615
|
-
included in this workflow model with `jobs` field.
|
616
|
-
|
617
|
-
The result of execution process for each job and stages on this
|
618
|
-
workflow will keep in dict which able to catch out with all jobs and
|
619
|
-
stages by dot annotation.
|
620
|
-
|
621
|
-
For example with non-strategy job, when I want to use the output
|
622
|
-
from previous stage, I can access it with syntax:
|
633
|
+
"""Job process method.
|
623
634
|
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
the job execution. It will warp that error and keep it in the key `errors`
|
634
|
-
at the result context.
|
635
|
-
|
636
|
-
|
637
|
-
Execution --> Ok --> Result
|
638
|
-
|-status: CANCEL
|
639
|
-
╰-context:
|
640
|
-
╰-errors:
|
641
|
-
|-name: ...
|
642
|
-
╰-message: ...
|
643
|
-
|
644
|
-
--> Ok --> Result
|
645
|
-
|-status: FAILED
|
646
|
-
╰-context:
|
647
|
-
╰-errors:
|
648
|
-
|-name: ...
|
649
|
-
╰-message: ...
|
650
|
-
|
651
|
-
--> Ok --> Result
|
652
|
-
╰-status: SKIP
|
653
|
-
|
654
|
-
--> Ok --> Result
|
655
|
-
╰-status: SUCCESS
|
656
|
-
|
657
|
-
:param params: A parameter data that will parameterize before execution.
|
658
|
-
:param run_id: (Optional[str]) A workflow running ID.
|
659
|
-
:param event: (Event) An Event manager instance that use to cancel this
|
660
|
-
execution if it forces stopped by parent execution.
|
661
|
-
:param timeout: (float) A workflow execution time out in second unit
|
662
|
-
that use for limit time of execution and waiting job dependency.
|
663
|
-
This value does not force stop the task that still running more than
|
664
|
-
this limit time. (Default: 60 * 60 seconds)
|
665
|
-
:param max_job_parallel: (int) The maximum workers that use for job
|
666
|
-
execution in `ThreadPoolExecutor` object. (Default: 2 workers)
|
667
|
-
|
668
|
-
:rtype: Result
|
635
|
+
Args:
|
636
|
+
job_queue:
|
637
|
+
run_id (str):
|
638
|
+
context (DictData):
|
639
|
+
parent_run_id (str, default None):
|
640
|
+
event (Event, default None):
|
641
|
+
timeout:
|
642
|
+
max_job_parallel:
|
643
|
+
total_job:
|
669
644
|
"""
|
670
645
|
ts: float = time.monotonic()
|
671
|
-
parent_run_id, run_id = extract_id(
|
672
|
-
self.name, run_id=run_id, extras=self.extras
|
673
|
-
)
|
674
646
|
trace: Trace = get_trace(
|
675
647
|
run_id, parent_run_id=parent_run_id, extras=self.extras
|
676
648
|
)
|
677
|
-
context: DictData = self.parameterize(params)
|
678
|
-
event: ThreadEvent = event or ThreadEvent()
|
679
|
-
max_job_parallel: int = dynamic(
|
680
|
-
"max_job_parallel", f=max_job_parallel, extras=self.extras
|
681
|
-
)
|
682
|
-
trace.info(
|
683
|
-
f"[WORKFLOW]: Execute: {self.name!r} ("
|
684
|
-
f"{'parallel' if max_job_parallel > 1 else 'sequential'} jobs)"
|
685
|
-
)
|
686
|
-
if not self.jobs:
|
687
|
-
trace.warning(f"[WORKFLOW]: {self.name!r} does not set jobs")
|
688
|
-
return Result(
|
689
|
-
run_id=run_id,
|
690
|
-
parent_run_id=parent_run_id,
|
691
|
-
status=SUCCESS,
|
692
|
-
context=catch(context, status=SUCCESS),
|
693
|
-
info={"execution_time": time.monotonic() - ts},
|
694
|
-
extras=self.extras,
|
695
|
-
)
|
696
|
-
|
697
|
-
job_queue: Queue = Queue()
|
698
|
-
for job_id in self.jobs:
|
699
|
-
job_queue.put(job_id)
|
700
|
-
|
701
649
|
not_timeout_flag: bool = True
|
702
|
-
total_job: int = len(self.jobs)
|
650
|
+
total_job: int = total_job or len(self.jobs)
|
703
651
|
statuses: list[Status] = [WAIT] * total_job
|
704
652
|
skip_count: int = 0
|
705
653
|
sequence_statuses: list[Status] = []
|
706
|
-
timeout: float = dynamic(
|
707
|
-
"max_job_exec_timeout", f=timeout, extras=self.extras
|
708
|
-
)
|
709
|
-
catch(context, status=WAIT)
|
710
654
|
if event and event.is_set():
|
711
|
-
|
655
|
+
raise WorkflowCancelError(
|
712
656
|
"Execution was canceled from the event was set "
|
713
657
|
"before workflow execution."
|
714
658
|
)
|
715
|
-
trace.error(f"[WORKFLOW]: {err_msg}")
|
716
|
-
return Result(
|
717
|
-
run_id=run_id,
|
718
|
-
parent_run_id=parent_run_id,
|
719
|
-
status=CANCEL,
|
720
|
-
context=catch(
|
721
|
-
context,
|
722
|
-
status=CANCEL,
|
723
|
-
updated={"errors": WorkflowCancelError(err_msg).to_dict()},
|
724
|
-
),
|
725
|
-
info={"execution_time": time.monotonic() - ts},
|
726
|
-
extras=self.extras,
|
727
|
-
)
|
728
659
|
|
729
660
|
# NOTE: Force update internal extras for handler circle execution.
|
730
661
|
self.extras.update({"__sys_exec_break_circle": self.name})
|
662
|
+
|
731
663
|
with ThreadPoolExecutor(max_job_parallel, "wf") as executor:
|
732
664
|
futures: list[Future] = []
|
733
665
|
|
@@ -757,23 +689,9 @@ class Workflow(BaseModel):
|
|
757
689
|
|
758
690
|
if check == FAILED: # pragma: no cov
|
759
691
|
pop_sys_extras(self.extras)
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
status=FAILED,
|
764
|
-
context=catch(
|
765
|
-
context,
|
766
|
-
status=FAILED,
|
767
|
-
updated={
|
768
|
-
"status": FAILED,
|
769
|
-
"errors": WorkflowError(
|
770
|
-
f"Validate job trigger rule was failed "
|
771
|
-
f"with {job.trigger_rule.value!r}."
|
772
|
-
).to_dict(),
|
773
|
-
},
|
774
|
-
),
|
775
|
-
info={"execution_time": time.monotonic() - ts},
|
776
|
-
extras=self.extras,
|
692
|
+
raise WorkflowError(
|
693
|
+
f"Validate job trigger rule was failed with "
|
694
|
+
f"{job.trigger_rule.value!r}."
|
777
695
|
)
|
778
696
|
elif check == SKIP: # pragma: no cov
|
779
697
|
trace.info(
|
@@ -788,7 +706,7 @@ class Workflow(BaseModel):
|
|
788
706
|
if max_job_parallel > 1:
|
789
707
|
futures.append(
|
790
708
|
executor.submit(
|
791
|
-
self.
|
709
|
+
self.process_job,
|
792
710
|
job=job,
|
793
711
|
run_id=run_id,
|
794
712
|
context=context,
|
@@ -802,7 +720,7 @@ class Workflow(BaseModel):
|
|
802
720
|
if len(futures) < 1:
|
803
721
|
futures.append(
|
804
722
|
executor.submit(
|
805
|
-
self.
|
723
|
+
self.process_job,
|
806
724
|
job=job,
|
807
725
|
run_id=run_id,
|
808
726
|
context=context,
|
@@ -853,13 +771,8 @@ class Workflow(BaseModel):
|
|
853
771
|
|
854
772
|
pop_sys_extras(self.extras)
|
855
773
|
st: Status = validate_statuses(statuses)
|
856
|
-
return Result(
|
857
|
-
|
858
|
-
parent_run_id=parent_run_id,
|
859
|
-
status=st,
|
860
|
-
context=catch(context, status=st),
|
861
|
-
info={"execution_time": time.monotonic() - ts},
|
862
|
-
extras=self.extras,
|
774
|
+
return Result.from_trace(trace).catch(
|
775
|
+
status=st, context=catch(context, status=st)
|
863
776
|
)
|
864
777
|
|
865
778
|
event.set()
|
@@ -867,85 +780,86 @@ class Workflow(BaseModel):
|
|
867
780
|
future.cancel()
|
868
781
|
|
869
782
|
trace.error(
|
870
|
-
|
871
|
-
|
783
|
+
(
|
784
|
+
f"{self.name!r} was timeout because it use exec time more "
|
785
|
+
f"than {timeout} seconds."
|
786
|
+
),
|
787
|
+
module="workflow",
|
872
788
|
)
|
873
789
|
|
874
790
|
time.sleep(0.0025)
|
875
791
|
|
876
792
|
pop_sys_extras(self.extras)
|
877
|
-
|
878
|
-
|
879
|
-
|
880
|
-
status=FAILED,
|
881
|
-
context=catch(
|
882
|
-
context,
|
883
|
-
status=FAILED,
|
884
|
-
updated={
|
885
|
-
"errors": WorkflowTimeoutError(
|
886
|
-
f"{self.name!r} was timeout because it use exec time "
|
887
|
-
f"more than {timeout} seconds."
|
888
|
-
).to_dict(),
|
889
|
-
},
|
890
|
-
),
|
891
|
-
info={"execution_time": time.monotonic() - ts},
|
892
|
-
extras=self.extras,
|
793
|
+
raise WorkflowTimeoutError(
|
794
|
+
f"{self.name!r} was timeout because it use exec time more than "
|
795
|
+
f"{timeout} seconds."
|
893
796
|
)
|
894
797
|
|
895
|
-
def
|
798
|
+
def _execute(
|
896
799
|
self,
|
800
|
+
params: DictData,
|
801
|
+
trace: Trace,
|
897
802
|
context: DictData,
|
898
803
|
*,
|
899
|
-
run_id: Optional[str] = None,
|
900
804
|
event: Optional[ThreadEvent] = None,
|
901
805
|
timeout: float = 3600,
|
902
806
|
max_job_parallel: int = 2,
|
903
|
-
|
904
|
-
|
905
|
-
|
906
|
-
|
907
|
-
|
908
|
-
|
807
|
+
total_job: Optional[int] = None,
|
808
|
+
) -> Result:
|
809
|
+
"""Wrapped Execute method."""
|
810
|
+
context.update(
|
811
|
+
{"jobs": {}, "info": {"exec_start": get_dt_now()}}
|
812
|
+
| self.parameterize(params)
|
813
|
+
)
|
814
|
+
trace.info(
|
815
|
+
f"[WORKFLOW]: Execute: {self.name!r} ("
|
816
|
+
f"{'parallel' if max_job_parallel > 1 else 'sequential'} jobs)"
|
817
|
+
)
|
818
|
+
if not self.jobs:
|
819
|
+
trace.warning(f"[WORKFLOW]: {self.name!r} does not set jobs")
|
820
|
+
return Result.from_trace(trace).catch(
|
821
|
+
status=SUCCESS, context=catch(context, status=SUCCESS)
|
822
|
+
)
|
909
823
|
|
910
|
-
|
911
|
-
|
912
|
-
|
913
|
-
event: (Event) An Event manager instance that use to cancel this
|
914
|
-
execution if it forces stopped by parent execution.
|
915
|
-
timeout: (float) A workflow execution time out in second unit
|
916
|
-
that use for limit time of execution and waiting job dependency.
|
917
|
-
This value does not force stop the task that still running more
|
918
|
-
than this limit time. (Default: 60 * 60 seconds)
|
919
|
-
max_job_parallel: (int) The maximum workers that use for job
|
920
|
-
execution in `ThreadPoolExecutor` object. (Default: 2 workers)
|
824
|
+
job_queue: Queue[str] = Queue()
|
825
|
+
for job_id in self.jobs:
|
826
|
+
job_queue.put(job_id)
|
921
827
|
|
922
|
-
|
923
|
-
|
924
|
-
|
925
|
-
|
926
|
-
|
927
|
-
|
928
|
-
|
929
|
-
|
930
|
-
|
828
|
+
catch(context, status=WAIT)
|
829
|
+
return self.process(
|
830
|
+
job_queue,
|
831
|
+
run_id=trace.run_id,
|
832
|
+
context=context,
|
833
|
+
parent_run_id=trace.parent_run_id,
|
834
|
+
event=event,
|
835
|
+
timeout=timeout,
|
836
|
+
max_job_parallel=max_job_parallel,
|
837
|
+
total_job=total_job,
|
931
838
|
)
|
932
|
-
|
839
|
+
|
840
|
+
def _rerun(
|
841
|
+
self,
|
842
|
+
params: DictData,
|
843
|
+
trace: Trace,
|
844
|
+
context: DictData,
|
845
|
+
*,
|
846
|
+
event: Optional[ThreadEvent] = None,
|
847
|
+
timeout: float = 3600,
|
848
|
+
max_job_parallel: int = 2,
|
849
|
+
) -> Result:
|
850
|
+
"""Wrapped Rerun method."""
|
851
|
+
if params["status"] == SUCCESS:
|
933
852
|
trace.info(
|
934
853
|
"[WORKFLOW]: Does not rerun because it already executed with "
|
935
854
|
"success status."
|
936
855
|
)
|
937
856
|
return Result.from_trace(trace).catch(
|
938
857
|
status=SUCCESS,
|
939
|
-
context=catch(context=
|
858
|
+
context=catch(context=params, status=SUCCESS),
|
940
859
|
)
|
941
860
|
|
942
|
-
err: dict[str, str] =
|
861
|
+
err: dict[str, str] = params.get("errors", {})
|
943
862
|
trace.info(f"[WORKFLOW]: Previous error: {err}")
|
944
|
-
|
945
|
-
event: ThreadEvent = event or ThreadEvent()
|
946
|
-
max_job_parallel: int = dynamic(
|
947
|
-
"max_job_parallel", f=max_job_parallel, extras=self.extras
|
948
|
-
)
|
949
863
|
trace.info(
|
950
864
|
f"[WORKFLOW]: Execute: {self.name!r} ("
|
951
865
|
f"{'parallel' if max_job_parallel > 1 else 'sequential'} jobs)"
|
@@ -953,19 +867,24 @@ class Workflow(BaseModel):
|
|
953
867
|
if not self.jobs:
|
954
868
|
trace.warning(f"[WORKFLOW]: {self.name!r} does not set jobs")
|
955
869
|
return Result.from_trace(trace).catch(
|
956
|
-
status=SUCCESS,
|
957
|
-
context=catch(context=context, status=SUCCESS),
|
870
|
+
status=SUCCESS, context=catch(context=params, status=SUCCESS)
|
958
871
|
)
|
959
872
|
|
960
873
|
# NOTE: Prepare the new context variable for rerun process.
|
961
|
-
jobs: DictData =
|
962
|
-
context
|
963
|
-
|
964
|
-
|
965
|
-
|
874
|
+
jobs: DictData = params.get("jobs")
|
875
|
+
context.update(
|
876
|
+
{
|
877
|
+
"params": params["params"].copy(),
|
878
|
+
"jobs": {
|
879
|
+
j: jobs[j]
|
880
|
+
for j in jobs
|
881
|
+
if jobs[j].get("status", FAILED) == SUCCESS
|
882
|
+
},
|
883
|
+
}
|
884
|
+
)
|
966
885
|
|
967
886
|
total_job: int = 0
|
968
|
-
job_queue: Queue = Queue()
|
887
|
+
job_queue: Queue[str] = Queue()
|
969
888
|
for job_id in self.jobs:
|
970
889
|
|
971
890
|
if job_id in context["jobs"]:
|
@@ -975,178 +894,199 @@ class Workflow(BaseModel):
|
|
975
894
|
total_job += 1
|
976
895
|
|
977
896
|
if total_job == 0:
|
978
|
-
|
979
|
-
"
|
897
|
+
raise WorkflowSkipError(
|
898
|
+
"It does not have job to rerun. it will change "
|
980
899
|
"status to skip."
|
981
900
|
)
|
982
|
-
return Result.from_trace(trace).catch(
|
983
|
-
status=SKIP,
|
984
|
-
context=catch(context=context, status=SKIP),
|
985
|
-
)
|
986
|
-
|
987
|
-
not_timeout_flag: bool = True
|
988
|
-
statuses: list[Status] = [WAIT] * total_job
|
989
|
-
skip_count: int = 0
|
990
|
-
sequence_statuses: list[Status] = []
|
991
|
-
timeout: float = dynamic(
|
992
|
-
"max_job_exec_timeout", f=timeout, extras=self.extras
|
993
|
-
)
|
994
901
|
|
995
902
|
catch(context, status=WAIT)
|
996
|
-
|
997
|
-
|
998
|
-
|
999
|
-
|
1000
|
-
|
1001
|
-
|
1002
|
-
|
1003
|
-
|
1004
|
-
|
1005
|
-
|
1006
|
-
).to_dict(),
|
1007
|
-
},
|
1008
|
-
),
|
1009
|
-
)
|
903
|
+
return self.process(
|
904
|
+
job_queue,
|
905
|
+
run_id=trace.run_id,
|
906
|
+
context=context,
|
907
|
+
parent_run_id=trace.parent_run_id,
|
908
|
+
event=event,
|
909
|
+
timeout=timeout,
|
910
|
+
max_job_parallel=max_job_parallel,
|
911
|
+
total_job=total_job,
|
912
|
+
)
|
1010
913
|
|
1011
|
-
|
1012
|
-
|
1013
|
-
|
1014
|
-
|
914
|
+
def execute(
|
915
|
+
self,
|
916
|
+
params: DictData,
|
917
|
+
*,
|
918
|
+
run_id: Optional[str] = None,
|
919
|
+
event: Optional[ThreadEvent] = None,
|
920
|
+
timeout: float = 3600,
|
921
|
+
max_job_parallel: int = 2,
|
922
|
+
rerun_mode: bool = False,
|
923
|
+
) -> Result:
|
924
|
+
"""Execute workflow with passing a dynamic parameters to all jobs that
|
925
|
+
included in this workflow model with `jobs` field.
|
1015
926
|
|
1016
|
-
|
1017
|
-
|
1018
|
-
|
1019
|
-
job_id: str = job_queue.get()
|
1020
|
-
job: Job = self.job(name=job_id)
|
1021
|
-
if (check := job.check_needs(context["jobs"])) == WAIT:
|
1022
|
-
job_queue.task_done()
|
1023
|
-
job_queue.put(job_id)
|
1024
|
-
consecutive_waits += 1
|
927
|
+
The result of execution process for each job and stages on this
|
928
|
+
workflow will keep in dict which able to catch out with all jobs and
|
929
|
+
stages by dot annotation.
|
1025
930
|
|
1026
|
-
|
1027
|
-
|
1028
|
-
time.sleep(backoff_sleep)
|
1029
|
-
continue
|
931
|
+
For example with non-strategy job, when I want to use the output
|
932
|
+
from previous stage, I can access it with syntax:
|
1030
933
|
|
1031
|
-
|
1032
|
-
|
1033
|
-
backoff_sleep = 0.01
|
934
|
+
... ${job-id}.stages.${stage-id}.outputs.${key}
|
935
|
+
... ${job-id}.stages.${stage-id}.errors.${key}
|
1034
936
|
|
1035
|
-
|
1036
|
-
return Result.from_trace(trace).catch(
|
1037
|
-
status=FAILED,
|
1038
|
-
context=catch(
|
1039
|
-
context,
|
1040
|
-
status=FAILED,
|
1041
|
-
updated={
|
1042
|
-
"status": FAILED,
|
1043
|
-
"errors": WorkflowError(
|
1044
|
-
f"Validate job trigger rule was failed "
|
1045
|
-
f"with {job.trigger_rule.value!r}."
|
1046
|
-
).to_dict(),
|
1047
|
-
},
|
1048
|
-
),
|
1049
|
-
)
|
1050
|
-
elif check == SKIP: # pragma: no cov
|
1051
|
-
trace.info(
|
1052
|
-
f"[JOB]: Skip job: {job_id!r} from trigger rule."
|
1053
|
-
)
|
1054
|
-
job.set_outputs(output={"status": SKIP}, to=context)
|
1055
|
-
job_queue.task_done()
|
1056
|
-
skip_count += 1
|
1057
|
-
continue
|
937
|
+
But example for strategy job:
|
1058
938
|
|
1059
|
-
|
1060
|
-
|
1061
|
-
executor.submit(
|
1062
|
-
self.execute_job,
|
1063
|
-
job=job,
|
1064
|
-
run_id=run_id,
|
1065
|
-
context=context,
|
1066
|
-
parent_run_id=parent_run_id,
|
1067
|
-
event=event,
|
1068
|
-
),
|
1069
|
-
)
|
1070
|
-
job_queue.task_done()
|
1071
|
-
continue
|
939
|
+
... ${job-id}.strategies.${strategy-id}.stages.${stage-id}.outputs.${key}
|
940
|
+
... ${job-id}.strategies.${strategy-id}.stages.${stage-id}.errors.${key}
|
1072
941
|
|
1073
|
-
|
1074
|
-
|
1075
|
-
|
1076
|
-
self.execute_job,
|
1077
|
-
job=job,
|
1078
|
-
run_id=run_id,
|
1079
|
-
context=context,
|
1080
|
-
parent_run_id=parent_run_id,
|
1081
|
-
event=event,
|
1082
|
-
)
|
1083
|
-
)
|
1084
|
-
elif (future := futures.pop(0)).done():
|
1085
|
-
if e := future.exception():
|
1086
|
-
sequence_statuses.append(get_status_from_error(e))
|
1087
|
-
else:
|
1088
|
-
st, _ = future.result()
|
1089
|
-
sequence_statuses.append(st)
|
1090
|
-
job_queue.put(job_id)
|
1091
|
-
elif future.cancelled():
|
1092
|
-
sequence_statuses.append(CANCEL)
|
1093
|
-
job_queue.put(job_id)
|
1094
|
-
elif future.running() or "state=pending" in str(future):
|
1095
|
-
futures.insert(0, future)
|
1096
|
-
job_queue.put(job_id)
|
1097
|
-
else: # pragma: no cov
|
1098
|
-
job_queue.put(job_id)
|
1099
|
-
futures.insert(0, future)
|
1100
|
-
trace.warning(
|
1101
|
-
f"[WORKFLOW]: ... Execution non-threading not "
|
1102
|
-
f"handle: {future}."
|
1103
|
-
)
|
942
|
+
This method already handle all exception class that can raise from
|
943
|
+
the job execution. It will warp that error and keep it in the key `errors`
|
944
|
+
at the result context.
|
1104
945
|
|
1105
|
-
job_queue.task_done()
|
1106
946
|
|
1107
|
-
|
1108
|
-
|
1109
|
-
|
1110
|
-
|
1111
|
-
|
1112
|
-
|
1113
|
-
statuses[total] = get_status_from_error(e)
|
947
|
+
Execution --> Ok --> Result
|
948
|
+
|-status: CANCEL
|
949
|
+
╰-context:
|
950
|
+
╰-errors:
|
951
|
+
|-name: ...
|
952
|
+
╰-message: ...
|
1114
953
|
|
1115
|
-
|
1116
|
-
|
1117
|
-
|
954
|
+
--> Ok --> Result
|
955
|
+
|-status: FAILED
|
956
|
+
╰-context:
|
957
|
+
╰-errors:
|
958
|
+
|-name: ...
|
959
|
+
╰-message: ...
|
1118
960
|
|
1119
|
-
|
1120
|
-
|
1121
|
-
statuses[total + 1 + skip_count + i] = s
|
961
|
+
--> Ok --> Result
|
962
|
+
╰-status: SKIP
|
1122
963
|
|
1123
|
-
|
1124
|
-
|
1125
|
-
status=st,
|
1126
|
-
context=catch(context, status=st),
|
1127
|
-
)
|
964
|
+
--> Ok --> Result
|
965
|
+
╰-status: SUCCESS
|
1128
966
|
|
1129
|
-
|
1130
|
-
|
1131
|
-
|
967
|
+
Args:
|
968
|
+
params (DictData): A parameter data that will parameterize before
|
969
|
+
execution.
|
970
|
+
run_id (str, default None): A workflow running ID.
|
971
|
+
event (Event, default None): An Event manager instance that use to
|
972
|
+
cancel this execution if it forces stopped by parent execution.
|
973
|
+
timeout (float, default 3600): A workflow execution time out in
|
974
|
+
second unit that use for limit time of execution and waiting job
|
975
|
+
dependency. This value does not force stop the task that still
|
976
|
+
running more than this limit time. (Default: 60 * 60 seconds)
|
977
|
+
max_job_parallel (int, default 2) The maximum workers that use for
|
978
|
+
job execution in `ThreadPoolExecutor` object.
|
979
|
+
rerun_mode (bool, default False): A rerun mode flag that will use
|
980
|
+
`_rerun` method if it set be True.
|
1132
981
|
|
982
|
+
Returns
|
983
|
+
Result: Return Result object that create from execution context with
|
984
|
+
return mode.
|
985
|
+
"""
|
986
|
+
ts: float = time.monotonic()
|
987
|
+
parent_run_id, run_id = extract_id(
|
988
|
+
self.name, run_id=run_id, extras=self.extras
|
989
|
+
)
|
990
|
+
trace: Trace = get_trace(
|
991
|
+
run_id,
|
992
|
+
parent_run_id=parent_run_id,
|
993
|
+
extras=self.extras,
|
994
|
+
pre_process=True,
|
995
|
+
)
|
996
|
+
context: DictData = {
|
997
|
+
"jobs": {},
|
998
|
+
"status": WAIT,
|
999
|
+
"info": {"exec_start": get_dt_now()},
|
1000
|
+
}
|
1001
|
+
event: ThreadEvent = event or ThreadEvent()
|
1002
|
+
max_job_parallel: int = dynamic(
|
1003
|
+
"max_job_parallel", f=max_job_parallel, extras=self.extras
|
1004
|
+
)
|
1005
|
+
try:
|
1006
|
+
if rerun_mode:
|
1007
|
+
return self._rerun(
|
1008
|
+
params,
|
1009
|
+
trace,
|
1010
|
+
context,
|
1011
|
+
event=event,
|
1012
|
+
timeout=timeout,
|
1013
|
+
max_job_parallel=max_job_parallel,
|
1014
|
+
)
|
1015
|
+
return self._execute(
|
1016
|
+
params,
|
1017
|
+
trace,
|
1018
|
+
context,
|
1019
|
+
event=event,
|
1020
|
+
timeout=timeout,
|
1021
|
+
max_job_parallel=max_job_parallel,
|
1022
|
+
)
|
1023
|
+
except WorkflowError as e:
|
1024
|
+
updated = {"errors": e.to_dict()}
|
1025
|
+
if isinstance(e, WorkflowSkipError):
|
1026
|
+
trace.error(f"⏭️ Skip: {e}", module="workflow")
|
1027
|
+
updated = None
|
1028
|
+
else:
|
1029
|
+
trace.error(f"📢 Workflow Failed:||{e}", module="workflow")
|
1030
|
+
|
1031
|
+
st: Status = get_status_from_error(e)
|
1032
|
+
return Result.from_trace(trace).catch(
|
1033
|
+
status=st, context=catch(context, status=st, updated=updated)
|
1034
|
+
)
|
1035
|
+
except Exception as e:
|
1133
1036
|
trace.error(
|
1134
|
-
f"
|
1135
|
-
|
1037
|
+
f"💥 Error Failed:||🚨 {traceback.format_exc()}||",
|
1038
|
+
module="workflow",
|
1039
|
+
)
|
1040
|
+
return Result.from_trace(trace).catch(
|
1041
|
+
status=FAILED,
|
1042
|
+
context=catch(
|
1043
|
+
context, status=FAILED, updated={"errors": to_dict(e)}
|
1044
|
+
),
|
1045
|
+
)
|
1046
|
+
finally:
|
1047
|
+
context["info"].update(
|
1048
|
+
{
|
1049
|
+
"exec_end": get_dt_now(),
|
1050
|
+
"exec_latency": round(time.monotonic() - ts, 6),
|
1051
|
+
}
|
1136
1052
|
)
|
1137
1053
|
|
1138
|
-
|
1054
|
+
def rerun(
|
1055
|
+
self,
|
1056
|
+
context: DictData,
|
1057
|
+
*,
|
1058
|
+
run_id: Optional[str] = None,
|
1059
|
+
event: Optional[ThreadEvent] = None,
|
1060
|
+
timeout: float = 3600,
|
1061
|
+
max_job_parallel: int = 2,
|
1062
|
+
) -> Result: # pragma: no cov
|
1063
|
+
"""Re-Execute workflow with passing the error context data.
|
1139
1064
|
|
1140
|
-
|
1141
|
-
|
1142
|
-
|
1143
|
-
|
1144
|
-
|
1145
|
-
|
1146
|
-
|
1147
|
-
|
1148
|
-
|
1149
|
-
|
1150
|
-
|
1151
|
-
|
1065
|
+
Warnings:
|
1066
|
+
This rerun method allow to rerun job execution level only. That mean
|
1067
|
+
it does not support rerun only stage.
|
1068
|
+
|
1069
|
+
Args:
|
1070
|
+
context (DictData): A context result that get the failed status.
|
1071
|
+
run_id (str, default None): A workflow running ID.
|
1072
|
+
event (Event, default None): An Event manager instance that use to
|
1073
|
+
cancel this execution if it forces stopped by parent execution.
|
1074
|
+
timeout (float, default 3600): A workflow execution time out in
|
1075
|
+
second unit that use for limit time of execution and waiting job
|
1076
|
+
dependency. This value does not force stop the task that still
|
1077
|
+
running more than this limit time. (Default: 60 * 60 seconds)
|
1078
|
+
max_job_parallel (int, default 2) The maximum workers that use for
|
1079
|
+
job execution in `ThreadPoolExecutor` object.
|
1080
|
+
|
1081
|
+
Returns
|
1082
|
+
Result: Return Result object that create from execution context with
|
1083
|
+
return mode.
|
1084
|
+
"""
|
1085
|
+
return self.execute(
|
1086
|
+
context,
|
1087
|
+
run_id=run_id,
|
1088
|
+
event=event,
|
1089
|
+
timeout=timeout,
|
1090
|
+
max_job_parallel=max_job_parallel,
|
1091
|
+
rerun_mode=True,
|
1152
1092
|
)
|