ddeutil-workflow 0.0.84__py3-none-any.whl → 0.0.86__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +4 -4
- ddeutil/workflow/api/routes/job.py +3 -2
- ddeutil/workflow/audits.py +8 -6
- ddeutil/workflow/conf.py +9 -20
- ddeutil/workflow/errors.py +34 -19
- ddeutil/workflow/job.py +303 -159
- ddeutil/workflow/plugins/providers/az.py +2 -2
- ddeutil/workflow/result.py +46 -55
- ddeutil/workflow/stages.py +540 -458
- ddeutil/workflow/traces.py +259 -261
- ddeutil/workflow/workflow.py +304 -361
- {ddeutil_workflow-0.0.84.dist-info → ddeutil_workflow-0.0.86.dist-info}/METADATA +13 -16
- {ddeutil_workflow-0.0.84.dist-info → ddeutil_workflow-0.0.86.dist-info}/RECORD +18 -18
- {ddeutil_workflow-0.0.84.dist-info → ddeutil_workflow-0.0.86.dist-info}/WHEEL +0 -0
- {ddeutil_workflow-0.0.84.dist-info → ddeutil_workflow-0.0.86.dist-info}/entry_points.txt +0 -0
- {ddeutil_workflow-0.0.84.dist-info → ddeutil_workflow-0.0.86.dist-info}/licenses/LICENSE +0 -0
- {ddeutil_workflow-0.0.84.dist-info → ddeutil_workflow-0.0.86.dist-info}/top_level.txt +0 -0
ddeutil/workflow/workflow.py
CHANGED
@@ -19,11 +19,12 @@ Classes:
|
|
19
19
|
Constants:
|
20
20
|
NORMAL: Normal release execution
|
21
21
|
RERUN: Re-execution of failed workflows
|
22
|
-
|
22
|
+
DRYRUN: Dryrun execution for testing workflow loop.
|
23
23
|
FORCE: Force execution regardless of conditions
|
24
24
|
"""
|
25
25
|
import copy
|
26
26
|
import time
|
27
|
+
import traceback
|
27
28
|
from concurrent.futures import (
|
28
29
|
Future,
|
29
30
|
ThreadPoolExecutor,
|
@@ -43,9 +44,15 @@ from typing_extensions import Self
|
|
43
44
|
|
44
45
|
from . import DRYRUN
|
45
46
|
from .__types import DictData
|
46
|
-
from .audits import NORMAL, RERUN, Audit, ReleaseType, get_audit
|
47
|
+
from .audits import NORMAL, RERUN, Audit, AuditData, ReleaseType, get_audit
|
47
48
|
from .conf import YamlParser, dynamic
|
48
|
-
from .errors import
|
49
|
+
from .errors import (
|
50
|
+
WorkflowCancelError,
|
51
|
+
WorkflowError,
|
52
|
+
WorkflowSkipError,
|
53
|
+
WorkflowTimeoutError,
|
54
|
+
to_dict,
|
55
|
+
)
|
49
56
|
from .event import Event
|
50
57
|
from .job import Job
|
51
58
|
from .params import Param
|
@@ -65,7 +72,6 @@ from .reusables import has_template, param2template
|
|
65
72
|
from .traces import Trace, get_trace
|
66
73
|
from .utils import (
|
67
74
|
extract_id,
|
68
|
-
gen_id,
|
69
75
|
get_dt_now,
|
70
76
|
pop_sys_extras,
|
71
77
|
)
|
@@ -99,7 +105,10 @@ class Workflow(BaseModel):
|
|
99
105
|
description="An extra parameters that want to override config values.",
|
100
106
|
)
|
101
107
|
name: str = Field(description="A workflow name.")
|
102
|
-
type: Literal["Workflow"] = Field(
|
108
|
+
type: Literal["Workflow"] = Field(
|
109
|
+
default="Workflow",
|
110
|
+
description="A type of this config data that will use by discriminator",
|
111
|
+
)
|
103
112
|
desc: Optional[str] = Field(
|
104
113
|
default=None,
|
105
114
|
description=(
|
@@ -164,10 +173,10 @@ class Workflow(BaseModel):
|
|
164
173
|
FileNotFoundError: If workflow configuration file not found
|
165
174
|
|
166
175
|
Example:
|
167
|
-
|
176
|
+
Case: Load from default config path
|
168
177
|
>>> workflow = Workflow.from_conf('data-pipeline')
|
169
178
|
|
170
|
-
|
179
|
+
Case: Load with custom path and extras
|
171
180
|
>>> workflow = Workflow.from_conf(
|
172
181
|
... 'data-pipeline',
|
173
182
|
... path=Path('./custom-configs'),
|
@@ -371,7 +380,6 @@ class Workflow(BaseModel):
|
|
371
380
|
if k in self.params
|
372
381
|
}
|
373
382
|
),
|
374
|
-
"jobs": {},
|
375
383
|
}
|
376
384
|
|
377
385
|
def release(
|
@@ -436,7 +444,10 @@ class Workflow(BaseModel):
|
|
436
444
|
"extras": self.extras,
|
437
445
|
}
|
438
446
|
trace: Trace = get_trace(
|
439
|
-
run_id,
|
447
|
+
run_id,
|
448
|
+
parent_run_id=parent_run_id,
|
449
|
+
extras=self.extras,
|
450
|
+
pre_process=True,
|
440
451
|
)
|
441
452
|
release: datetime = self.on.validate_dt(dt=release)
|
442
453
|
trace.info(f"[RELEASE]: Start {name!r} : {release:%Y-%m-%d %H:%M:%S}")
|
@@ -454,13 +465,21 @@ class Workflow(BaseModel):
|
|
454
465
|
)
|
455
466
|
|
456
467
|
if release_type == RERUN:
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
468
|
+
try:
|
469
|
+
previous: AuditData = audit.find_audit_with_release(
|
470
|
+
name, release=release
|
471
|
+
)
|
472
|
+
values: DictData = previous.context
|
473
|
+
except FileNotFoundError:
|
474
|
+
trace.warning(
|
475
|
+
(
|
476
|
+
f"Does not find previous audit log with release: "
|
477
|
+
f"{release:%Y%m%d%H%M%S}"
|
478
|
+
),
|
479
|
+
module="release",
|
480
|
+
)
|
463
481
|
elif release_type == DRYRUN:
|
482
|
+
# IMPORTANT: Set system extra parameter for allow dryrun mode,
|
464
483
|
self.extras.update({"__sys_release_dryrun_mode": True})
|
465
484
|
trace.debug("[RELEASE]: Mark dryrun mode to the extra params.")
|
466
485
|
elif release_type == NORMAL and audit.is_pointed(data=audit_data):
|
@@ -489,7 +508,7 @@ class Workflow(BaseModel):
|
|
489
508
|
"context": context,
|
490
509
|
"runs_metadata": (
|
491
510
|
(runs_metadata or {})
|
492
|
-
|
|
511
|
+
| context.get("info", {})
|
493
512
|
| {
|
494
513
|
"timeout": timeout,
|
495
514
|
"original_name": self.name,
|
@@ -520,7 +539,7 @@ class Workflow(BaseModel):
|
|
520
539
|
),
|
521
540
|
)
|
522
541
|
|
523
|
-
def
|
542
|
+
def process_job(
|
524
543
|
self,
|
525
544
|
job: Job,
|
526
545
|
run_id: str,
|
@@ -529,7 +548,7 @@ class Workflow(BaseModel):
|
|
529
548
|
parent_run_id: Optional[str] = None,
|
530
549
|
event: Optional[ThreadEvent] = None,
|
531
550
|
) -> tuple[Status, DictData]:
|
532
|
-
"""Job
|
551
|
+
"""Process a job by passing dynamic parameters from the main workflow
|
533
552
|
execution to the target job object via job's ID.
|
534
553
|
|
535
554
|
This execution is the minimum level of execution of this workflow
|
@@ -599,132 +618,48 @@ class Workflow(BaseModel):
|
|
599
618
|
|
600
619
|
return result.status, catch(context, status=result.status)
|
601
620
|
|
602
|
-
def
|
621
|
+
def process(
|
603
622
|
self,
|
604
|
-
|
623
|
+
job_queue: Queue[str],
|
624
|
+
run_id: str,
|
625
|
+
context: DictData,
|
605
626
|
*,
|
606
|
-
|
627
|
+
parent_run_id: Optional[str] = None,
|
607
628
|
event: Optional[ThreadEvent] = None,
|
608
629
|
timeout: float = 3600,
|
609
630
|
max_job_parallel: int = 2,
|
631
|
+
total_job: Optional[int] = None,
|
610
632
|
) -> Result:
|
611
|
-
"""
|
612
|
-
included in this workflow model with `jobs` field.
|
613
|
-
|
614
|
-
The result of execution process for each job and stages on this
|
615
|
-
workflow will keep in dict which able to catch out with all jobs and
|
616
|
-
stages by dot annotation.
|
617
|
-
|
618
|
-
For example with non-strategy job, when I want to use the output
|
619
|
-
from previous stage, I can access it with syntax:
|
633
|
+
"""Job process method.
|
620
634
|
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
the job execution. It will warp that error and keep it in the key `errors`
|
631
|
-
at the result context.
|
632
|
-
|
633
|
-
|
634
|
-
Execution --> Ok --> Result
|
635
|
-
|-status: CANCEL
|
636
|
-
╰-context:
|
637
|
-
╰-errors:
|
638
|
-
|-name: ...
|
639
|
-
╰-message: ...
|
640
|
-
|
641
|
-
--> Ok --> Result
|
642
|
-
|-status: FAILED
|
643
|
-
╰-context:
|
644
|
-
╰-errors:
|
645
|
-
|-name: ...
|
646
|
-
╰-message: ...
|
647
|
-
|
648
|
-
--> Ok --> Result
|
649
|
-
╰-status: SKIP
|
650
|
-
|
651
|
-
--> Ok --> Result
|
652
|
-
╰-status: SUCCESS
|
653
|
-
|
654
|
-
:param params: A parameter data that will parameterize before execution.
|
655
|
-
:param run_id: (Optional[str]) A workflow running ID.
|
656
|
-
:param event: (Event) An Event manager instance that use to cancel this
|
657
|
-
execution if it forces stopped by parent execution.
|
658
|
-
:param timeout: (float) A workflow execution time out in second unit
|
659
|
-
that use for limit time of execution and waiting job dependency.
|
660
|
-
This value does not force stop the task that still running more than
|
661
|
-
this limit time. (Default: 60 * 60 seconds)
|
662
|
-
:param max_job_parallel: (int) The maximum workers that use for job
|
663
|
-
execution in `ThreadPoolExecutor` object. (Default: 2 workers)
|
664
|
-
|
665
|
-
:rtype: Result
|
635
|
+
Args:
|
636
|
+
job_queue:
|
637
|
+
run_id (str):
|
638
|
+
context (DictData):
|
639
|
+
parent_run_id (str, default None):
|
640
|
+
event (Event, default None):
|
641
|
+
timeout:
|
642
|
+
max_job_parallel:
|
643
|
+
total_job:
|
666
644
|
"""
|
667
645
|
ts: float = time.monotonic()
|
668
|
-
parent_run_id, run_id = extract_id(
|
669
|
-
self.name, run_id=run_id, extras=self.extras
|
670
|
-
)
|
671
646
|
trace: Trace = get_trace(
|
672
647
|
run_id, parent_run_id=parent_run_id, extras=self.extras
|
673
648
|
)
|
674
|
-
context: DictData = self.parameterize(params)
|
675
|
-
event: ThreadEvent = event or ThreadEvent()
|
676
|
-
max_job_parallel: int = dynamic(
|
677
|
-
"max_job_parallel", f=max_job_parallel, extras=self.extras
|
678
|
-
)
|
679
|
-
trace.info(
|
680
|
-
f"[WORKFLOW]: Execute: {self.name!r} ("
|
681
|
-
f"{'parallel' if max_job_parallel > 1 else 'sequential'} jobs)"
|
682
|
-
)
|
683
|
-
if not self.jobs:
|
684
|
-
trace.warning(f"[WORKFLOW]: {self.name!r} does not set jobs")
|
685
|
-
return Result(
|
686
|
-
run_id=run_id,
|
687
|
-
parent_run_id=parent_run_id,
|
688
|
-
status=SUCCESS,
|
689
|
-
context=catch(context, status=SUCCESS),
|
690
|
-
info={"execution_time": time.monotonic() - ts},
|
691
|
-
extras=self.extras,
|
692
|
-
)
|
693
|
-
|
694
|
-
job_queue: Queue = Queue()
|
695
|
-
for job_id in self.jobs:
|
696
|
-
job_queue.put(job_id)
|
697
|
-
|
698
649
|
not_timeout_flag: bool = True
|
699
|
-
total_job: int = len(self.jobs)
|
650
|
+
total_job: int = total_job or len(self.jobs)
|
700
651
|
statuses: list[Status] = [WAIT] * total_job
|
701
652
|
skip_count: int = 0
|
702
653
|
sequence_statuses: list[Status] = []
|
703
|
-
timeout: float = dynamic(
|
704
|
-
"max_job_exec_timeout", f=timeout, extras=self.extras
|
705
|
-
)
|
706
|
-
catch(context, status=WAIT)
|
707
654
|
if event and event.is_set():
|
708
|
-
|
655
|
+
raise WorkflowCancelError(
|
709
656
|
"Execution was canceled from the event was set "
|
710
657
|
"before workflow execution."
|
711
658
|
)
|
712
|
-
trace.error(f"[WORKFLOW]: {err_msg}")
|
713
|
-
return Result(
|
714
|
-
run_id=run_id,
|
715
|
-
parent_run_id=parent_run_id,
|
716
|
-
status=CANCEL,
|
717
|
-
context=catch(
|
718
|
-
context,
|
719
|
-
status=CANCEL,
|
720
|
-
updated={"errors": WorkflowCancelError(err_msg).to_dict()},
|
721
|
-
),
|
722
|
-
info={"execution_time": time.monotonic() - ts},
|
723
|
-
extras=self.extras,
|
724
|
-
)
|
725
659
|
|
726
660
|
# NOTE: Force update internal extras for handler circle execution.
|
727
661
|
self.extras.update({"__sys_exec_break_circle": self.name})
|
662
|
+
|
728
663
|
with ThreadPoolExecutor(max_job_parallel, "wf") as executor:
|
729
664
|
futures: list[Future] = []
|
730
665
|
|
@@ -754,23 +689,9 @@ class Workflow(BaseModel):
|
|
754
689
|
|
755
690
|
if check == FAILED: # pragma: no cov
|
756
691
|
pop_sys_extras(self.extras)
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
status=FAILED,
|
761
|
-
context=catch(
|
762
|
-
context,
|
763
|
-
status=FAILED,
|
764
|
-
updated={
|
765
|
-
"status": FAILED,
|
766
|
-
"errors": WorkflowError(
|
767
|
-
f"Validate job trigger rule was failed "
|
768
|
-
f"with {job.trigger_rule.value!r}."
|
769
|
-
).to_dict(),
|
770
|
-
},
|
771
|
-
),
|
772
|
-
info={"execution_time": time.monotonic() - ts},
|
773
|
-
extras=self.extras,
|
692
|
+
raise WorkflowError(
|
693
|
+
f"Validate job trigger rule was failed with "
|
694
|
+
f"{job.trigger_rule.value!r}."
|
774
695
|
)
|
775
696
|
elif check == SKIP: # pragma: no cov
|
776
697
|
trace.info(
|
@@ -785,7 +706,7 @@ class Workflow(BaseModel):
|
|
785
706
|
if max_job_parallel > 1:
|
786
707
|
futures.append(
|
787
708
|
executor.submit(
|
788
|
-
self.
|
709
|
+
self.process_job,
|
789
710
|
job=job,
|
790
711
|
run_id=run_id,
|
791
712
|
context=context,
|
@@ -799,7 +720,7 @@ class Workflow(BaseModel):
|
|
799
720
|
if len(futures) < 1:
|
800
721
|
futures.append(
|
801
722
|
executor.submit(
|
802
|
-
self.
|
723
|
+
self.process_job,
|
803
724
|
job=job,
|
804
725
|
run_id=run_id,
|
805
726
|
context=context,
|
@@ -850,13 +771,8 @@ class Workflow(BaseModel):
|
|
850
771
|
|
851
772
|
pop_sys_extras(self.extras)
|
852
773
|
st: Status = validate_statuses(statuses)
|
853
|
-
return Result(
|
854
|
-
|
855
|
-
parent_run_id=parent_run_id,
|
856
|
-
status=st,
|
857
|
-
context=catch(context, status=st),
|
858
|
-
info={"execution_time": time.monotonic() - ts},
|
859
|
-
extras=self.extras,
|
774
|
+
return Result.from_trace(trace).catch(
|
775
|
+
status=st, context=catch(context, status=st)
|
860
776
|
)
|
861
777
|
|
862
778
|
event.set()
|
@@ -864,85 +780,86 @@ class Workflow(BaseModel):
|
|
864
780
|
future.cancel()
|
865
781
|
|
866
782
|
trace.error(
|
867
|
-
|
868
|
-
|
783
|
+
(
|
784
|
+
f"{self.name!r} was timeout because it use exec time more "
|
785
|
+
f"than {timeout} seconds."
|
786
|
+
),
|
787
|
+
module="workflow",
|
869
788
|
)
|
870
789
|
|
871
790
|
time.sleep(0.0025)
|
872
791
|
|
873
792
|
pop_sys_extras(self.extras)
|
874
|
-
|
875
|
-
|
876
|
-
|
877
|
-
status=FAILED,
|
878
|
-
context=catch(
|
879
|
-
context,
|
880
|
-
status=FAILED,
|
881
|
-
updated={
|
882
|
-
"errors": WorkflowTimeoutError(
|
883
|
-
f"{self.name!r} was timeout because it use exec time "
|
884
|
-
f"more than {timeout} seconds."
|
885
|
-
).to_dict(),
|
886
|
-
},
|
887
|
-
),
|
888
|
-
info={"execution_time": time.monotonic() - ts},
|
889
|
-
extras=self.extras,
|
793
|
+
raise WorkflowTimeoutError(
|
794
|
+
f"{self.name!r} was timeout because it use exec time more than "
|
795
|
+
f"{timeout} seconds."
|
890
796
|
)
|
891
797
|
|
892
|
-
def
|
798
|
+
def _execute(
|
893
799
|
self,
|
800
|
+
params: DictData,
|
801
|
+
trace: Trace,
|
894
802
|
context: DictData,
|
895
803
|
*,
|
896
|
-
run_id: Optional[str] = None,
|
897
804
|
event: Optional[ThreadEvent] = None,
|
898
805
|
timeout: float = 3600,
|
899
806
|
max_job_parallel: int = 2,
|
900
|
-
|
901
|
-
|
902
|
-
|
903
|
-
|
904
|
-
|
905
|
-
|
807
|
+
total_job: Optional[int] = None,
|
808
|
+
) -> Result:
|
809
|
+
"""Wrapped Execute method."""
|
810
|
+
context.update(
|
811
|
+
{"jobs": {}, "info": {"exec_start": get_dt_now()}}
|
812
|
+
| self.parameterize(params)
|
813
|
+
)
|
814
|
+
trace.info(
|
815
|
+
f"[WORKFLOW]: Execute: {self.name!r} ("
|
816
|
+
f"{'parallel' if max_job_parallel > 1 else 'sequential'} jobs)"
|
817
|
+
)
|
818
|
+
if not self.jobs:
|
819
|
+
trace.warning(f"[WORKFLOW]: {self.name!r} does not set jobs")
|
820
|
+
return Result.from_trace(trace).catch(
|
821
|
+
status=SUCCESS, context=catch(context, status=SUCCESS)
|
822
|
+
)
|
906
823
|
|
907
|
-
|
908
|
-
|
909
|
-
|
910
|
-
event: (Event) An Event manager instance that use to cancel this
|
911
|
-
execution if it forces stopped by parent execution.
|
912
|
-
timeout: (float) A workflow execution time out in second unit
|
913
|
-
that use for limit time of execution and waiting job dependency.
|
914
|
-
This value does not force stop the task that still running more
|
915
|
-
than this limit time. (Default: 60 * 60 seconds)
|
916
|
-
max_job_parallel: (int) The maximum workers that use for job
|
917
|
-
execution in `ThreadPoolExecutor` object. (Default: 2 workers)
|
824
|
+
job_queue: Queue[str] = Queue()
|
825
|
+
for job_id in self.jobs:
|
826
|
+
job_queue.put(job_id)
|
918
827
|
|
919
|
-
|
920
|
-
|
921
|
-
|
922
|
-
|
923
|
-
|
924
|
-
|
925
|
-
|
926
|
-
|
927
|
-
|
828
|
+
catch(context, status=WAIT)
|
829
|
+
return self.process(
|
830
|
+
job_queue,
|
831
|
+
run_id=trace.run_id,
|
832
|
+
context=context,
|
833
|
+
parent_run_id=trace.parent_run_id,
|
834
|
+
event=event,
|
835
|
+
timeout=timeout,
|
836
|
+
max_job_parallel=max_job_parallel,
|
837
|
+
total_job=total_job,
|
928
838
|
)
|
929
|
-
|
839
|
+
|
840
|
+
def _rerun(
|
841
|
+
self,
|
842
|
+
params: DictData,
|
843
|
+
trace: Trace,
|
844
|
+
context: DictData,
|
845
|
+
*,
|
846
|
+
event: Optional[ThreadEvent] = None,
|
847
|
+
timeout: float = 3600,
|
848
|
+
max_job_parallel: int = 2,
|
849
|
+
) -> Result:
|
850
|
+
"""Wrapped Rerun method."""
|
851
|
+
if params["status"] == SUCCESS:
|
930
852
|
trace.info(
|
931
853
|
"[WORKFLOW]: Does not rerun because it already executed with "
|
932
854
|
"success status."
|
933
855
|
)
|
934
856
|
return Result.from_trace(trace).catch(
|
935
857
|
status=SUCCESS,
|
936
|
-
context=catch(context=
|
858
|
+
context=catch(context=params, status=SUCCESS),
|
937
859
|
)
|
938
860
|
|
939
|
-
err: dict[str, str] =
|
861
|
+
err: dict[str, str] = params.get("errors", {})
|
940
862
|
trace.info(f"[WORKFLOW]: Previous error: {err}")
|
941
|
-
|
942
|
-
event: ThreadEvent = event or ThreadEvent()
|
943
|
-
max_job_parallel: int = dynamic(
|
944
|
-
"max_job_parallel", f=max_job_parallel, extras=self.extras
|
945
|
-
)
|
946
863
|
trace.info(
|
947
864
|
f"[WORKFLOW]: Execute: {self.name!r} ("
|
948
865
|
f"{'parallel' if max_job_parallel > 1 else 'sequential'} jobs)"
|
@@ -950,19 +867,24 @@ class Workflow(BaseModel):
|
|
950
867
|
if not self.jobs:
|
951
868
|
trace.warning(f"[WORKFLOW]: {self.name!r} does not set jobs")
|
952
869
|
return Result.from_trace(trace).catch(
|
953
|
-
status=SUCCESS,
|
954
|
-
context=catch(context=context, status=SUCCESS),
|
870
|
+
status=SUCCESS, context=catch(context=params, status=SUCCESS)
|
955
871
|
)
|
956
872
|
|
957
873
|
# NOTE: Prepare the new context variable for rerun process.
|
958
|
-
jobs: DictData =
|
959
|
-
context
|
960
|
-
|
961
|
-
|
962
|
-
|
874
|
+
jobs: DictData = params.get("jobs")
|
875
|
+
context.update(
|
876
|
+
{
|
877
|
+
"params": params["params"].copy(),
|
878
|
+
"jobs": {
|
879
|
+
j: jobs[j]
|
880
|
+
for j in jobs
|
881
|
+
if jobs[j].get("status", FAILED) == SUCCESS
|
882
|
+
},
|
883
|
+
}
|
884
|
+
)
|
963
885
|
|
964
886
|
total_job: int = 0
|
965
|
-
job_queue: Queue = Queue()
|
887
|
+
job_queue: Queue[str] = Queue()
|
966
888
|
for job_id in self.jobs:
|
967
889
|
|
968
890
|
if job_id in context["jobs"]:
|
@@ -972,178 +894,199 @@ class Workflow(BaseModel):
|
|
972
894
|
total_job += 1
|
973
895
|
|
974
896
|
if total_job == 0:
|
975
|
-
|
976
|
-
"
|
897
|
+
raise WorkflowSkipError(
|
898
|
+
"It does not have job to rerun. it will change "
|
977
899
|
"status to skip."
|
978
900
|
)
|
979
|
-
return Result.from_trace(trace).catch(
|
980
|
-
status=SKIP,
|
981
|
-
context=catch(context=context, status=SKIP),
|
982
|
-
)
|
983
|
-
|
984
|
-
not_timeout_flag: bool = True
|
985
|
-
statuses: list[Status] = [WAIT] * total_job
|
986
|
-
skip_count: int = 0
|
987
|
-
sequence_statuses: list[Status] = []
|
988
|
-
timeout: float = dynamic(
|
989
|
-
"max_job_exec_timeout", f=timeout, extras=self.extras
|
990
|
-
)
|
991
901
|
|
992
902
|
catch(context, status=WAIT)
|
993
|
-
|
994
|
-
|
995
|
-
|
996
|
-
|
997
|
-
|
998
|
-
|
999
|
-
|
1000
|
-
|
1001
|
-
|
1002
|
-
|
1003
|
-
).to_dict(),
|
1004
|
-
},
|
1005
|
-
),
|
1006
|
-
)
|
903
|
+
return self.process(
|
904
|
+
job_queue,
|
905
|
+
run_id=trace.run_id,
|
906
|
+
context=context,
|
907
|
+
parent_run_id=trace.parent_run_id,
|
908
|
+
event=event,
|
909
|
+
timeout=timeout,
|
910
|
+
max_job_parallel=max_job_parallel,
|
911
|
+
total_job=total_job,
|
912
|
+
)
|
1007
913
|
|
1008
|
-
|
1009
|
-
|
1010
|
-
|
1011
|
-
|
914
|
+
def execute(
|
915
|
+
self,
|
916
|
+
params: DictData,
|
917
|
+
*,
|
918
|
+
run_id: Optional[str] = None,
|
919
|
+
event: Optional[ThreadEvent] = None,
|
920
|
+
timeout: float = 3600,
|
921
|
+
max_job_parallel: int = 2,
|
922
|
+
rerun_mode: bool = False,
|
923
|
+
) -> Result:
|
924
|
+
"""Execute workflow with passing a dynamic parameters to all jobs that
|
925
|
+
included in this workflow model with `jobs` field.
|
1012
926
|
|
1013
|
-
|
1014
|
-
|
1015
|
-
|
1016
|
-
job_id: str = job_queue.get()
|
1017
|
-
job: Job = self.job(name=job_id)
|
1018
|
-
if (check := job.check_needs(context["jobs"])) == WAIT:
|
1019
|
-
job_queue.task_done()
|
1020
|
-
job_queue.put(job_id)
|
1021
|
-
consecutive_waits += 1
|
927
|
+
The result of execution process for each job and stages on this
|
928
|
+
workflow will keep in dict which able to catch out with all jobs and
|
929
|
+
stages by dot annotation.
|
1022
930
|
|
1023
|
-
|
1024
|
-
|
1025
|
-
time.sleep(backoff_sleep)
|
1026
|
-
continue
|
931
|
+
For example with non-strategy job, when I want to use the output
|
932
|
+
from previous stage, I can access it with syntax:
|
1027
933
|
|
1028
|
-
|
1029
|
-
|
1030
|
-
backoff_sleep = 0.01
|
934
|
+
... ${job-id}.stages.${stage-id}.outputs.${key}
|
935
|
+
... ${job-id}.stages.${stage-id}.errors.${key}
|
1031
936
|
|
1032
|
-
|
1033
|
-
return Result.from_trace(trace).catch(
|
1034
|
-
status=FAILED,
|
1035
|
-
context=catch(
|
1036
|
-
context,
|
1037
|
-
status=FAILED,
|
1038
|
-
updated={
|
1039
|
-
"status": FAILED,
|
1040
|
-
"errors": WorkflowError(
|
1041
|
-
f"Validate job trigger rule was failed "
|
1042
|
-
f"with {job.trigger_rule.value!r}."
|
1043
|
-
).to_dict(),
|
1044
|
-
},
|
1045
|
-
),
|
1046
|
-
)
|
1047
|
-
elif check == SKIP: # pragma: no cov
|
1048
|
-
trace.info(
|
1049
|
-
f"[JOB]: Skip job: {job_id!r} from trigger rule."
|
1050
|
-
)
|
1051
|
-
job.set_outputs(output={"status": SKIP}, to=context)
|
1052
|
-
job_queue.task_done()
|
1053
|
-
skip_count += 1
|
1054
|
-
continue
|
937
|
+
But example for strategy job:
|
1055
938
|
|
1056
|
-
|
1057
|
-
|
1058
|
-
executor.submit(
|
1059
|
-
self.execute_job,
|
1060
|
-
job=job,
|
1061
|
-
run_id=run_id,
|
1062
|
-
context=context,
|
1063
|
-
parent_run_id=parent_run_id,
|
1064
|
-
event=event,
|
1065
|
-
),
|
1066
|
-
)
|
1067
|
-
job_queue.task_done()
|
1068
|
-
continue
|
939
|
+
... ${job-id}.strategies.${strategy-id}.stages.${stage-id}.outputs.${key}
|
940
|
+
... ${job-id}.strategies.${strategy-id}.stages.${stage-id}.errors.${key}
|
1069
941
|
|
1070
|
-
|
1071
|
-
|
1072
|
-
|
1073
|
-
self.execute_job,
|
1074
|
-
job=job,
|
1075
|
-
run_id=run_id,
|
1076
|
-
context=context,
|
1077
|
-
parent_run_id=parent_run_id,
|
1078
|
-
event=event,
|
1079
|
-
)
|
1080
|
-
)
|
1081
|
-
elif (future := futures.pop(0)).done():
|
1082
|
-
if e := future.exception():
|
1083
|
-
sequence_statuses.append(get_status_from_error(e))
|
1084
|
-
else:
|
1085
|
-
st, _ = future.result()
|
1086
|
-
sequence_statuses.append(st)
|
1087
|
-
job_queue.put(job_id)
|
1088
|
-
elif future.cancelled():
|
1089
|
-
sequence_statuses.append(CANCEL)
|
1090
|
-
job_queue.put(job_id)
|
1091
|
-
elif future.running() or "state=pending" in str(future):
|
1092
|
-
futures.insert(0, future)
|
1093
|
-
job_queue.put(job_id)
|
1094
|
-
else: # pragma: no cov
|
1095
|
-
job_queue.put(job_id)
|
1096
|
-
futures.insert(0, future)
|
1097
|
-
trace.warning(
|
1098
|
-
f"[WORKFLOW]: ... Execution non-threading not "
|
1099
|
-
f"handle: {future}."
|
1100
|
-
)
|
942
|
+
This method already handles all exception classes that can be raised from
|
943
|
+
the job execution. It will wrap that error and keep it in the key `errors`
|
944
|
+
at the result context.
|
1101
945
|
|
1102
|
-
job_queue.task_done()
|
1103
946
|
|
1104
|
-
|
1105
|
-
|
1106
|
-
|
1107
|
-
|
1108
|
-
|
1109
|
-
|
1110
|
-
statuses[total] = get_status_from_error(e)
|
947
|
+
Execution --> Ok --> Result
|
948
|
+
|-status: CANCEL
|
949
|
+
╰-context:
|
950
|
+
╰-errors:
|
951
|
+
|-name: ...
|
952
|
+
╰-message: ...
|
1111
953
|
|
1112
|
-
|
1113
|
-
|
1114
|
-
|
954
|
+
--> Ok --> Result
|
955
|
+
|-status: FAILED
|
956
|
+
╰-context:
|
957
|
+
╰-errors:
|
958
|
+
|-name: ...
|
959
|
+
╰-message: ...
|
1115
960
|
|
1116
|
-
|
1117
|
-
|
1118
|
-
statuses[total + 1 + skip_count + i] = s
|
961
|
+
--> Ok --> Result
|
962
|
+
╰-status: SKIP
|
1119
963
|
|
1120
|
-
|
1121
|
-
|
1122
|
-
status=st,
|
1123
|
-
context=catch(context, status=st),
|
1124
|
-
)
|
964
|
+
--> Ok --> Result
|
965
|
+
╰-status: SUCCESS
|
1125
966
|
|
1126
|
-
|
1127
|
-
|
1128
|
-
|
967
|
+
Args:
|
968
|
+
params (DictData): A parameter data that will parameterize before
|
969
|
+
execution.
|
970
|
+
run_id (str, default None): A workflow running ID.
|
971
|
+
event (Event, default None): An Event manager instance that use to
|
972
|
+
cancel this execution if it forces stopped by parent execution.
|
973
|
+
timeout (float, default 3600): A workflow execution time out in
|
974
|
+
second unit that use for limit time of execution and waiting job
|
975
|
+
dependency. This value does not force stop the task that still
|
976
|
+
running more than this limit time. (Default: 60 * 60 seconds)
|
977
|
+
max_job_parallel (int, default 2): The maximum number of workers used for
|
978
|
+
job execution in `ThreadPoolExecutor` object.
|
979
|
+
rerun_mode (bool, default False): A rerun mode flag that will use
|
980
|
+
`_rerun` method if it set be True.
|
1129
981
|
|
982
|
+
Returns:
|
983
|
+
Result: Return Result object that create from execution context with
|
984
|
+
return mode.
|
985
|
+
"""
|
986
|
+
ts: float = time.monotonic()
|
987
|
+
parent_run_id, run_id = extract_id(
|
988
|
+
self.name, run_id=run_id, extras=self.extras
|
989
|
+
)
|
990
|
+
trace: Trace = get_trace(
|
991
|
+
run_id,
|
992
|
+
parent_run_id=parent_run_id,
|
993
|
+
extras=self.extras,
|
994
|
+
pre_process=True,
|
995
|
+
)
|
996
|
+
context: DictData = {
|
997
|
+
"jobs": {},
|
998
|
+
"status": WAIT,
|
999
|
+
"info": {"exec_start": get_dt_now()},
|
1000
|
+
}
|
1001
|
+
event: ThreadEvent = event or ThreadEvent()
|
1002
|
+
max_job_parallel: int = dynamic(
|
1003
|
+
"max_job_parallel", f=max_job_parallel, extras=self.extras
|
1004
|
+
)
|
1005
|
+
try:
|
1006
|
+
if rerun_mode:
|
1007
|
+
return self._rerun(
|
1008
|
+
params,
|
1009
|
+
trace,
|
1010
|
+
context,
|
1011
|
+
event=event,
|
1012
|
+
timeout=timeout,
|
1013
|
+
max_job_parallel=max_job_parallel,
|
1014
|
+
)
|
1015
|
+
return self._execute(
|
1016
|
+
params,
|
1017
|
+
trace,
|
1018
|
+
context,
|
1019
|
+
event=event,
|
1020
|
+
timeout=timeout,
|
1021
|
+
max_job_parallel=max_job_parallel,
|
1022
|
+
)
|
1023
|
+
except WorkflowError as e:
|
1024
|
+
updated = {"errors": e.to_dict()}
|
1025
|
+
if isinstance(e, WorkflowSkipError):
|
1026
|
+
trace.error(f"⏭️ Skip: {e}", module="workflow")
|
1027
|
+
updated = None
|
1028
|
+
else:
|
1029
|
+
trace.error(f"📢 Workflow Failed:||{e}", module="workflow")
|
1030
|
+
|
1031
|
+
st: Status = get_status_from_error(e)
|
1032
|
+
return Result.from_trace(trace).catch(
|
1033
|
+
status=st, context=catch(context, status=st, updated=updated)
|
1034
|
+
)
|
1035
|
+
except Exception as e:
|
1130
1036
|
trace.error(
|
1131
|
-
f"
|
1132
|
-
|
1037
|
+
f"💥 Error Failed:||🚨 {traceback.format_exc()}||",
|
1038
|
+
module="workflow",
|
1039
|
+
)
|
1040
|
+
return Result.from_trace(trace).catch(
|
1041
|
+
status=FAILED,
|
1042
|
+
context=catch(
|
1043
|
+
context, status=FAILED, updated={"errors": to_dict(e)}
|
1044
|
+
),
|
1045
|
+
)
|
1046
|
+
finally:
|
1047
|
+
context["info"].update(
|
1048
|
+
{
|
1049
|
+
"exec_end": get_dt_now(),
|
1050
|
+
"exec_latency": round(time.monotonic() - ts, 6),
|
1051
|
+
}
|
1133
1052
|
)
|
1134
1053
|
|
1135
|
-
|
1054
|
+
def rerun(
|
1055
|
+
self,
|
1056
|
+
context: DictData,
|
1057
|
+
*,
|
1058
|
+
run_id: Optional[str] = None,
|
1059
|
+
event: Optional[ThreadEvent] = None,
|
1060
|
+
timeout: float = 3600,
|
1061
|
+
max_job_parallel: int = 2,
|
1062
|
+
) -> Result: # pragma: no cov
|
1063
|
+
"""Re-Execute workflow with passing the error context data.
|
1136
1064
|
|
1137
|
-
|
1138
|
-
|
1139
|
-
|
1140
|
-
|
1141
|
-
|
1142
|
-
|
1143
|
-
|
1144
|
-
|
1145
|
-
|
1146
|
-
|
1147
|
-
|
1148
|
-
|
1065
|
+
Warnings:
|
1066
|
+
This rerun method allows rerunning at the job execution level only. That means
|
1067
|
+
it does not support rerunning only a stage.
|
1068
|
+
|
1069
|
+
Args:
|
1070
|
+
context (DictData): A context result that get the failed status.
|
1071
|
+
run_id (str, default None): A workflow running ID.
|
1072
|
+
event (Event, default None): An Event manager instance that use to
|
1073
|
+
cancel this execution if it forces stopped by parent execution.
|
1074
|
+
timeout (float, default 3600): A workflow execution time out in
|
1075
|
+
second unit that use for limit time of execution and waiting job
|
1076
|
+
dependency. This value does not force stop the task that still
|
1077
|
+
running more than this limit time. (Default: 60 * 60 seconds)
|
1078
|
+
max_job_parallel (int, default 2): The maximum number of workers used for
|
1079
|
+
job execution in `ThreadPoolExecutor` object.
|
1080
|
+
|
1081
|
+
Returns:
|
1082
|
+
Result: Return Result object that create from execution context with
|
1083
|
+
return mode.
|
1084
|
+
"""
|
1085
|
+
return self.execute(
|
1086
|
+
context,
|
1087
|
+
run_id=run_id,
|
1088
|
+
event=event,
|
1089
|
+
timeout=timeout,
|
1090
|
+
max_job_parallel=max_job_parallel,
|
1091
|
+
rerun_mode=True,
|
1149
1092
|
)
|