ddeutil-workflow 0.0.13__py3-none-any.whl → 0.0.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +4 -1
- ddeutil/workflow/__types.py +24 -8
- ddeutil/workflow/api.py +2 -2
- ddeutil/workflow/conf.py +41 -0
- ddeutil/workflow/cron.py +19 -12
- ddeutil/workflow/job.py +189 -153
- ddeutil/workflow/log.py +28 -14
- ddeutil/workflow/scheduler.py +233 -112
- ddeutil/workflow/stage.py +66 -33
- ddeutil/workflow/utils.py +106 -40
- {ddeutil_workflow-0.0.13.dist-info → ddeutil_workflow-0.0.14.dist-info}/METADATA +6 -4
- ddeutil_workflow-0.0.14.dist-info/RECORD +22 -0
- {ddeutil_workflow-0.0.13.dist-info → ddeutil_workflow-0.0.14.dist-info}/WHEEL +1 -1
- ddeutil_workflow-0.0.13.dist-info/RECORD +0 -21
- {ddeutil_workflow-0.0.13.dist-info → ddeutil_workflow-0.0.14.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.13.dist-info → ddeutil_workflow-0.0.14.dist-info}/entry_points.txt +0 -0
- {ddeutil_workflow-0.0.13.dist-info → ddeutil_workflow-0.0.14.dist-info}/top_level.txt +0 -0
ddeutil/workflow/scheduler.py
CHANGED
@@ -11,34 +11,39 @@ import json
 import logging
 import os
 import time
-from collections.abc import Iterator
 from concurrent.futures import (
     Future,
     ProcessPoolExecutor,
     ThreadPoolExecutor,
     as_completed,
 )
-from dataclasses import dataclass, field
+from dataclasses import field
 from datetime import datetime, timedelta
 from functools import wraps
 from heapq import heappush
 from queue import Queue
 from textwrap import dedent
 from threading import Thread
-from typing import Optional
+from typing import Callable, Optional
 from zoneinfo import ZoneInfo
 
-from dotenv import load_dotenv
 from pydantic import BaseModel, Field
+from pydantic.dataclasses import dataclass
 from pydantic.functional_validators import field_validator, model_validator
 from typing_extensions import Self
 
+try:
+    from typing import ParamSpec
+except ImportError:
+    from typing_extensions import ParamSpec
+
 try:
     from schedule import CancelJob
 except ImportError:
     CancelJob = None
 
 from .__types import DictData, TupleStr
+from .conf import config
 from .cron import CronRunner
 from .exceptions import JobException, WorkflowException
 from .job import Job
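The new `ParamSpec` import (with a `typing_extensions` fallback for Python older than 3.10) feeds the `P = ParamSpec("P")` binding added in the next hunk, which is later used to type the reworked `catch_exceptions` decorator factory. A minimal, self-contained sketch of the pattern; the `greet` function is a made-up placeholder, not part of this package:

```python
from functools import wraps
from typing import Callable, TypeVar

try:
    from typing import ParamSpec  # Python 3.10+
except ImportError:
    from typing_extensions import ParamSpec

P = ParamSpec("P")
R = TypeVar("R")


def log_calls(func: Callable[P, R]) -> Callable[P, R]:
    """Wrap ``func`` while keeping its exact parameter types visible."""

    @wraps(func)
    def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
        print(f"calling {func.__name__}")
        return func(*args, **kwargs)

    return wrapper


@log_calls
def greet(name: str, excited: bool = False) -> str:  # placeholder function
    return f"hello {name}{'!' if excited else ''}"


print(greet("scheduler", excited=True))
```

Without `ParamSpec`, the wrapper's signature would collapse to `Callable[..., R]` and type checkers would stop validating call sites of decorated functions.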
@@ -54,9 +59,10 @@ from .utils import (
     get_diff_sec,
     has_template,
     param2template,
+    queue2str,
 )
 
-
+P = ParamSpec("P")
 logger = get_logger("ddeutil.workflow")
 
 # NOTE: Adjust logging level on the schedule package.
@@ -65,9 +71,9 @@ logging.getLogger("schedule").setLevel(logging.INFO)
 
 __all__: TupleStr = (
     "Workflow",
-    "WorkflowSchedule",
-    "WorkflowTask",
+    "WorkflowTaskData",
     "Schedule",
+    "ScheduleWorkflow",
     "workflow_task",
     "workflow_long_running_task",
     "workflow_control",
@@ -76,10 +82,10 @@ __all__: TupleStr = (
 
 
 class Workflow(BaseModel):
-    """Workflow Model this is the main future of this project because
-    be workflow data for running everywhere that you want or using it
-    scheduler task in background. It use lightweight coding line from
-    Model and enhance execute method on it.
+    """Workflow Pydantic Model this is the main future of this project because
+    it use to be workflow data for running everywhere that you want or using it
+    to scheduler task in background. It use lightweight coding line from
+    Pydantic Model and enhance execute method on it.
     """
 
     name: str = Field(description="A workflow name.")
@@ -91,7 +97,7 @@ class Workflow(BaseModel):
     )
     params: dict[str, Param] = Field(
         default_factory=dict,
-        description="A parameters that
+        description="A parameters that need to use on this workflow.",
     )
     on: list[On] = Field(
         default_factory=list,
@@ -103,14 +109,19 @@ class Workflow(BaseModel):
     )
     run_id: Optional[str] = Field(
         default=None,
-        description=
+        description=(
+            "A running workflow ID that is able to change after initialize."
+        ),
         repr=False,
         exclude=True,
     )
 
     @property
     def new_run_id(self) -> str:
-        """Running ID of this workflow that always generate new unique value.
+        """Running ID of this workflow that always generate new unique value.
+
+        :rtype: str
+        """
         return gen_id(self.name, unique=True)
 
     @classmethod
@@ -144,8 +155,17 @@ class Workflow(BaseModel):
         return cls.model_validate(obj=loader_data)
 
     @classmethod
-    def __bypass_on(
-
+    def __bypass_on(
+        cls,
+        data: DictData,
+        externals: DictData | None = None,
+    ) -> DictData:
+        """Bypass the on data to loaded config data.
+
+        :param data:
+        :param externals:
+        :rtype: DictData
+        """
         if on := data.pop("on", []):
             if isinstance(on, str):
                 on = [on]
@@ -180,12 +200,18 @@ class Workflow(BaseModel):
 
     @field_validator("desc", mode="after")
     def ___prepare_desc(cls, value: str) -> str:
-        """Prepare description string that was created on a template.
+        """Prepare description string that was created on a template.
+
+        :rtype: str
+        """
         return dedent(value)
 
     @model_validator(mode="after")
-    def __validate_jobs_need_and_prepare_running_id(self):
-        """Validate each need job in any jobs should exists.
+    def __validate_jobs_need_and_prepare_running_id(self) -> Self:
+        """Validate each need job in any jobs should exists.
+
+        :rtype: Self
+        """
         for job in self.jobs:
             if not_exist := [
                 need for need in self.jobs[job].needs if need not in self.jobs
@@ -221,7 +247,7 @@ class Workflow(BaseModel):
         return self.model_copy(update={"run_id": run_id})
 
     def job(self, name: str) -> Job:
-        """Return
+        """Return this workflow's job that already created on this job field.
 
         :param name: A job name that want to get from a mapping of job models.
         :type name: str
@@ -237,11 +263,18 @@ class Workflow(BaseModel):
         return self.jobs[name]
 
     def parameterize(self, params: DictData) -> DictData:
-        """Prepare parameters before
-
-
+        """Prepare a passing parameters before use it in execution process.
+        This method will validate keys of an incoming params with this object
+        necessary params field and then create a jobs key to result mapping
+        that will keep any execution result from its job.
+
+            ... {
+            ...     "params": <an-incoming-params>,
+            ...     "jobs": {}
+            ... }
 
         :param params: A parameter mapping that receive from workflow execution.
+        :type params: DictData
         :rtype: DictData
         """
         # VALIDATE: Incoming params should have keys that set on this workflow.
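The expanded `parameterize` docstring pins down its contract: validate the incoming keys against the model's `params` field, then return a two-key mapping. A plain-data sketch of that shape with placeholder values, not output from a real run:

```python
# The mapping shape that ``parameterize`` is documented to return:
context = {
    "params": {"run-date": "2024-01-01"},  # validated incoming params
    "jobs": {},                            # filled in as jobs finish
}

# Each finished job lands under its own name, which is what makes template
# paths like ``${job-name}.stages.${stage-id}.outputs.${key}`` resolvable:
context["jobs"]["first-job"] = {"stages": {"stage-01": {"outputs": {}}}}
print(context["jobs"]["first-job"]["stages"]["stage-01"]["outputs"])  # {}
```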
@@ -255,7 +288,7 @@ class Workflow(BaseModel):
             f"{', '.join(check_key)}."
         )
 
-        # NOTE:
+        # NOTE: Mapping type of param before adding it to the ``params`` key.
         return {
             "params": (
                 params
@@ -299,9 +332,8 @@ class Workflow(BaseModel):
             f"queue id: {id(queue)}"
        )
         log: Log = log or FileLog
-        tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
         gen: CronRunner = on.generate(
-            datetime.now(tz=tz).replace(second=0, microsecond=0)
+            datetime.now(tz=config.tz).replace(second=0, microsecond=0)
             + timedelta(seconds=1)
         )
         cron_tz: ZoneInfo = gen.tz
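This is one of several hunks that replace an inline `ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))` lookup with the shared `config.tz` from the new `ddeutil/workflow/conf.py` (+41 lines in this release). That module is not shown in this diff, so the sketch below is only a plausible reading of what the `config` object provides, reusing the same environment variable and default as the removed lines:

```python
import os
from zoneinfo import ZoneInfo


class Config:
    """Assumed shape of the shared config object; not the real conf.py."""

    @property
    def tz(self) -> ZoneInfo:
        # Same env var and "UTC" fallback that the deleted inline lookups used.
        return ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))


config = Config()
print(config.tz)  # zoneinfo.ZoneInfo(key='UTC') unless the env var is set
```

Centralizing the lookup means every call site resolves the timezone the same way instead of re-reading the environment ad hoc.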
@@ -456,35 +488,55 @@ class Workflow(BaseModel):
 
     def execute_job(
         self,
-        job: str,
+        job_id: str,
         params: DictData,
+        *,
+        raise_error: bool = True,
     ) -> Result:
-        """Job
+        """Workflow Job execution with passing dynamic parameters from the
+        workflow execution to the target job.
 
-
+        This execution is the minimum level of execution of this workflow
+        model. It different with ``self.execute`` because this method run only
+        one job and return with context of this job data.
+
+        :param job_id: A job ID that want to execute.
         :param params: A params that was parameterized from workflow execution.
+        :param raise_error: A flag that raise error instead catching to result
+            if it get exception from job execution.
         :rtype: Result
         """
         # VALIDATE: check a job ID that exists in this workflow or not.
-        if job not in self.jobs:
+        if job_id not in self.jobs:
             raise WorkflowException(
-                f"The job ID: {
+                f"The job ID: {job_id} does not exists in {self.name!r} "
+                f"workflow."
             )
-        try:
-            logger.info(f"({self.run_id}) [WORKFLOW]: Start execute: {job!r}")
 
-
-
-            job_obj: Job = self.jobs[job].get_running_id(self.run_id)
-            j_rs: Result = job_obj.execute(params=params)
+        context: DictData = {}
+        logger.info(f"({self.run_id}) [WORKFLOW]: Start execute: {job_id!r}")
 
+        # IMPORTANT:
+        #   Change any job running IDs to this workflow running ID.
+        #
+        try:
+            job: Job = self.jobs[job_id].get_running_id(self.run_id)
+            job.set_outputs(
+                job.execute(params=params).context,
+                to=context,
+            )
         except JobException as err:
-
+            logger.error(
+                f"({self.run_id}) [WORKFLOW]: {err.__class__.__name__}: {err}"
+            )
+            if raise_error:
+                raise WorkflowException(
+                    f"Get job execution error {job_id}: JobException: {err}"
+                ) from None
+            raise NotImplementedError() from None
 
-        return Result(
-            status=j_rs.status,
-            context={job: job_obj.set_outputs(j_rs.context)},
-        )
+        return Result(status=0, context=context)
 
     def execute(
         self,
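The reshaped `execute_job` takes a `job_id`, funnels the job's outputs into a fresh `context` through `set_outputs(..., to=context)`, and, since `raise_error` defaults to `True`, converts a `JobException` into a `WorkflowException`. A usage sketch under stated assumptions: the config name, job ID, and the `from_loader` call mirror the `Schedule.from_loader` signature seen later in this file but are not confirmed examples from the package:

```python
from ddeutil.workflow.exceptions import WorkflowException
from ddeutil.workflow.scheduler import Workflow

# Hypothetical workflow config "demo-wf" containing a job "first-job".
wf = Workflow.from_loader("demo-wf", externals={})

try:
    rs = wf.execute_job(job_id="first-job", params=wf.parameterize({}))
    print(rs.status)   # 0: the method returns Result(status=0, context=...)
    print(rs.context)  # whatever set_outputs(..., to=context) collected
except WorkflowException as err:
    # Raised instead of swallowed because raise_error defaults to True.
    print(f"job failed: {err}")
```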
@@ -492,17 +544,8 @@ class Workflow(BaseModel):
         *,
         timeout: int = 60,
     ) -> Result:
-        """Execute workflow with passing dynamic parameters to
-        included in
-
-        :param params: An input parameters that use on workflow execution that
-            will parameterize before using it.
-        :param timeout: A workflow execution time out in second unit that use
-            for limit time of execution and waiting job dependency.
-        :rtype: Result
-
-        See Also:
-        ---
+        """Execute workflow with passing a dynamic parameters to all jobs that
+        included in this workflow model with ``jobs`` field.
 
         The result of execution process for each jobs and stages on this
         workflow will keeping in dict which able to catch out with all jobs and
@@ -513,10 +556,22 @@ class Workflow(BaseModel):
 
         ... ${job-name}.stages.${stage-id}.outputs.${key}
 
+        :param params: An input parameters that use on workflow execution that
+            will parameterize before using it. Default is None.
+        :type params: DictData | None
+        :param timeout: A workflow execution time out in second unit that use
+            for limit time of execution and waiting job dependency. Default is
+            60 seconds.
+        :type timeout: int
+        :rtype: Result
         """
         logger.info(f"({self.run_id}) [CORE]: Start Execute: {self.name!r} ...")
-
+
+        # NOTE: I use this condition because this method allow passing empty
+        #   params and I do not want to create new dict object.
+        params: DictData = {} if params is None else params
         ts: float = time.monotonic()
+        rs: Result = Result()
 
         # NOTE: It should not do anything if it does not have job.
         if not self.jobs:
@@ -524,7 +579,7 @@ class Workflow(BaseModel):
                 f"({self.run_id}) [WORKFLOW]: This workflow: {self.name!r} "
                 f"does not have any jobs"
             )
-            return
+            return rs.catch(status=0, context=params)
 
         # NOTE: Create a job queue that keep the job that want to running after
         # it dependency condition.
@@ -535,21 +590,32 @@ class Workflow(BaseModel):
         # NOTE: Create result context that will pass this context to any
         # execution dependency.
         context: DictData = self.parameterize(params)
+        status: int = 0
         try:
-
-
-
-
-
-
+            if config.max_job_parallel == 1:
+                self.__exec_non_threading(
+                    context=context,
+                    ts=ts,
+                    job_queue=jq,
+                    timeout=timeout,
+                )
+            else:
+                self.__exec_threading(
+                    context=context,
+                    ts=ts,
+                    job_queue=jq,
+                    worker=config.max_job_parallel,
+                    timeout=timeout,
             )
-            )
-            return Result(status=0, context=context)
         except WorkflowException as err:
             context.update(
-                {
+                {
+                    "error": err,
+                    "error_message": f"{err.__class__.__name__}: {err}",
+                },
             )
-
+            status = 1
+        return rs.catch(status=status, context=context)
 
     def __exec_threading(
         self,
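`execute` now branches on the new `config.max_job_parallel` setting: a value of 1 selects the sequential `__exec_non_threading` path, anything else the thread-pool path with that value as the worker count. The dispatch pattern in isolation, with stand-in job callables (the setting's semantics are inferred from this hunk alone):

```python
from concurrent.futures import ThreadPoolExecutor


def run_jobs(jobs, max_job_parallel: int = 2) -> list:
    """Run callables sequentially when parallelism is 1, else in a pool."""
    if max_job_parallel == 1:
        # Non-threading path: deterministic order, simpler error handling.
        return [job() for job in jobs]
    with ThreadPoolExecutor(max_workers=max_job_parallel) as pool:
        futures = [pool.submit(job) for job in jobs]
        return [f.result() for f in futures]


jobs = [lambda: "extract", lambda: "transform", lambda: "load"]
print(run_jobs(jobs, max_job_parallel=1))  # sequential
print(run_jobs(jobs, max_job_parallel=4))  # threaded
```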
@@ -560,11 +626,15 @@ class Workflow(BaseModel):
         worker: int = 2,
         timeout: int = 600,
     ) -> DictData:
-        """Workflow threading
+        """Workflow execution by threading strategy.
+
+        If a job need dependency, it will check dependency job ID from
+        context data before allow it run.
 
         :param context: A context workflow data that want to downstream passing.
         :param ts: A start timestamp that use for checking execute time should
             timeout.
+        :param job_queue: A job queue object.
         :param timeout: A second value unit that bounding running time.
         :param worker: A number of threading executor pool size.
         :rtype: DictData
@@ -598,18 +668,24 @@ class Workflow(BaseModel):
                     params=copy.deepcopy(context),
                 ),
             )
+
+            # NOTE: Mark this job queue done.
             job_queue.task_done()
 
         # NOTE: Wait for all items to finish processing
         job_queue.join()
 
-        for future in as_completed(futures):
+        for future in as_completed(futures, timeout=1800):
             if err := future.exception():
                 logger.error(f"{err}")
                 raise WorkflowException(f"{err}")
-
-
-
+            try:
+                # NOTE: Update job result to workflow result.
+                context["jobs"].update(future.result(timeout=60).context)
+            except TimeoutError as err:
+                raise WorkflowException(
+                    "Get result from future was timeout"
+                ) from err
 
         if not_time_out_flag:
             return context
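Two bounds appear in the threaded path: `as_completed(futures, timeout=1800)` caps the wait for the whole batch, while `future.result(timeout=60)` caps each individual result and re-raises its `TimeoutError` as a `WorkflowException`. The same standard-library pattern reduced to a runnable sketch, with shorter timeouts for demonstration (note that before Python 3.11 the futures module raises its own `TimeoutError` class rather than the builtin):

```python
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from concurrent.futures import TimeoutError as FuturesTimeout  # builtin alias on 3.11+


def job(x: int) -> int:
    time.sleep(0.1)
    return x * 2


with ThreadPoolExecutor(max_workers=2) as pool:
    futures = [pool.submit(job, i) for i in range(4)]
    results = []
    # Outer bound: seconds allowed for *all* futures to complete.
    for future in as_completed(futures, timeout=30):
        if err := future.exception():
            raise RuntimeError(f"job failed: {err}")
        try:
            # Inner bound: seconds allowed for *this* result.
            results.append(future.result(timeout=5))
        except FuturesTimeout as err:
            raise RuntimeError("result timed out") from err

print(sorted(results))  # [0, 2, 4, 6]
```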
@@ -631,8 +707,11 @@ class Workflow(BaseModel):
         *,
         timeout: int = 600,
     ) -> DictData:
-        """Workflow non-threading
-        and waiting previous run successful.
+        """Workflow execution with non-threading strategy that use sequential
+        job running and waiting previous job was run successful.
+
+        If a job need dependency, it will check dependency job ID from
+        context data before allow it run.
 
         :param context: A context workflow data that want to downstream passing.
         :param ts: A start timestamp that use for checking execute time should
@@ -658,9 +737,14 @@ class Workflow(BaseModel):
                 time.sleep(0.25)
                 continue
 
-            # NOTE: Start job execution.
-            job_rs = self.execute_job(
+            # NOTE: Start workflow job execution.
+            job_rs = self.execute_job(
+                job_id=job_id,
+                params=copy.deepcopy(context),
+            )
             context["jobs"].update(job_rs.context)
+
+            # NOTE: Mark this job queue done.
             job_queue.task_done()
 
         # NOTE: Wait for all items to finish processing
@@ -678,8 +762,12 @@ class Workflow(BaseModel):
         )
 
 
-class WorkflowSchedule(BaseModel):
-    """Workflow
+class ScheduleWorkflow(BaseModel):
+    """Schedule Workflow Pydantic model that use to keep workflow model for the
+    Schedule model. it should not use Workflow model directly because on the
+    schedule config it can adjust crontab value that different from the Workflow
+    model.
+    """
 
     name: str = Field(description="A workflow name.")
     on: list[On] = Field(
@@ -692,17 +780,26 @@ class WorkflowSchedule(BaseModel):
     )
 
     @model_validator(mode="before")
-    def
-        """Prepare incoming values before validating with model fields.
+    def __prepare_values(cls, values: DictData) -> DictData:
+        """Prepare incoming values before validating with model fields.
 
+        :rtype: DictData
+        """
         values["name"] = values["name"].replace(" ", "_")
 
         cls.__bypass_on(values)
         return values
 
     @classmethod
-    def __bypass_on(
-
+    def __bypass_on(
+        cls,
+        data: DictData,
+        externals: DictData | None = None,
+    ) -> DictData:
+        """Bypass the on data to loaded config data.
+
+        :rtype: DictData
+        """
         if on := data.pop("on", []):
 
             if isinstance(on, str):
@@ -735,9 +832,9 @@ class Schedule(BaseModel):
             "A schedule description that can be string of markdown content."
         ),
     )
-    workflows: list[WorkflowSchedule] = Field(
+    workflows: list[ScheduleWorkflow] = Field(
         default_factory=list,
-        description="A list of
+        description="A list of ScheduleWorkflow models.",
     )
 
     @classmethod
@@ -746,6 +843,15 @@ class Schedule(BaseModel):
         name: str,
         externals: DictData | None = None,
     ) -> Self:
+        """Create Schedule instance from the Loader object that only receive
+        an input schedule name. The loader object will use this schedule name to
+        searching configuration data of this schedule model in conf path.
+
+        :param name: A schedule name that want to pass to Loader object.
+        :param externals: An external parameters that want to pass to Loader
+            object.
+        :rtype: Self
+        """
         loader: Loader = Loader(name, externals=(externals or {}))
 
         # NOTE: Validate the config type match with current connection model
@@ -766,18 +872,18 @@ class Schedule(BaseModel):
         running: dict[str, list[datetime]],
         *,
         externals: DictData | None = None,
-    ) -> list[WorkflowTask]:
+    ) -> list[WorkflowTaskData]:
         """Generate Task from the current datetime.
 
         :param start_date: A start date that get from the workflow schedule.
         :param queue: A mapping of name and list of datetime for queue.
         :param running: A mapping of name and list of datetime for running.
         :param externals: An external parameters that pass to the Loader object.
-        :rtype: list[WorkflowTask]
+        :rtype: list[WorkflowTaskData]
         """
 
         # NOTE: Create pair of workflow and on.
-        workflow_tasks: list[WorkflowTask] = []
+        workflow_tasks: list[WorkflowTaskData] = []
         externals: DictData = externals or {}
 
         for wfs in self.workflows:
@@ -800,7 +906,7 @@ class Schedule(BaseModel):
             heappush(queue[wfs.name], next_running_date)
 
             workflow_tasks.append(
-                WorkflowTask(
+                WorkflowTaskData(
                     workflow=wf,
                     on=on,
                     params=wfs.params,
@@ -812,12 +918,22 @@ class Schedule(BaseModel):
         return workflow_tasks
 
 
-def catch_exceptions(cancel_on_failure=False):
-
+def catch_exceptions(
+    cancel_on_failure: bool = False,
+) -> Callable[P, Optional[CancelJob]]:
+    """Catch exception error from scheduler job that running with schedule
+    package and return CancelJob if this function raise an error.
 
-
+    :param cancel_on_failure: A flag that allow to return the CancelJob or not
+        it will raise.
+    :rtype: Callable[P, Optional[CancelJob]]
+    """
 
+    def decorator(
+        func: Callable[P, Optional[CancelJob]],
+    ) -> Callable[P, Optional[CancelJob]]:
         try:
+            # NOTE: Check the function that want to handle is method or not.
             if inspect.ismethod(func):
 
                 @wraps(func)
@@ -838,11 +954,11 @@ def catch_exceptions(cancel_on_failure=False):
                 return CancelJob
             raise err
 
-    return
+    return decorator
 
 
 @dataclass(frozen=True)
-class WorkflowTask:
+class WorkflowTaskData:
     """Workflow task dataclass that use to keep mapping data and objects for
     passing in multithreading task.
     """
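`catch_exceptions` becomes an explicit decorator factory: the inner `decorator` receives the function, and the factory's `cancel_on_failure` flag decides whether an exception is converted into a returned `CancelJob` (which the `schedule` package reads as "unschedule this job") or re-raised. A standalone sketch of the same factory shape; `CancelJob` is stubbed so it runs without the `schedule` package installed:

```python
from functools import wraps


class CancelJob:
    """Stand-in for schedule.CancelJob; returning it drops the job."""


def catch_exceptions(cancel_on_failure: bool = False):
    """Decorator factory mirroring the shape introduced in this hunk."""

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except Exception:
                if cancel_on_failure:
                    return CancelJob  # the scheduler removes this job
                raise

        return wrapper

    return decorator


@catch_exceptions(cancel_on_failure=True)
def flaky_task() -> None:
    raise RuntimeError("boom")


print(flaky_task())  # the CancelJob class instead of a raised exception
```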
@@ -854,19 +970,28 @@ class WorkflowTask:
     running: list[datetime] = field(compare=False, hash=False)
 
     @catch_exceptions(cancel_on_failure=True)
-    def release(
+    def release(
+        self,
+        log: Log | None = None,
+        *,
+        waiting_sec: int = 60,
+        sleep_interval: int = 15,
+    ) -> None:
         """Workflow release, it will use with the same logic of
         `workflow.release` method.
 
-        :param log: A log object
+        :param log: A log object for saving result logging from workflow
+            execution process.
+        :param waiting_sec: A second period value that allow workflow execute.
+        :param sleep_interval: A second value that want to waiting until time
+            to execute.
         """
-        tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
         log: Log = log or FileLog
         wf: Workflow = self.workflow
         on: On = self.on
 
         gen: CronRunner = on.generate(
-            datetime.now(tz=tz).replace(second=0, microsecond=0)
+            datetime.now(tz=config.tz).replace(second=0, microsecond=0)
         )
         cron_tz: ZoneInfo = gen.tz
@@ -883,7 +1008,7 @@ class WorkflowTask:
         )
         heappush(self.running[wf.name], next_time)
 
-        if get_diff_sec(next_time, tz=cron_tz) >
+        if get_diff_sec(next_time, tz=cron_tz) > waiting_sec:
             logger.debug(
                 f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} "
                 f": Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
@@ -903,7 +1028,9 @@ class WorkflowTask:
         )
 
         # NOTE: Release when the time is nearly to schedule time.
-        while (duration := get_diff_sec(next_time, tz=tz)) > (
+        while (duration := get_diff_sec(next_time, tz=config.tz)) > (
+            sleep_interval + 5
+        ):
             logger.debug(
                 f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} "
                 f": Sleep until: {duration}"
@@ -968,21 +1095,17 @@ class WorkflowTask:
             heappush(self.queue[wf.name], future_running_time)
         logger.debug(f"[CORE]: {'-' * 100}")
 
-    def __eq__(self, other):
-        if isinstance(other, WorkflowTask):
+    def __eq__(self, other) -> bool:
+        if isinstance(other, WorkflowTaskData):
             return (
                 self.workflow.name == other.workflow.name
                 and self.on.cronjob == other.on.cronjob
             )
 
 
-def queue2str(queue: list[datetime]) -> Iterator[str]:
-    return (f"{q:%Y-%m-%d %H:%M:%S}" for q in queue)
-
-
 @catch_exceptions(cancel_on_failure=True)
 def workflow_task(
-    workflow_tasks: list[WorkflowTask],
+    workflow_tasks: list[WorkflowTaskData],
     stop: datetime,
     threads: dict[str, Thread],
 ) -> CancelJob | None:
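`WorkflowTaskData.__eq__` keeps the old identity rule: two tasks are equal when they share a workflow name and cron expression, while the `queue` and `running` fields are declared with `field(compare=False, ...)` and stay out of comparisons. A plain-dataclass demonstration of that rule (a stand-in class, not the package's pydantic dataclass):

```python
from dataclasses import dataclass, field
from datetime import datetime


@dataclass(frozen=True)
class TaskKeyDemo:
    """Equality driven by name + cronjob only, like WorkflowTaskData."""

    name: str
    cronjob: str
    queue: list = field(default_factory=list, compare=False)


a = TaskKeyDemo("wf", "* * * * *", queue=[datetime(2024, 1, 1)])
b = TaskKeyDemo("wf", "* * * * *")
print(a == b)  # True: queue state does not affect task identity
```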
@@ -996,11 +1119,10 @@ def workflow_task(
     :param threads:
     :rtype: CancelJob | None
     """
-
-    start_date: datetime = datetime.now(tz=tz)
+    start_date: datetime = datetime.now(tz=config.tz)
     start_date_minute: datetime = start_date.replace(second=0, microsecond=0)
 
-    if start_date > stop.replace(tzinfo=tz):
+    if start_date > stop.replace(tzinfo=config.tz):
         logger.info("[WORKFLOW]: Stop this schedule with datetime stopper.")
         while len(threads) > 0:
             logger.warning(
@@ -1117,9 +1239,8 @@ def workflow_control(
         "Should install schedule package before use this module."
     ) from None
 
-    tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
     schedule: Scheduler = Scheduler()
-    start_date: datetime = datetime.now(tz=tz)
+    start_date: datetime = datetime.now(tz=config.tz)
 
     # NOTE: Design workflow queue caching.
     # ---
@@ -1134,7 +1255,7 @@ def workflow_control(
     )
 
     # NOTE: Create pair of workflow and on from schedule model.
-    workflow_tasks: list[WorkflowTask] = []
+    workflow_tasks: list[WorkflowTaskData] = []
     for name in schedules:
         sch: Schedule = Schedule.from_loader(name, externals=externals)
         workflow_tasks.extend(
@@ -1205,8 +1326,8 @@ def workflow_runner(
     created in config path and chuck it with WORKFLOW_APP_SCHEDULE_PER_PROCESS
     value to multiprocess executor pool.
 
-
-
+    The current workflow logic that split to process will be below diagram:
+
     PIPELINES ==> process 01 ==> schedule 1 minute --> thread of release
                                                        workflow task 01 01
                                                    --> thread of release