ddeutil-workflow 0.0.12__py3-none-any.whl → 0.0.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +4 -1
- ddeutil/workflow/__types.py +24 -8
- ddeutil/workflow/api.py +2 -2
- ddeutil/workflow/conf.py +41 -0
- ddeutil/workflow/cron.py +19 -12
- ddeutil/workflow/job.py +251 -184
- ddeutil/workflow/log.py +28 -14
- ddeutil/workflow/on.py +5 -2
- ddeutil/workflow/scheduler.py +262 -140
- ddeutil/workflow/stage.py +105 -39
- ddeutil/workflow/utils.py +106 -40
- {ddeutil_workflow-0.0.12.dist-info → ddeutil_workflow-0.0.14.dist-info}/METADATA +80 -32
- ddeutil_workflow-0.0.14.dist-info/RECORD +22 -0
- {ddeutil_workflow-0.0.12.dist-info → ddeutil_workflow-0.0.14.dist-info}/WHEEL +1 -1
- ddeutil_workflow-0.0.12.dist-info/RECORD +0 -21
- {ddeutil_workflow-0.0.12.dist-info → ddeutil_workflow-0.0.14.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.12.dist-info → ddeutil_workflow-0.0.14.dist-info}/entry_points.txt +0 -0
- {ddeutil_workflow-0.0.12.dist-info → ddeutil_workflow-0.0.14.dist-info}/top_level.txt +0 -0
ddeutil/workflow/scheduler.py
CHANGED
```diff
@@ -6,38 +6,44 @@
 from __future__ import annotations
 
 import copy
+import inspect
 import json
 import logging
 import os
 import time
-from collections.abc import Iterator
 from concurrent.futures import (
     Future,
     ProcessPoolExecutor,
     ThreadPoolExecutor,
     as_completed,
 )
-from dataclasses import
+from dataclasses import field
 from datetime import datetime, timedelta
 from functools import wraps
 from heapq import heappush
 from queue import Queue
 from textwrap import dedent
 from threading import Thread
-from typing import Optional
+from typing import Callable, Optional
 from zoneinfo import ZoneInfo
 
-from dotenv import load_dotenv
 from pydantic import BaseModel, Field
+from pydantic.dataclasses import dataclass
 from pydantic.functional_validators import field_validator, model_validator
 from typing_extensions import Self
 
+try:
+    from typing import ParamSpec
+except ImportError:
+    from typing_extensions import ParamSpec
+
 try:
     from schedule import CancelJob
 except ImportError:
     CancelJob = None
 
-from .__types import DictData
+from .__types import DictData, TupleStr
+from .conf import config
 from .cron import CronRunner
 from .exceptions import JobException, WorkflowException
 from .job import Job
@@ -53,28 +59,33 @@ from .utils import (
     get_diff_sec,
     has_template,
     param2template,
+    queue2str,
 )
 
-
+P = ParamSpec("P")
 logger = get_logger("ddeutil.workflow")
+
+# NOTE: Adjust logging level on the schedule package.
 logging.getLogger("schedule").setLevel(logging.INFO)
 
 
-__all__ = (
+__all__: TupleStr = (
     "Workflow",
-    "
-    "WorkflowTask",
+    "WorkflowTaskData",
     "Schedule",
-    "
+    "ScheduleWorkflow",
     "workflow_task",
+    "workflow_long_running_task",
+    "workflow_control",
+    "workflow_runner",
 )
 
 
 class Workflow(BaseModel):
-    """Workflow Model this is the main future of this project because
-    be workflow data for running everywhere that you want or using it
-    scheduler task in background. It use lightweight coding line from
-    Model and enhance execute method on it.
+    """Workflow Pydantic Model this is the main future of this project because
+    it use to be workflow data for running everywhere that you want or using it
+    to scheduler task in background. It use lightweight coding line from
+    Pydantic Model and enhance execute method on it.
     """
 
     name: str = Field(description="A workflow name.")
```
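Two of the additions above deserve a note. The `try`/`except ImportError` guard around `ParamSpec` is the standard backport pattern: `ParamSpec` only joined the stdlib `typing` module in Python 3.10, so older interpreters fall back to `typing_extensions`. The module-level `P = ParamSpec("P")` is then what lets the reworked `catch_exceptions` decorator further down advertise the wrapped function's exact signature. A minimal standalone sketch of the same pattern (`log_calls` is illustrative, not part of the package):

```python
from __future__ import annotations

from functools import wraps
from typing import Callable, TypeVar

try:  # Python >= 3.10
    from typing import ParamSpec
except ImportError:  # older interpreters use the backport package
    from typing_extensions import ParamSpec

P = ParamSpec("P")
R = TypeVar("R")


def log_calls(func: Callable[P, R]) -> Callable[P, R]:
    """Wrap ``func`` while keeping its parameter list visible to type checkers."""

    @wraps(func)
    def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
        print(f"calling {func.__name__}")
        return func(*args, **kwargs)

    return wrapper
```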
```diff
@@ -86,7 +97,7 @@ class Workflow(BaseModel):
     )
     params: dict[str, Param] = Field(
         default_factory=dict,
-        description="A parameters that
+        description="A parameters that need to use on this workflow.",
     )
     on: list[On] = Field(
         default_factory=list,
@@ -98,14 +109,19 @@ class Workflow(BaseModel):
     )
     run_id: Optional[str] = Field(
         default=None,
-        description=
+        description=(
+            "A running workflow ID that is able to change after initialize."
+        ),
         repr=False,
         exclude=True,
     )
 
     @property
     def new_run_id(self) -> str:
-        """Running ID of this workflow that always generate new unique value.
+        """Running ID of this workflow that always generate new unique value.
+
+        :rtype: str
+        """
         return gen_id(self.name, unique=True)
 
     @classmethod
@@ -139,8 +155,17 @@ class Workflow(BaseModel):
         return cls.model_validate(obj=loader_data)
 
     @classmethod
-    def __bypass_on(
-
+    def __bypass_on(
+        cls,
+        data: DictData,
+        externals: DictData | None = None,
+    ) -> DictData:
+        """Bypass the on data to loaded config data.
+
+        :param data:
+        :param externals:
+        :rtype: DictData
+        """
         if on := data.pop("on", []):
             if isinstance(on, str):
                 on = [on]
```
```diff
@@ -175,12 +200,18 @@ class Workflow(BaseModel):
 
     @field_validator("desc", mode="after")
     def ___prepare_desc(cls, value: str) -> str:
-        """Prepare description string that was created on a template.
+        """Prepare description string that was created on a template.
+
+        :rtype: str
+        """
         return dedent(value)
 
     @model_validator(mode="after")
-    def __validate_jobs_need_and_prepare_running_id(self):
-        """Validate each need job in any jobs should exists.
+    def __validate_jobs_need_and_prepare_running_id(self) -> Self:
+        """Validate each need job in any jobs should exists.
+
+        :rtype: Self
+        """
         for job in self.jobs:
             if not_exist := [
                 need for need in self.jobs[job].needs if need not in self.jobs
@@ -216,7 +247,7 @@ class Workflow(BaseModel):
         return self.model_copy(update={"run_id": run_id})
 
     def job(self, name: str) -> Job:
-        """Return
+        """Return this workflow's job that already created on this job field.
 
         :param name: A job name that want to get from a mapping of job models.
         :type name: str
@@ -232,11 +263,18 @@ class Workflow(BaseModel):
         return self.jobs[name]
 
     def parameterize(self, params: DictData) -> DictData:
-        """Prepare parameters before
-
-
+        """Prepare a passing parameters before use it in execution process.
+        This method will validate keys of an incoming params with this object
+        necessary params field and then create a jobs key to result mapping
+        that will keep any execution result from its job.
+
+            ... {
+            ...     "params": <an-incoming-params>,
+            ...     "jobs": {}
+            ... }
 
         :param params: A parameter mapping that receive from workflow execution.
+        :type params: DictData
         :rtype: DictData
         """
         # VALIDATE: Incoming params should have keys that set on this workflow.
```
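The expanded docstring pins down the return contract of `parameterize`: the validated caller params sit under a `"params"` key next to an empty `"jobs"` mapping that later collects each job's outputs. A tiny sketch of that shape (the incoming values here are hypothetical examples, not package defaults):

```python
# Sketch of the documented parameterize() contract.
incoming = {"asat-dt": "2024-01-01"}

context = {
    "params": incoming,  # validated against the workflow's ``params`` field
    "jobs": {},          # filled in job-by-job during execution
}

# Per the execute() docstring below, a finished job named "extract" lands at:
#   context["jobs"]["extract"]["stages"][<stage-id>]["outputs"][<key>]
```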
```diff
@@ -250,7 +288,7 @@ class Workflow(BaseModel):
                 f"{', '.join(check_key)}."
             )
 
-        # NOTE:
+        # NOTE: Mapping type of param before adding it to the ``params`` key.
         return {
             "params": (
                 params
@@ -294,9 +332,8 @@ class Workflow(BaseModel):
             f"queue id: {id(queue)}"
         )
         log: Log = log or FileLog
-        tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
         gen: CronRunner = on.generate(
-            datetime.now(tz=tz).replace(second=0, microsecond=0)
+            datetime.now(tz=config.tz).replace(second=0, microsecond=0)
             + timedelta(seconds=1)
         )
         cron_tz: ZoneInfo = gen.tz
@@ -423,7 +460,6 @@ class Workflow(BaseModel):
 
         worker: int = int(os.getenv("WORKFLOW_CORE_MAX_NUM_POKING") or "4")
         with ThreadPoolExecutor(max_workers=worker) as executor:
-            # TODO: If I want to run infinite loop.
             futures: list[Future] = []
             for on in self.on:
                 futures.append(
```
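A pattern that repeats through the rest of this diff: every ad-hoc `ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))` lookup is replaced by the shared `config` object imported from the new `conf.py` (`+41` lines in the summary above). The diff only shows `config.tz` and `config.max_job_parallel` being consumed, so the following is a rough sketch of what such an object could look like; the second env var name is a guess, not taken from the package:

```python
import os
from zoneinfo import ZoneInfo


class Config:
    """Resolve workflow runtime settings from the environment in one place."""

    @property
    def tz(self) -> ZoneInfo:
        # Env var name comes from the removed lines in this diff.
        return ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))

    @property
    def max_job_parallel(self) -> int:
        # Hypothetical env var name; only the attribute appears in the diff.
        return int(os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "2"))


config = Config()
```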
```diff
@@ -452,35 +488,55 @@ class Workflow(BaseModel):
 
     def execute_job(
         self,
-
+        job_id: str,
         params: DictData,
+        *,
+        raise_error: bool = True,
     ) -> Result:
-        """Job
+        """Workflow Job execution with passing dynamic parameters from the
+        workflow execution to the target job.
+
+        This execution is the minimum level of execution of this workflow
+        model. It different with ``self.execute`` because this method run only
+        one job and return with context of this job data.
 
-        :param
+        :param job_id: A job ID that want to execute.
         :param params: A params that was parameterized from workflow execution.
+        :param raise_error: A flag that raise error instead catching to result
+            if it get exception from job execution.
         :rtype: Result
         """
         # VALIDATE: check a job ID that exists in this workflow or not.
-        if
+        if job_id not in self.jobs:
             raise WorkflowException(
-                f"The job ID: {
+                f"The job ID: {job_id} does not exists in {self.name!r} "
+                f"workflow."
             )
-        try:
-            logger.info(f"({self.run_id}) [WORKFLOW]: Start execute: {job!r}")
 
-
-
-            job_obj: Job = self.jobs[job].get_running_id(self.run_id)
-            j_rs: Result = job_obj.execute(params=params)
+        context: DictData = {}
+        logger.info(f"({self.run_id}) [WORKFLOW]: Start execute: {job_id!r}")
 
+        # IMPORTANT:
+        #   Change any job running IDs to this workflow running ID.
+        #
+        try:
+            job: Job = self.jobs[job_id].get_running_id(self.run_id)
+            job.set_outputs(
+                job.execute(params=params).context,
+                to=context,
+            )
         except JobException as err:
-
+            logger.error(
+                f"({self.run_id}) [WORKFLOW]: {err.__class__.__name__}: {err}"
+            )
+            if raise_error:
+                raise WorkflowException(
+                    f"Get job execution error {job_id}: JobException: {err}"
+                ) from None
+            else:
+                raise NotImplementedError() from None
 
-        return Result(
-            status=j_rs.status,
-            context={job: job_obj.set_outputs(j_rs.context)},
-        )
+        return Result(status=0, context=context)
 
     def execute(
         self,
```
```diff
@@ -488,17 +544,8 @@ class Workflow(BaseModel):
         *,
         timeout: int = 60,
     ) -> Result:
-        """Execute workflow with passing dynamic parameters to
-        included in
-
-        :param params: An input parameters that use on workflow execution that
-            will parameterize before using it.
-        :param timeout: A workflow execution time out in second unit that use
-            for limit time of execution and waiting job dependency.
-        :rtype: Result
-
-        See Also:
-        ---
+        """Execute workflow with passing a dynamic parameters to all jobs that
+        included in this workflow model with ``jobs`` field.
 
         The result of execution process for each jobs and stages on this
         workflow will keeping in dict which able to catch out with all jobs and
@@ -509,10 +556,22 @@ class Workflow(BaseModel):
 
             ... ${job-name}.stages.${stage-id}.outputs.${key}
 
+        :param params: An input parameters that use on workflow execution that
+            will parameterize before using it. Default is None.
+        :type params: DictData | None
+        :param timeout: A workflow execution time out in second unit that use
+            for limit time of execution and waiting job dependency. Default is
+            60 seconds.
+        :type timeout: int
+        :rtype: Result
         """
         logger.info(f"({self.run_id}) [CORE]: Start Execute: {self.name!r} ...")
-
+
+        # NOTE: I use this condition because this method allow passing empty
+        #   params and I do not want to create new dict object.
+        params: DictData = {} if params is None else params
         ts: float = time.monotonic()
+        rs: Result = Result()
 
         # NOTE: It should not do anything if it does not have job.
         if not self.jobs:
@@ -520,7 +579,7 @@ class Workflow(BaseModel):
                 f"({self.run_id}) [WORKFLOW]: This workflow: {self.name!r} "
                 f"does not have any jobs"
             )
-            return
+            return rs.catch(status=0, context=params)
 
         # NOTE: Create a job queue that keep the job that want to running after
         #   it dependency condition.
```
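The `params: DictData = {} if params is None else params` line added above is the standard `None`-sentinel idiom: a mutable default like `{}` in the signature would be created once at function definition time and shared across calls, so the default is `None` and replaced inside the body. In isolation:

```python
def execute(params: dict | None = None) -> dict:
    # Swap the None sentinel for a fresh dict per call; a literal
    # ``params: dict = {}`` default would be shared between callers.
    params = {} if params is None else params
    return params
```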
```diff
@@ -531,21 +590,32 @@ class Workflow(BaseModel):
         # NOTE: Create result context that will pass this context to any
         #   execution dependency.
         context: DictData = self.parameterize(params)
+        status: int = 0
         try:
-
-
-
-
-
-
+            if config.max_job_parallel == 1:
+                self.__exec_non_threading(
+                    context=context,
+                    ts=ts,
+                    job_queue=jq,
+                    timeout=timeout,
+                )
+            else:
+                self.__exec_threading(
+                    context=context,
+                    ts=ts,
+                    job_queue=jq,
+                    worker=config.max_job_parallel,
+                    timeout=timeout,
                 )
-            )
-            return Result(status=0, context=context)
         except WorkflowException as err:
             context.update(
-                {
+                {
+                    "error": err,
+                    "error_message": f"{err.__class__.__name__}: {err}",
+                },
             )
-
+            status = 1
+        return rs.catch(status=status, context=context)
 
     def __exec_threading(
         self,
```
```diff
@@ -556,11 +626,15 @@ class Workflow(BaseModel):
         worker: int = 2,
         timeout: int = 600,
     ) -> DictData:
-        """Workflow threading
+        """Workflow execution by threading strategy.
+
+        If a job need dependency, it will check dependency job ID from
+        context data before allow it run.
 
         :param context: A context workflow data that want to downstream passing.
         :param ts: A start timestamp that use for checking execute time should
             timeout.
+        :param job_queue: A job queue object.
         :param timeout: A second value unit that bounding running time.
         :param worker: A number of threading executor pool size.
         :rtype: DictData
@@ -594,18 +668,24 @@ class Workflow(BaseModel):
                         params=copy.deepcopy(context),
                     ),
                 )
+
+                # NOTE: Mark this job queue done.
                 job_queue.task_done()
 
             # NOTE: Wait for all items to finish processing
             job_queue.join()
 
-            for future in as_completed(futures):
+            for future in as_completed(futures, timeout=1800):
                 if err := future.exception():
                     logger.error(f"{err}")
                     raise WorkflowException(f"{err}")
-
-
-
+                try:
+                    # NOTE: Update job result to workflow result.
+                    context["jobs"].update(future.result(timeout=60).context)
+                except TimeoutError as err:
+                    raise WorkflowException(
+                        "Get result from future was timeout"
+                    ) from err
 
         if not_time_out_flag:
             return context
```
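The threading path gains two distinct timeouts: `as_completed(futures, timeout=1800)` bounds the total wait for the whole future set, while `future.result(timeout=60)` bounds the wait for one result; both raise `TimeoutError`, which the workflow wraps in `WorkflowException`. A standalone sketch of that behavior with illustrative values:

```python
import time
from concurrent.futures import ThreadPoolExecutor, as_completed


def work(sec: float) -> str:
    time.sleep(sec)
    return f"slept {sec}s"


with ThreadPoolExecutor(max_workers=2) as pool:
    futures = [pool.submit(work, s) for s in (0.1, 0.2)]

    # Total wait for the set is capped at 30s; each individual
    # result fetch is capped at 5s. Either cap raises TimeoutError.
    for future in as_completed(futures, timeout=30):
        if err := future.exception():
            raise RuntimeError(f"job failed: {err}")
        print(future.result(timeout=5))
```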
```diff
@@ -627,8 +707,11 @@ class Workflow(BaseModel):
         *,
         timeout: int = 600,
     ) -> DictData:
-        """Workflow non-threading
-        and waiting previous run successful.
+        """Workflow execution with non-threading strategy that use sequential
+        job running and waiting previous job was run successful.
+
+        If a job need dependency, it will check dependency job ID from
+        context data before allow it run.
 
         :param context: A context workflow data that want to downstream passing.
         :param ts: A start timestamp that use for checking execute time should
@@ -654,9 +737,14 @@ class Workflow(BaseModel):
                 time.sleep(0.25)
                 continue
 
-            # NOTE: Start job execution.
-            job_rs = self.execute_job(
+            # NOTE: Start workflow job execution.
+            job_rs = self.execute_job(
+                job_id=job_id,
+                params=copy.deepcopy(context),
+            )
             context["jobs"].update(job_rs.context)
+
+            # NOTE: Mark this job queue done.
             job_queue.task_done()
 
         # NOTE: Wait for all items to finish processing
@@ -674,8 +762,12 @@ class Workflow(BaseModel):
             )
 
 
-class
-    """Workflow
+class ScheduleWorkflow(BaseModel):
+    """Schedule Workflow Pydantic model that use to keep workflow model for the
+    Schedule model. it should not use Workflow model directly because on the
+    schedule config it can adjust crontab value that different from the Workflow
+    model.
+    """
 
     name: str = Field(description="A workflow name.")
     on: list[On] = Field(
```
```diff
@@ -688,17 +780,26 @@ class WorkflowSchedule(BaseModel):
     )
 
     @model_validator(mode="before")
-    def
-        """Prepare incoming values before validating with model fields.
+    def __prepare_values(cls, values: DictData) -> DictData:
+        """Prepare incoming values before validating with model fields.
 
+        :rtype: DictData
+        """
         values["name"] = values["name"].replace(" ", "_")
 
         cls.__bypass_on(values)
         return values
 
     @classmethod
-    def __bypass_on(
-
+    def __bypass_on(
+        cls,
+        data: DictData,
+        externals: DictData | None = None,
+    ) -> DictData:
+        """Bypass the on data to loaded config data.
+
+        :rtype: DictData
+        """
         if on := data.pop("on", []):
 
             if isinstance(on, str):
@@ -731,9 +832,9 @@ class Schedule(BaseModel):
             "A schedule description that can be string of markdown content."
         ),
     )
-    workflows: list[
+    workflows: list[ScheduleWorkflow] = Field(
         default_factory=list,
-        description="A list of
+        description="A list of ScheduleWorkflow models.",
     )
 
     @classmethod
@@ -742,6 +843,15 @@ class Schedule(BaseModel):
         name: str,
         externals: DictData | None = None,
     ) -> Self:
+        """Create Schedule instance from the Loader object that only receive
+        an input schedule name. The loader object will use this schedule name to
+        searching configuration data of this schedule model in conf path.
+
+        :param name: A schedule name that want to pass to Loader object.
+        :param externals: An external parameters that want to pass to Loader
+            object.
+        :rtype: Self
+        """
         loader: Loader = Loader(name, externals=(externals or {}))
 
         # NOTE: Validate the config type match with current connection model
```
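The new docstring spells out that `from_loader` resolves a schedule by name from the configuration path. A hedged usage sketch; the schedule name is an example and must exist as config data in the project's conf path:

```python
from ddeutil.workflow.scheduler import Schedule

schedule = Schedule.from_loader("schedule-wf")
for wfs in schedule.workflows:
    print(wfs.name)  # each entry is a ScheduleWorkflow model
```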
```diff
@@ -762,18 +872,18 @@ class Schedule(BaseModel):
         running: dict[str, list[datetime]],
         *,
         externals: DictData | None = None,
-    ) -> list[
+    ) -> list[WorkflowTaskData]:
         """Generate Task from the current datetime.
 
         :param start_date: A start date that get from the workflow schedule.
         :param queue: A mapping of name and list of datetime for queue.
         :param running: A mapping of name and list of datetime for running.
         :param externals: An external parameters that pass to the Loader object.
-        :rtype: list[
+        :rtype: list[WorkflowTaskData]
         """
 
         # NOTE: Create pair of workflow and on.
-        workflow_tasks: list[
+        workflow_tasks: list[WorkflowTaskData] = []
         externals: DictData = externals or {}
 
         for wfs in self.workflows:
@@ -783,16 +893,20 @@ class Schedule(BaseModel):
             queue[wfs.name]: list[datetime] = []
             running[wfs.name]: list[datetime] = []
 
-
+            # NOTE: Create default on if it does not passing on the Schedule.
+            _ons: list[On] = wf.on.copy() if len(wfs.on) == 0 else wfs.on
+
+            for on in _ons:
                 on_gen = on.generate(start_date)
                 next_running_date = on_gen.next
                 while next_running_date in queue[wfs.name]:
                     next_running_date = on_gen.next
 
+                # NOTE: Push the next running date to queue list.
                 heappush(queue[wfs.name], next_running_date)
 
                 workflow_tasks.append(
-
+                    WorkflowTaskData(
                         workflow=wf,
                         on=on,
                         params=wfs.params,
```
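The loop above draws the next runtime from the cron generator, skips datetimes already queued, and keeps the per-workflow queue ordered with `heappush`. A standalone sketch of that queue discipline, with the cron generator replaced by a plain every-minute generator:

```python
import heapq
from datetime import datetime, timedelta
from typing import Iterator


def next_free_slot(gen: Iterator[datetime], queue: list[datetime]) -> datetime:
    """Advance past runtimes already queued, then record the slot on the heap."""
    candidate = next(gen)
    while candidate in queue:  # mirrors the diff's de-duplication loop
        candidate = next(gen)
    heapq.heappush(queue, candidate)
    return candidate


def every_minute(start: datetime) -> Iterator[datetime]:
    while True:
        start += timedelta(minutes=1)
        yield start


queue: list[datetime] = []
gen = every_minute(datetime(2024, 1, 1))
print(next_free_slot(gen, queue))  # 2024-01-01 00:01:00
print(next_free_slot(gen, queue))  # 2024-01-01 00:02:00
```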
```diff
@@ -804,44 +918,47 @@ class Schedule(BaseModel):
         return workflow_tasks
 
 
-def catch_exceptions(
-
+def catch_exceptions(
+    cancel_on_failure: bool = False,
+) -> Callable[P, Optional[CancelJob]]:
+    """Catch exception error from scheduler job that running with schedule
+    package and return CancelJob if this function raise an error.
 
-
-
-
-
-                return func(*args, **kwargs)
-            except Exception as err:
-                logger.exception(err)
-                if cancel_on_failure:
-                    return CancelJob
+    :param cancel_on_failure: A flag that allow to return the CancelJob or not
+        it will raise.
+    :rtype: Callable[P, Optional[CancelJob]]
+    """
 
-
+    def decorator(
+        func: Callable[P, Optional[CancelJob]],
+    ) -> Callable[P, Optional[CancelJob]]:
+        try:
+            # NOTE: Check the function that want to handle is method or not.
+            if inspect.ismethod(func):
 
-
+                @wraps(func)
+                def wrapper(self, *args, **kwargs):
+                    return func(self, *args, **kwargs)
 
+                return wrapper
 
-
-
+            @wraps(func)
+            def wrapper(*args, **kwargs):
+                return func(*args, **kwargs)
 
-
-        @wraps(func)
-        def wrapper(self, *args, **kwargs):
-            try:
-                return func(self, *args, **kwargs)
-            except Exception as err:
-                logger.exception(err)
-                if cancel_on_failure:
-                    return CancelJob
+            return wrapper
 
-
+        except Exception as err:
+            logger.exception(err)
+            if cancel_on_failure:
+                return CancelJob
+            raise err
 
-    return
+    return decorator
 
 
 @dataclass(frozen=True)
-class
+class WorkflowTaskData:
     """Workflow task dataclass that use to keep mapping data and objects for
     passing in multithreading task.
     """
```
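The rewritten `catch_exceptions` is now a decorator factory that detects bound methods via `inspect.ismethod` and, with `cancel_on_failure=True`, converts an unhandled exception into a `CancelJob` return value. Returning `schedule.CancelJob` from a job function is the schedule package's documented way to unschedule it. A minimal usage sketch, assuming the decorator is importable from this module:

```python
import logging

import schedule

from ddeutil.workflow.scheduler import catch_exceptions

logging.basicConfig(level=logging.INFO)


@catch_exceptions(cancel_on_failure=True)
def risky_job() -> None:
    raise RuntimeError("boom")  # logged by the decorator, job then cancels


schedule.every(1).minutes.do(risky_job)
schedule.run_all()  # the failing job unschedules itself via CancelJob
```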
```diff
@@ -852,20 +969,29 @@ class WorkflowTask:
     queue: list[datetime] = field(compare=False, hash=False)
     running: list[datetime] = field(compare=False, hash=False)
 
-    @
-    def release(
+    @catch_exceptions(cancel_on_failure=True)
+    def release(
+        self,
+        log: Log | None = None,
+        *,
+        waiting_sec: int = 60,
+        sleep_interval: int = 15,
+    ) -> None:
         """Workflow release, it will use with the same logic of
         `workflow.release` method.
 
-        :param log: A log object
+        :param log: A log object for saving result logging from workflow
+            execution process.
+        :param waiting_sec: A second period value that allow workflow execute.
+        :param sleep_interval: A second value that want to waiting until time
+            to execute.
         """
-        tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
         log: Log = log or FileLog
         wf: Workflow = self.workflow
         on: On = self.on
 
         gen: CronRunner = on.generate(
-            datetime.now(tz=tz).replace(second=0, microsecond=0)
+            datetime.now(tz=config.tz).replace(second=0, microsecond=0)
         )
         cron_tz: ZoneInfo = gen.tz
 
@@ -882,7 +1008,7 @@ class WorkflowTask:
         )
         heappush(self.running[wf.name], next_time)
 
-        if get_diff_sec(next_time, tz=cron_tz) >
+        if get_diff_sec(next_time, tz=cron_tz) > waiting_sec:
             logger.debug(
                 f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} "
                 f": Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
@@ -902,7 +1028,9 @@ class WorkflowTask:
         )
 
         # NOTE: Release when the time is nearly to schedule time.
-        while (duration := get_diff_sec(next_time, tz=tz)) > (
+        while (duration := get_diff_sec(next_time, tz=config.tz)) > (
+            sleep_interval + 5
+        ):
             logger.debug(
                 f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} "
                 f": Sleep until: {duration}"
```
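The two new keyword parameters give `release` its timing gate: runtimes more than `waiting_sec` away are skipped so the task can be re-queued, and the method coarse-sleeps while more than `sleep_interval + 5` seconds remain. A simplified standalone sketch of that gate, with `get_diff_sec` stubbed by `seconds_until` and the requeue side omitted:

```python
import time
from datetime import datetime, timezone


def seconds_until(next_time: datetime) -> float:
    """Stand-in for the package's get_diff_sec helper."""
    return (next_time - datetime.now(tz=timezone.utc)).total_seconds()


def gated_release(
    next_time: datetime,
    *,
    waiting_sec: int = 60,
    sleep_interval: int = 15,
) -> bool:
    # Too far away: skip this slot and let the scheduler requeue it.
    if seconds_until(next_time) > waiting_sec:
        return False
    # Close enough: coarse-sleep until only a small margin remains,
    # then hand over to the actual execution.
    while seconds_until(next_time) > sleep_interval + 5:
        time.sleep(sleep_interval)
    return True
```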
```diff
@@ -967,21 +1095,17 @@ class WorkflowTask:
             heappush(self.queue[wf.name], future_running_time)
         logger.debug(f"[CORE]: {'-' * 100}")
 
-    def __eq__(self, other):
-        if isinstance(other,
+    def __eq__(self, other) -> bool:
+        if isinstance(other, WorkflowTaskData):
             return (
                 self.workflow.name == other.workflow.name
                 and self.on.cronjob == other.on.cronjob
             )
 
 
-def queue2str(queue: list[datetime]) -> Iterator[str]:
-    return (f"{q:%Y-%m-%d %H:%M:%S}" for q in queue)
-
-
 @catch_exceptions(cancel_on_failure=True)
 def workflow_task(
-    workflow_tasks: list[
+    workflow_tasks: list[WorkflowTaskData],
     stop: datetime,
     threads: dict[str, Thread],
 ) -> CancelJob | None:
@@ -995,11 +1119,10 @@ def workflow_task(
     :param threads:
     :rtype: CancelJob | None
     """
-
-    start_date: datetime = datetime.now(tz=tz)
+    start_date: datetime = datetime.now(tz=config.tz)
     start_date_minute: datetime = start_date.replace(second=0, microsecond=0)
 
-    if start_date > stop.replace(tzinfo=tz):
+    if start_date > stop.replace(tzinfo=config.tz):
         logger.info("[WORKFLOW]: Stop this schedule with datetime stopper.")
         while len(threads) > 0:
             logger.warning(
@@ -1116,9 +1239,8 @@ def workflow_control(
             "Should install schedule package before use this module."
         ) from None
 
-    tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
     schedule: Scheduler = Scheduler()
-    start_date: datetime = datetime.now(tz=tz)
+    start_date: datetime = datetime.now(tz=config.tz)
 
     # NOTE: Design workflow queue caching.
     # ---
@@ -1133,7 +1255,7 @@ def workflow_control(
     )
 
     # NOTE: Create pair of workflow and on from schedule model.
-    workflow_tasks: list[
+    workflow_tasks: list[WorkflowTaskData] = []
    for name in schedules:
         sch: Schedule = Schedule.from_loader(name, externals=externals)
         workflow_tasks.extend(
@@ -1204,8 +1326,8 @@ def workflow_runner(
     created in config path and chuck it with WORKFLOW_APP_SCHEDULE_PER_PROCESS
     value to multiprocess executor pool.
 
-
-
+    The current workflow logic that split to process will be below diagram:
+
     PIPELINES ==> process 01 ==> schedule 1 minute --> thread of release
                                                            workflow task 01 01
                                                        --> thread of release
```
|