ddeutil-workflow 0.0.13__py3-none-any.whl → 0.0.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +4 -1
- ddeutil/workflow/__types.py +59 -10
- ddeutil/workflow/api.py +2 -2
- ddeutil/workflow/conf.py +45 -0
- ddeutil/workflow/cron.py +19 -12
- ddeutil/workflow/job.py +191 -153
- ddeutil/workflow/log.py +28 -14
- ddeutil/workflow/scheduler.py +255 -119
- ddeutil/workflow/stage.py +77 -35
- ddeutil/workflow/utils.py +129 -51
- {ddeutil_workflow-0.0.13.dist-info → ddeutil_workflow-0.0.15.dist-info}/METADATA +6 -4
- ddeutil_workflow-0.0.15.dist-info/RECORD +22 -0
- {ddeutil_workflow-0.0.13.dist-info → ddeutil_workflow-0.0.15.dist-info}/WHEEL +1 -1
- ddeutil_workflow-0.0.13.dist-info/RECORD +0 -21
- {ddeutil_workflow-0.0.13.dist-info → ddeutil_workflow-0.0.15.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.13.dist-info → ddeutil_workflow-0.0.15.dist-info}/entry_points.txt +0 -0
- {ddeutil_workflow-0.0.13.dist-info → ddeutil_workflow-0.0.15.dist-info}/top_level.txt +0 -0
ddeutil/workflow/scheduler.py
CHANGED
@@ -11,34 +11,39 @@ import json
 import logging
 import os
 import time
-from collections.abc import Iterator
 from concurrent.futures import (
     Future,
     ProcessPoolExecutor,
     ThreadPoolExecutor,
     as_completed,
 )
-from dataclasses import dataclass, field
+from dataclasses import field
 from datetime import datetime, timedelta
 from functools import wraps
 from heapq import heappush
 from queue import Queue
 from textwrap import dedent
 from threading import Thread
-from typing import Optional
+from typing import Callable, Optional
 from zoneinfo import ZoneInfo
 
-from dotenv import load_dotenv
 from pydantic import BaseModel, Field
+from pydantic.dataclasses import dataclass
 from pydantic.functional_validators import field_validator, model_validator
 from typing_extensions import Self
 
+try:
+    from typing import ParamSpec
+except ImportError:
+    from typing_extensions import ParamSpec
+
 try:
     from schedule import CancelJob
 except ImportError:
     CancelJob = None
 
 from .__types import DictData, TupleStr
+from .conf import config
 from .cron import CronRunner
 from .exceptions import JobException, WorkflowException
 from .job import Job
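This import hunk carries most of the release's direction: `dataclass` now comes from pydantic so the task dataclass gets validation, the new `config` object replaces per-call environment lookups, and `ParamSpec` is imported with a stdlib-first fallback because it only landed in `typing` in Python 3.10. A minimal sketch of how a guarded `ParamSpec` typically types a decorator; the `log_calls` helper here is illustrative, not part of the package:

```python
from typing import Callable, TypeVar

try:
    # Stdlib from Python 3.10 onward.
    from typing import ParamSpec
except ImportError:
    # Older interpreters fall back to the typing_extensions backport.
    from typing_extensions import ParamSpec

P = ParamSpec("P")
R = TypeVar("R")


def log_calls(func: Callable[P, R]) -> Callable[P, R]:
    """Keep the wrapped function's exact signature visible to type checkers."""

    def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
        print(f"calling {func.__name__}")
        return func(*args, **kwargs)

    return wrapper
```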
@@ -54,9 +59,10 @@ from .utils import (
     get_diff_sec,
     has_template,
     param2template,
+    queue2str,
 )
 
-
+P = ParamSpec("P")
 logger = get_logger("ddeutil.workflow")
 
 # NOTE: Adjust logging level on the schedule package.
@@ -65,9 +71,9 @@ logging.getLogger("schedule").setLevel(logging.INFO)
 
 __all__: TupleStr = (
     "Workflow",
-    "WorkflowSchedule",
-    "WorkflowTask",
+    "WorkflowTaskData",
     "Schedule",
+    "ScheduleWorkflow",
     "workflow_task",
     "workflow_long_running_task",
     "workflow_control",
@@ -76,10 +82,10 @@ __all__: TupleStr = (
 
 
 class Workflow(BaseModel):
-    """Workflow Model this is the main future of this project because
-    be workflow data for running everywhere that you want or using it
-    scheduler task in background. It use lightweight coding line from
-    Model and enhance execute method on it.
+    """Workflow Pydantic Model this is the main future of this project because
+    it use to be workflow data for running everywhere that you want or using it
+    to scheduler task in background. It use lightweight coding line from
+    Pydantic Model and enhance execute method on it.
     """
 
     name: str = Field(description="A workflow name.")
@@ -91,7 +97,7 @@ class Workflow(BaseModel):
     )
     params: dict[str, Param] = Field(
         default_factory=dict,
-        description="A parameters that
+        description="A parameters that need to use on this workflow.",
     )
     on: list[On] = Field(
         default_factory=list,
@@ -103,14 +109,19 @@ class Workflow(BaseModel):
     )
     run_id: Optional[str] = Field(
         default=None,
-        description=
+        description=(
+            "A running workflow ID that is able to change after initialize."
+        ),
         repr=False,
         exclude=True,
     )
 
     @property
     def new_run_id(self) -> str:
-        """Running ID of this workflow that always generate new unique value."""
+        """Running ID of this workflow that always generate new unique value.
+
+        :rtype: str
+        """
         return gen_id(self.name, unique=True)
 
     @classmethod
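For context on the `repr=False, exclude=True` pair on `run_id`: it keeps the per-run identifier out of both the model's repr and any serialized output, so two copies of the same workflow dump identically regardless of run state. A small standalone sketch of that pydantic behavior (not the package's own model):

```python
from typing import Optional

from pydantic import BaseModel, Field


class Demo(BaseModel):
    name: str
    run_id: Optional[str] = Field(default=None, repr=False, exclude=True)


d = Demo(name="wf", run_id="abc123")
print(d)               # name='wf'  (run_id hidden from the repr)
print(d.model_dump())  # {'name': 'wf'}  (run_id excluded from dumps)
```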
@@ -144,8 +155,17 @@ class Workflow(BaseModel):
         return cls.model_validate(obj=loader_data)
 
     @classmethod
-    def __bypass_on(
-
+    def __bypass_on(
+        cls,
+        data: DictData,
+        externals: DictData | None = None,
+    ) -> DictData:
+        """Bypass the on data to loaded config data.
+
+        :param data:
+        :param externals:
+        :rtype: DictData
+        """
         if on := data.pop("on", []):
             if isinstance(on, str):
                 on = [on]
@@ -180,12 +200,18 @@ class Workflow(BaseModel):
 
     @field_validator("desc", mode="after")
     def ___prepare_desc(cls, value: str) -> str:
-        """Prepare description string that was created on a template."""
+        """Prepare description string that was created on a template.
+
+        :rtype: str
+        """
         return dedent(value)
 
     @model_validator(mode="after")
-    def __validate_jobs_need_and_prepare_running_id(self):
-        """Validate each need job in any jobs should exists."""
+    def __validate_jobs_need_and_prepare_running_id(self) -> Self:
+        """Validate each need job in any jobs should exists.
+
+        :rtype: Self
+        """
         for job in self.jobs:
             if not_exist := [
                 need for need in self.jobs[job].needs if need not in self.jobs
@@ -221,7 +247,7 @@ class Workflow(BaseModel):
         return self.model_copy(update={"run_id": run_id})
 
     def job(self, name: str) -> Job:
-        """Return
+        """Return this workflow's job that already created on this job field.
 
         :param name: A job name that want to get from a mapping of job models.
         :type name: str
@@ -237,11 +263,18 @@ class Workflow(BaseModel):
         return self.jobs[name]
 
     def parameterize(self, params: DictData) -> DictData:
-        """Prepare parameters before
-
-
+        """Prepare a passing parameters before use it in execution process.
+        This method will validate keys of an incoming params with this object
+        necessary params field and then create a jobs key to result mapping
+        that will keep any execution result from its job.
+
+            ... {
+            ...     "params": <an-incoming-params>,
+            ...     "jobs": {}
+            ... }
 
         :param params: A parameter mapping that receive from workflow execution.
+        :type params: DictData
         :rtype: DictData
         """
         # VALIDATE: Incoming params should have keys that set on this workflow.
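The expanded docstring pins down the contract: `parameterize` validates the incoming keys against the declared `params` field, types each value, and seeds an empty `jobs` mapping that later job executions fill in. Roughly, the returned context looks like this (values illustrative):

```python
# Sketch of the context shape that ``parameterize`` returns:
context = {
    # Incoming params, passed through each Param model's type mapping.
    "params": {"run-date": "2024-01-01"},
    # Filled with each job's outputs as execution proceeds.
    "jobs": {},
}
```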
@@ -255,7 +288,7 @@ class Workflow(BaseModel):
                 f"{', '.join(check_key)}."
             )
 
-        # NOTE:
+        # NOTE: Mapping type of param before adding it to the ``params`` key.
         return {
             "params": (
                 params
@@ -299,9 +332,8 @@ class Workflow(BaseModel):
             f"queue id: {id(queue)}"
         )
         log: Log = log or FileLog
-        tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
         gen: CronRunner = on.generate(
-            datetime.now(tz=tz).replace(second=0, microsecond=0)
+            datetime.now(tz=config.tz).replace(second=0, microsecond=0)
             + timedelta(seconds=1)
         )
         cron_tz: ZoneInfo = gen.tz
@@ -456,35 +488,54 @@ class Workflow(BaseModel):
 
     def execute_job(
         self,
-        job: str,
+        job_id: str,
         params: DictData,
+        *,
+        raise_error: bool = True,
     ) -> Result:
-        """Job
+        """Workflow Job execution with passing dynamic parameters from the
+        workflow execution to the target job.
 
-
+        This execution is the minimum level of execution of this workflow
+        model. It different with ``self.execute`` because this method run only
+        one job and return with context of this job data.
+
+        :param job_id: A job ID that want to execute.
         :param params: A params that was parameterized from workflow execution.
+        :param raise_error: A flag that raise error instead catching to result
+            if it get exception from job execution.
         :rtype: Result
         """
         # VALIDATE: check a job ID that exists in this workflow or not.
-        if
+        if job_id not in self.jobs:
             raise WorkflowException(
-                f"The job ID: {
+                f"The job ID: {job_id} does not exists in {self.name!r} "
+                f"workflow."
             )
-        try:
-            logger.info(f"({self.run_id}) [WORKFLOW]: Start execute: {job!r}")
 
-
-            # Change any job running IDs to this workflow running ID.
-            job_obj: Job = self.jobs[job].get_running_id(self.run_id)
-            j_rs: Result = job_obj.execute(params=params)
+        logger.info(f"({self.run_id}) [WORKFLOW]: Start execute: {job_id!r}")
 
+        # IMPORTANT:
+        #   Change any job running IDs to this workflow running ID.
+        #
+        try:
+            job: Job = self.jobs[job_id].get_running_id(self.run_id)
+            job.set_outputs(
+                job.execute(params=params).context,
+                to=params,
+            )
         except JobException as err:
-
+            logger.error(
+                f"({self.run_id}) [WORKFLOW]: {err.__class__.__name__}: {err}"
+            )
+            if raise_error:
+                raise WorkflowException(
+                    f"Get job execution error {job_id}: JobException: {err}"
+                ) from None
+            else:
+                raise NotImplementedError() from None
 
-        return Result(
-            status=j_rs.status,
-            context={job: job_obj.set_outputs(j_rs.context)},
-        )
+        return Result(status=0, context=params)
 
     def execute(
         self,
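The reworked `execute_job` takes an explicit `job_id`, writes the job's outputs back into the shared `params` mapping via `set_outputs(..., to=params)`, and returns `Result(status=0, context=params)`, so repeated calls accumulate state in one dict instead of each returning an isolated context. A hedged usage sketch; the workflow and job names are hypothetical:

```python
from ddeutil.workflow.exceptions import WorkflowException
from ddeutil.workflow.scheduler import Workflow

workflow = Workflow.from_loader("wf-example")  # hypothetical config name
context = workflow.parameterize({"run-date": "2024-01-01"})

try:
    # Outputs land under context["jobs"]["first-job"] as a side effect.
    rs = workflow.execute_job("first-job", params=context)
    print(rs.status, list(context["jobs"]))
except WorkflowException as err:
    # With the default raise_error=True, job failures surface here instead
    # of being folded into the returned Result.
    print(f"job failed: {err}")
```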
@@ -492,17 +543,8 @@ class Workflow(BaseModel):
         *,
         timeout: int = 60,
     ) -> Result:
-        """Execute workflow with passing dynamic parameters to
-        included in
-
-        :param params: An input parameters that use on workflow execution that
-            will parameterize before using it.
-        :param timeout: A workflow execution time out in second unit that use
-            for limit time of execution and waiting job dependency.
-        :rtype: Result
-
-        See Also:
-        ---
+        """Execute workflow with passing a dynamic parameters to all jobs that
+        included in this workflow model with ``jobs`` field.
 
         The result of execution process for each jobs and stages on this
         workflow will keeping in dict which able to catch out with all jobs and
@@ -513,10 +555,22 @@ class Workflow(BaseModel):
 
             ... ${job-name}.stages.${stage-id}.outputs.${key}
 
+        :param params: An input parameters that use on workflow execution that
+            will parameterize before using it. Default is None.
+        :type params: DictData | None
+        :param timeout: A workflow execution time out in second unit that use
+            for limit time of execution and waiting job dependency. Default is
+            60 seconds.
+        :type timeout: int
+        :rtype: Result
         """
         logger.info(f"({self.run_id}) [CORE]: Start Execute: {self.name!r} ...")
-
+
+        # NOTE: I use this condition because this method allow passing empty
+        #   params and I do not want to create new dict object.
+        params: DictData = {} if params is None else params
         ts: float = time.monotonic()
+        rs: Result = Result()
 
         # NOTE: It should not do anything if it does not have job.
         if not self.jobs:
@@ -524,7 +578,7 @@ class Workflow(BaseModel):
                 f"({self.run_id}) [WORKFLOW]: This workflow: {self.name!r} "
                 f"does not have any jobs"
             )
-            return
+            return rs.catch(status=0, context=params)
 
         # NOTE: Create a job queue that keep the job that want to running after
         # it dependency condition.
@@ -532,24 +586,41 @@ class Workflow(BaseModel):
         for job_id in self.jobs:
             jq.put(job_id)
 
-        # NOTE: Create
-        #
+        # NOTE: Create data context that will pass to any job executions
+        #   on this workflow.
+        #
+        #   {
+        #       'params': <input-params>,
+        #       'jobs': {},
+        #   }
+        #
         context: DictData = self.parameterize(params)
+        status: int = 0
         try:
-
-
-
-
-
-
+            if config.max_job_parallel == 1:
+                self.__exec_non_threading(
+                    context=context,
+                    ts=ts,
+                    job_queue=jq,
+                    timeout=timeout,
+                )
+            else:
+                self.__exec_threading(
+                    context=context,
+                    ts=ts,
+                    job_queue=jq,
+                    worker=config.max_job_parallel,
+                    timeout=timeout,
                 )
-            )
-            return Result(status=0, context=context)
         except WorkflowException as err:
             context.update(
-                {
+                {
+                    "error": err,
+                    "error_message": f"{err.__class__.__name__}: {err}",
+                },
             )
-
+            status = 1
+        return rs.catch(status=status, context=context)
 
     def __exec_threading(
         self,
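`execute` now branches on the new `config.max_job_parallel` instead of always threading: a value of 1 picks the sequential `__exec_non_threading` path, anything larger the thread-pool path. A self-contained sketch of the same dispatch shape, with stand-in functions rather than the package's own:

```python
from concurrent.futures import ThreadPoolExecutor


def run_job(job_id: str) -> None:
    print(f"run {job_id}")


def execute(jobs: list[str], max_job_parallel: int) -> None:
    # One worker: plain sequential loop, easier to debug and reason about.
    if max_job_parallel == 1:
        for job_id in jobs:
            run_job(job_id)
    # Several workers: fan the jobs out over a thread pool.
    else:
        with ThreadPoolExecutor(max_workers=max_job_parallel) as pool:
            for job_id in jobs:
                pool.submit(run_job, job_id)


execute(["extract", "transform", "load"], max_job_parallel=1)
execute(["extract", "transform", "load"], max_job_parallel=4)
```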
@@ -560,11 +631,15 @@ class Workflow(BaseModel):
         worker: int = 2,
         timeout: int = 600,
     ) -> DictData:
-        """Workflow threading
+        """Workflow execution by threading strategy.
+
+        If a job need dependency, it will check dependency job ID from
+        context data before allow it run.
 
         :param context: A context workflow data that want to downstream passing.
         :param ts: A start timestamp that use for checking execute time should
             timeout.
+        :param job_queue: A job queue object.
         :param timeout: A second value unit that bounding running time.
         :param worker: A number of threading executor pool size.
         :rtype: DictData
@@ -587,29 +662,42 @@ class Workflow(BaseModel):
                 job: Job = self.jobs[job_id]
 
                 if any(need not in context["jobs"] for need in job.needs):
+                    job_queue.task_done()
                     job_queue.put(job_id)
                     time.sleep(0.25)
                     continue
 
+                # NOTE: Start workflow job execution with deep copy context data
+                #   before release.
+                #
+                #   {
+                #       'params': <input-params>,
+                #       'jobs': {},
+                #   }
                 futures.append(
                     executor.submit(
                         self.execute_job,
                         job_id,
-                        params=
+                        params=context,
                     ),
                 )
+
+                # NOTE: Mark this job queue done.
                 job_queue.task_done()
 
             # NOTE: Wait for all items to finish processing
             job_queue.join()
 
-            for future in as_completed(futures):
+            for future in as_completed(futures, timeout=1800):
                 if err := future.exception():
-                    logger.error(f"{err}")
+                    logger.error(f"({self.run_id}) [CORE]: {err}")
                     raise WorkflowException(f"{err}")
-
-
-
+                try:
+                    future.result(timeout=60)
+                except TimeoutError as err:
+                    raise WorkflowException(
+                        "Timeout when getting result from future"
+                    ) from err
 
         if not_time_out_flag:
             return context
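The small added `job_queue.task_done()` before re-queueing a not-yet-ready job is the important fix in this hunk: every `Queue.get()` must be balanced by a `task_done()`, otherwise the later `job_queue.join()` waits forever once a job gets deferred. A minimal reproduction of the corrected pattern:

```python
import queue

jobs: dict[str, list[str]] = {"load": ["extract"], "extract": []}
done: set[str] = set()

q: queue.Queue = queue.Queue()
for name in jobs:
    q.put(name)

while not q.empty():
    name = q.get()
    # Defer jobs whose dependencies have not finished yet.
    if any(dep not in done for dep in jobs[name]):
        q.task_done()  # balance this get() before re-queueing,
        q.put(name)    # or q.join() below would block forever
        continue
    done.add(name)
    q.task_done()

q.join()  # returns immediately: every put() was matched by a task_done()
print(done)  # {'extract', 'load'}
```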
@@ -631,8 +719,11 @@ class Workflow(BaseModel):
         *,
         timeout: int = 600,
     ) -> DictData:
-        """Workflow non-threading
-        and waiting previous run successful.
+        """Workflow execution with non-threading strategy that use sequential
+        job running and waiting previous job was run successful.
+
+        If a job need dependency, it will check dependency job ID from
+        context data before allow it run.
 
         :param context: A context workflow data that want to downstream passing.
         :param ts: A start timestamp that use for checking execute time should
@@ -652,15 +743,23 @@ class Workflow(BaseModel):
             job_id: str = job_queue.get()
             job: Job = self.jobs[job_id]
 
-            # NOTE:
+            # NOTE: Waiting dependency job run successful before release.
             if any(need not in context["jobs"] for need in job.needs):
+                job_queue.task_done()
                 job_queue.put(job_id)
-                time.sleep(0.
+                time.sleep(0.05)
                 continue
 
-            # NOTE: Start job execution
-
-
+            # NOTE: Start workflow job execution with deep copy context data
+            #   before release.
+            #
+            #   {
+            #       'params': <input-params>,
+            #       'jobs': {},
+            #   }
+            self.execute_job(job_id=job_id, params=context)
+
+            # NOTE: Mark this job queue done.
             job_queue.task_done()
 
         # NOTE: Wait for all items to finish processing
@@ -678,8 +777,12 @@ class Workflow(BaseModel):
         )
 
 
-class WorkflowSchedule(BaseModel):
-    """Workflow
+class ScheduleWorkflow(BaseModel):
+    """Schedule Workflow Pydantic model that use to keep workflow model for the
+    Schedule model. it should not use Workflow model directly because on the
+    schedule config it can adjust crontab value that different from the Workflow
+    model.
+    """
 
     name: str = Field(description="A workflow name.")
     on: list[On] = Field(
@@ -692,17 +795,26 @@ class WorkflowSchedule(BaseModel):
     )
 
     @model_validator(mode="before")
-    def
-        """Prepare incoming values before validating with model fields."""
+    def __prepare_values(cls, values: DictData) -> DictData:
+        """Prepare incoming values before validating with model fields.
 
+        :rtype: DictData
+        """
         values["name"] = values["name"].replace(" ", "_")
 
         cls.__bypass_on(values)
         return values
 
     @classmethod
-    def __bypass_on(
-
+    def __bypass_on(
+        cls,
+        data: DictData,
+        externals: DictData | None = None,
+    ) -> DictData:
+        """Bypass the on data to loaded config data.
+
+        :rtype: DictData
+        """
         if on := data.pop("on", []):
 
             if isinstance(on, str):
@@ -735,9 +847,9 @@ class Schedule(BaseModel):
             "A schedule description that can be string of markdown content."
         ),
     )
-    workflows: list[WorkflowSchedule] = Field(
+    workflows: list[ScheduleWorkflow] = Field(
         default_factory=list,
-        description="A list of
+        description="A list of ScheduleWorkflow models.",
     )
 
     @classmethod
@@ -746,6 +858,15 @@ class Schedule(BaseModel):
         name: str,
         externals: DictData | None = None,
     ) -> Self:
+        """Create Schedule instance from the Loader object that only receive
+        an input schedule name. The loader object will use this schedule name to
+        searching configuration data of this schedule model in conf path.
+
+        :param name: A schedule name that want to pass to Loader object.
+        :param externals: An external parameters that want to pass to Loader
+            object.
+        :rtype: Self
+        """
         loader: Loader = Loader(name, externals=(externals or {}))
 
         # NOTE: Validate the config type match with current connection model
@@ -766,18 +887,18 @@ class Schedule(BaseModel):
         running: dict[str, list[datetime]],
         *,
         externals: DictData | None = None,
-    ) -> list[WorkflowTask]:
+    ) -> list[WorkflowTaskData]:
         """Generate Task from the current datetime.
 
         :param start_date: A start date that get from the workflow schedule.
         :param queue: A mapping of name and list of datetime for queue.
         :param running: A mapping of name and list of datetime for running.
         :param externals: An external parameters that pass to the Loader object.
-        :rtype: list[WorkflowTask]
+        :rtype: list[WorkflowTaskData]
         """
 
         # NOTE: Create pair of workflow and on.
-        workflow_tasks: list[WorkflowTask] = []
+        workflow_tasks: list[WorkflowTaskData] = []
         externals: DictData = externals or {}
 
         for wfs in self.workflows:
@@ -800,7 +921,7 @@ class Schedule(BaseModel):
                 heappush(queue[wfs.name], next_running_date)
 
                 workflow_tasks.append(
-                    WorkflowTask(
+                    WorkflowTaskData(
                         workflow=wf,
                         on=on,
                         params=wfs.params,
@@ -812,12 +933,22 @@ class Schedule(BaseModel):
         return workflow_tasks
 
 
-def catch_exceptions(cancel_on_failure=False):
-
+def catch_exceptions(
+    cancel_on_failure: bool = False,
+) -> Callable[P, Optional[CancelJob]]:
+    """Catch exception error from scheduler job that running with schedule
+    package and return CancelJob if this function raise an error.
 
-
+    :param cancel_on_failure: A flag that allow to return the CancelJob or not
+        it will raise.
+    :rtype: Callable[P, Optional[CancelJob]]
+    """
 
+    def decorator(
+        func: Callable[P, Optional[CancelJob]],
+    ) -> Callable[P, Optional[CancelJob]]:
         try:
+            # NOTE: Check the function that want to handle is method or not.
             if inspect.ismethod(func):
 
                 @wraps(func)
@@ -838,11 +969,11 @@ def catch_exceptions(cancel_on_failure=False):
                 return CancelJob
             raise err
 
-    return
+    return decorator
 
 
 @dataclass(frozen=True)
-class WorkflowTask:
+class WorkflowTaskData:
     """Workflow task dataclass that use to keep mapping data and objects for
     passing in multithreading task.
     """
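`catch_exceptions` is now a genuine decorator factory: it returns an inner `decorator` (typed with the module-level `P = ParamSpec("P")`) rather than the old single-level function, and still converts failures into `schedule`'s `CancelJob` sentinel when `cancel_on_failure` is set. A self-contained sketch of the same shape, with simplified typing and without the `inspect.ismethod` branch the real code keeps:

```python
import functools
from typing import Callable, Optional


class CancelJob:
    """Stand-in for schedule.CancelJob when the package is not installed."""


def catch_exceptions(
    cancel_on_failure: bool = False,
) -> Callable[[Callable[..., object]], Callable[..., object]]:
    def decorator(func: Callable[..., object]) -> Callable[..., object]:
        @functools.wraps(func)
        def wrapper(*args, **kwargs) -> Optional[object]:
            try:
                return func(*args, **kwargs)
            except Exception:
                if cancel_on_failure:
                    # Returning CancelJob tells the schedule package to drop
                    # this job instead of retrying it on the next tick.
                    return CancelJob
                raise

        return wrapper

    return decorator


@catch_exceptions(cancel_on_failure=True)
def flaky() -> None:
    raise RuntimeError("boom")


print(flaky())  # <class '__main__.CancelJob'>: the job would be unscheduled
```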
@@ -854,19 +985,28 @@ class WorkflowTask:
     running: list[datetime] = field(compare=False, hash=False)
 
     @catch_exceptions(cancel_on_failure=True)
-    def release(
+    def release(
+        self,
+        log: Log | None = None,
+        *,
+        waiting_sec: int = 60,
+        sleep_interval: int = 15,
+    ) -> None:
         """Workflow release, it will use with the same logic of
         `workflow.release` method.
 
-        :param log: A log object
+        :param log: A log object for saving result logging from workflow
+            execution process.
+        :param waiting_sec: A second period value that allow workflow execute.
+        :param sleep_interval: A second value that want to waiting until time
+            to execute.
         """
-        tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
         log: Log = log or FileLog
         wf: Workflow = self.workflow
         on: On = self.on
 
         gen: CronRunner = on.generate(
-            datetime.now(tz=tz).replace(second=0, microsecond=0)
+            datetime.now(tz=config.tz).replace(second=0, microsecond=0)
         )
         cron_tz: ZoneInfo = gen.tz
 
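`WorkflowTaskData` keeps the frozen-dataclass shape but now decorates it with `pydantic.dataclasses.dataclass` (see the import hunk at the top), while `dataclasses.field(compare=False, hash=False)` still keeps the mutable queue bookkeeping out of equality. A sketch of that combination, assuming standard pydantic v2 behavior:

```python
from dataclasses import field
from datetime import datetime

from pydantic.dataclasses import dataclass


@dataclass(frozen=True)
class TaskData:
    name: str
    # Mutable bookkeeping excluded from comparison and hashing:
    queue: list[datetime] = field(default_factory=list, compare=False, hash=False)


a = TaskData(name="wf")
b = TaskData(name="wf")
a.queue.append(datetime(2024, 1, 1))  # frozen blocks rebinding, not mutation
print(a == b)  # True: ``queue`` does not participate in equality
```

Note the real class also overrides `__eq__` to compare only the workflow name and crontab, as a later hunk shows.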
@@ -883,7 +1023,7 @@ class WorkflowTask:
         )
         heappush(self.running[wf.name], next_time)
 
-        if get_diff_sec(next_time, tz=cron_tz) >
+        if get_diff_sec(next_time, tz=cron_tz) > waiting_sec:
             logger.debug(
                 f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} "
                 f": Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
@@ -903,7 +1043,9 @@ class WorkflowTask:
             )
 
         # NOTE: Release when the time is nearly to schedule time.
-        while (duration := get_diff_sec(next_time, tz=tz)) >
+        while (duration := get_diff_sec(next_time, tz=config.tz)) > (
+            sleep_interval + 5
+        ):
             logger.debug(
                 f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} "
                 f": Sleep until: {duration}"
@@ -968,21 +1110,17 @@ class WorkflowTask:
         heappush(self.queue[wf.name], future_running_time)
         logger.debug(f"[CORE]: {'-' * 100}")
 
-    def __eq__(self, other):
-        if isinstance(other, WorkflowTask):
+    def __eq__(self, other) -> bool:
+        if isinstance(other, WorkflowTaskData):
             return (
                 self.workflow.name == other.workflow.name
                 and self.on.cronjob == other.on.cronjob
             )
 
 
-def queue2str(queue: list[datetime]) -> Iterator[str]:
-    return (f"{q:%Y-%m-%d %H:%M:%S}" for q in queue)
-
-
 @catch_exceptions(cancel_on_failure=True)
 def workflow_task(
-    workflow_tasks: list[WorkflowTask],
+    workflow_tasks: list[WorkflowTaskData],
     stop: datetime,
     threads: dict[str, Thread],
 ) -> CancelJob | None:
@@ -996,11 +1134,10 @@ def workflow_task(
     :param threads:
     :rtype: CancelJob | None
     """
-
-    start_date: datetime = datetime.now(tz=tz)
+    start_date: datetime = datetime.now(tz=config.tz)
     start_date_minute: datetime = start_date.replace(second=0, microsecond=0)
 
-    if start_date > stop.replace(tzinfo=tz):
+    if start_date > stop.replace(tzinfo=config.tz):
         logger.info("[WORKFLOW]: Stop this schedule with datetime stopper.")
         while len(threads) > 0:
             logger.warning(
@@ -1117,9 +1254,8 @@ def workflow_control(
         "Should install schedule package before use this module."
     ) from None
 
-    tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
     schedule: Scheduler = Scheduler()
-    start_date: datetime = datetime.now(tz=tz)
+    start_date: datetime = datetime.now(tz=config.tz)
 
     # NOTE: Design workflow queue caching.
     # ---
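This hunk and several earlier ones replace repeated `ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))` lookups with a shared `config.tz` from the new `conf.py` (+45 lines in the file list above, not shown in this diff). A plausible minimal sketch of such a config object, assuming it simply centralizes the same environment variables; the `WORKFLOW_CORE_MAX_JOB_PARALLEL` name mirrors the package's `WORKFLOW_CORE_*` convention but is an assumption:

```python
import os
from functools import cached_property
from zoneinfo import ZoneInfo


class Config:
    """Hypothetical sketch of the central object that conf.py introduces."""

    @cached_property
    def tz(self) -> ZoneInfo:
        # Read once; every call site now shares one timezone value.
        return ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))

    @cached_property
    def max_job_parallel(self) -> int:
        # Env var name is an assumption, not confirmed by this diff.
        return int(os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "2"))


config = Config()
```

Centralizing the lookup also makes the timezone trivial to monkeypatch in tests, which per-call `os.getenv` reads made awkward.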
@@ -1134,7 +1270,7 @@ def workflow_control(
     )
 
     # NOTE: Create pair of workflow and on from schedule model.
-    workflow_tasks: list[WorkflowTask] = []
+    workflow_tasks: list[WorkflowTaskData] = []
     for name in schedules:
         sch: Schedule = Schedule.from_loader(name, externals=externals)
         workflow_tasks.extend(
@@ -1205,8 +1341,8 @@ def workflow_runner(
     created in config path and chuck it with WORKFLOW_APP_SCHEDULE_PER_PROCESS
     value to multiprocess executor pool.
 
-
-
+    The current workflow logic that split to process will be below diagram:
+
     PIPELINES ==> process 01 ==> schedule 1 minute --> thread of release
                                                            workflow task 01 01
                                                        --> thread of release