ddeutil-workflow 0.0.63__py3-none-any.whl → 0.0.65__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +1 -8
- ddeutil/workflow/api/__init__.py +5 -84
- ddeutil/workflow/api/routes/__init__.py +0 -1
- ddeutil/workflow/api/routes/job.py +2 -3
- ddeutil/workflow/api/routes/logs.py +0 -2
- ddeutil/workflow/api/routes/workflows.py +0 -3
- ddeutil/workflow/conf.py +6 -38
- ddeutil/workflow/{exceptions.py → errors.py} +47 -12
- ddeutil/workflow/job.py +249 -118
- ddeutil/workflow/params.py +11 -11
- ddeutil/workflow/result.py +86 -10
- ddeutil/workflow/reusables.py +54 -23
- ddeutil/workflow/stages.py +692 -464
- ddeutil/workflow/utils.py +37 -2
- ddeutil/workflow/workflow.py +163 -664
- {ddeutil_workflow-0.0.63.dist-info → ddeutil_workflow-0.0.65.dist-info}/METADATA +17 -67
- ddeutil_workflow-0.0.65.dist-info/RECORD +28 -0
- {ddeutil_workflow-0.0.63.dist-info → ddeutil_workflow-0.0.65.dist-info}/WHEEL +1 -1
- ddeutil/workflow/api/routes/schedules.py +0 -141
- ddeutil/workflow/api/utils.py +0 -174
- ddeutil/workflow/scheduler.py +0 -813
- ddeutil_workflow-0.0.63.dist-info/RECORD +0 -31
- {ddeutil_workflow-0.0.63.dist-info → ddeutil_workflow-0.0.65.dist-info}/entry_points.txt +0 -0
- {ddeutil_workflow-0.0.63.dist-info → ddeutil_workflow-0.0.65.dist-info}/licenses/LICENSE +0 -0
- {ddeutil_workflow-0.0.63.dist-info → ddeutil_workflow-0.0.65.dist-info}/top_level.txt +0 -0
ddeutil/workflow/scheduler.py
DELETED
@@ -1,813 +0,0 @@
|
|
1
|
-
# ------------------------------------------------------------------------------
|
2
|
-
# Copyright (c) 2022 Korawich Anuttra. All rights reserved.
|
3
|
-
# Licensed under the MIT License. See LICENSE in the project root for
|
4
|
-
# license information.
|
5
|
-
# ------------------------------------------------------------------------------
|
6
|
-
"""The main schedule running is `schedule_runner` function that trigger the
|
7
|
-
multiprocess of `schedule_control` function for listing schedules on the
|
8
|
-
config by `Loader.finds(Schedule)`.
|
9
|
-
|
10
|
-
The `schedule_control` is the scheduler function that releases two schedule
|
11
|
-
functions: `schedule_task` and `monitor`.
|
12
|
-
|
13
|
-
`schedule_control` ---( Every minute at :02 )--> `schedule_task`
|
14
|
-
---( Every 5 minutes )--> `monitor`
|
15
|
-
|
16
|
-
The `schedule_task` will run `task.release` method in threading object
|
17
|
-
for multithreading strategy. This `release` method will run only one crontab
|
18
|
-
value with the on field.
|
19
|
-
|
20
|
-
Steps:
|
21
|
-
- Extract all schedule config on the conf path.
|
22
|
-
- Slice schedules to multiprocess
|
23
|
-
- Start running task.
|
24
|
-
"""
|
25
|
-
from __future__ import annotations
|
26
|
-
|
27
|
-
import copy
|
28
|
-
import logging
|
29
|
-
import time
|
30
|
-
from concurrent.futures import (
|
31
|
-
Future,
|
32
|
-
ProcessPoolExecutor,
|
33
|
-
as_completed,
|
34
|
-
)
|
35
|
-
from datetime import datetime, timedelta
|
36
|
-
from functools import wraps
|
37
|
-
from heapq import heappop, heappush
|
38
|
-
from pathlib import Path
|
39
|
-
from textwrap import dedent
|
40
|
-
from threading import Thread
|
41
|
-
from typing import Any, Callable, Optional, TypedDict, Union
|
42
|
-
|
43
|
-
from pydantic import BaseModel, Field, ValidationInfo
|
44
|
-
from pydantic.functional_validators import field_validator, model_validator
|
45
|
-
from typing_extensions import Self
|
46
|
-
|
47
|
-
try:
|
48
|
-
from typing import ParamSpec
|
49
|
-
except ImportError: # pragma: no cov
|
50
|
-
from typing_extensions import ParamSpec
|
51
|
-
|
52
|
-
try:
|
53
|
-
from schedule import CancelJob
|
54
|
-
except ImportError: # pragma: no cov
|
55
|
-
CancelJob = None
|
56
|
-
|
57
|
-
from .__cron import CronRunner
|
58
|
-
from .__types import DictData, TupleStr
|
59
|
-
from .conf import FileLoad, Loader, dynamic
|
60
|
-
from .event import Crontab
|
61
|
-
from .exceptions import ScheduleException, WorkflowException
|
62
|
-
from .logs import Audit, get_audit
|
63
|
-
from .result import SUCCESS, Result
|
64
|
-
from .utils import batch, delay
|
65
|
-
from .workflow import Release, ReleaseQueue, Workflow, WorkflowTask
|
66
|
-
|
67
|
-
P = ParamSpec("P")
|
68
|
-
|
69
|
-
logging.getLogger("schedule").setLevel(logging.INFO)
|
70
|
-
|
71
|
-
|
72
|
-
__all__: TupleStr = (
|
73
|
-
"Schedule",
|
74
|
-
"ScheduleWorkflow",
|
75
|
-
"schedule_task",
|
76
|
-
"monitor",
|
77
|
-
"schedule_control",
|
78
|
-
"schedule_runner",
|
79
|
-
"ReleaseThreads",
|
80
|
-
"ReleaseThread",
|
81
|
-
)
|
82
|
-
|
83
|
-
|
84
|
-
class ScheduleWorkflow(BaseModel):
    """Workflow entry of a schedule config.

    A schedule does not embed the `Workflow` model directly because the
    schedule config may carry its own crontab values. The `on` field here is
    built with the same crontab logic as the workflow's `on` field, but when it
    is set it is used instead of the workflow's own value, so a schedule time
    can change without touching the workflow config.
    """

    # NOTE: Field order matters: `extras` is declared before `on` so the `on`
    #   validator can read it from the already-validated data.
    extras: DictData = Field(
        default_factory=dict,
        description="An extra parameters that want to override config values.",
    )

    alias: Optional[str] = Field(
        default=None,
        description="An alias name of workflow that use for schedule model.",
    )
    name: str = Field(description="A workflow name.")
    on: list[Crontab] = Field(
        default_factory=list,
        description="An override the list of Crontab object values.",
    )
    values: DictData = Field(
        default_factory=dict,
        description=(
            "A value that want to pass to the workflow params field when auto "
            "calling release method."
        ),
        alias="params",
    )

    @model_validator(mode="before")
    def __prepare_before__(cls, data: Any) -> Any:
        """Normalize raw input before the field validation runs.

        Only mapping input is touched: the alias falls back to the workflow
        name and the `on` key is resolved in place.
        """
        if not isinstance(data, dict):
            return data

        # VALIDATE: Default the alias field to the workflow name.
        data.setdefault("alias", data.get("name"))

        cls.__bypass_on(data, extras=data.get("extras"))
        return data

    @classmethod
    def __bypass_on(
        cls, data: DictData, *, extras: Optional[DictData] = None
    ) -> DictData:
        """Resolve the `on` key of the raw config data in place.

        :param data: (DictData) A raw data that will be validated by the model.
        :param extras: (DictData) An extra parameter that want to override core
            config values.

        :raise TypeError: If any `on` item is neither a str nor a dict.

        :rtype: DictData
        """
        raw_on = data.pop("on", [])
        if not raw_on:
            return data

        # NOTE: A single string is treated as a one-item list.
        entries = [raw_on] if isinstance(raw_on, str) else raw_on

        if not all(isinstance(item, (dict, str)) for item in entries):
            raise TypeError("The `on` key should be list of str or dict")

        # NOTE: String items are config names that are loaded to their data;
        #   dict items pass through unchanged.
        # NOTE(review): this call uses `externals=` while `Schedule.from_conf`
        #   passes `extras=` to FileLoad -- confirm which keyword is right.
        resolved: list[Any] = []
        for item in entries:
            if isinstance(item, str):
                resolved.append(FileLoad(item, externals=extras).data)
            else:
                resolved.append(item)
        data["on"] = resolved
        return data

    @field_validator("on", mode="after")
    def __on_no_dup__(
        cls, value: list[Crontab], info: ValidationInfo
    ) -> list[Crontab]:
        """Reject duplicated crontab values and too many crontab values.

        :param value: (list[Crontab]) A list of `Crontab` object.
        :param info: (ValidationInfo) A validation info object that is used to
            read the already-validated `extras` value.

        :raise ValueError: If two items share the same cronjob string, or the
            number of items exceeds the `max_cron_per_workflow` config.

        :rtype: list[Crontab]
        """
        unique_crons: set[str] = {str(item.cronjob) for item in value}
        if len(unique_crons) != len(value):
            raise ValueError(
                "The on fields should not contain duplicate on value."
            )

        limit = dynamic(
            "max_cron_per_workflow", extras=info.data.get("extras")
        )
        if len(unique_crons) > limit:
            raise ValueError(
                f"The number of the on should not more than {limit} crontabs."
            )
        return value

    def tasks(
        self,
        start_date: datetime,
        queue: dict[str, ReleaseQueue],
    ) -> list[WorkflowTask]:
        """Build one WorkflowTask per crontab value of this schedule entry.

        The queue is needed to skip release dates that it already tracks.

        :param start_date: (datetime) A start datetime that each cron runner
            is generated from.
        :param queue: (dict[str, ReleaseQueue]) A mapping of alias and release
            queue; it must already contain this entry's alias.

        :rtype: list[WorkflowTask]
        """
        workflow: Workflow = Workflow.from_conf(self.name, extras=self.extras)
        release_queue: ReleaseQueue = queue[self.alias]

        def build_task(cron: Crontab) -> WorkflowTask:
            # NOTE: Each read of `runner.next` yields the following date, so
            #   keep reading until the queue does not know the date yet.
            runner: CronRunner = cron.generate(start_date)
            candidate = runner.next
            while release_queue.check_queue(candidate):
                candidate = runner.next

            return WorkflowTask(
                alias=self.alias,
                workflow=workflow,
                runner=runner,
                values=self.values,
                extras=self.extras,
            )

        # IMPORTANT: Fall back to the workflow's own `on` values when this
        #   schedule entry does not override them.
        crons: list[Crontab] = self.on or workflow.on.copy()
        return [build_task(cron) for cron in crons]
|
236
|
-
|
237
|
-
|
238
|
-
class Schedule(BaseModel):
    """Schedule Pydantic model that use to run with any scheduler package.

    The workflows field of this model include ScheduleWorkflow objects that
    enhance the workflow object by adding the alias and values fields.
    """

    extras: DictData = Field(
        default_factory=dict,
        description="An extra parameters that want to override config values.",
    )

    desc: Optional[str] = Field(
        default=None,
        description=(
            "A schedule description that can be string of markdown content."
        ),
    )
    workflows: list[ScheduleWorkflow] = Field(
        default_factory=list,
        description="A list of ScheduleWorkflow model.",
    )

    @field_validator("desc", mode="after")
    def __dedent_desc__(cls, value: Optional[str]) -> Optional[str]:
        """Dedent a description string that was written in a template.

        :param value: A description string value that want to dedent, or None.

        :rtype: Optional[str]
        """
        # NOTE: The field is Optional, so an explicit `desc=None` reaches this
        #   validator; `dedent(None)` would raise a TypeError.
        if value is None:
            return None
        return dedent(value)

    @classmethod
    def from_conf(
        cls,
        name: str,
        *,
        path: Optional[Path] = None,
        extras: DictData | None = None,
    ) -> Self:
        """Create Schedule instance from the config data that is loaded with
        the input schedule name.

        :param name: (str) A schedule name that want to pass to the loader.
        :param path: (Path) An override config path.
        :param extras: An extra parameters that want to pass to the loader.

        :raise ValueError: If the loaded config type does not match this
            class name.

        :rtype: Self
        """
        loader: Loader = FileLoad(name, path=path, extras=extras)

        # NOTE: Validate the config type match with the current model.
        if loader.type != cls.__name__:
            raise ValueError(f"Type {loader.type} does not match with {cls}")

        # NOTE: Copy first so the loader's own data is not mutated below.
        loader_data: DictData = copy.deepcopy(loader.data)
        loader_data["name"] = name

        if extras:
            loader_data["extras"] = extras

        return cls.model_validate(obj=loader_data)

    def tasks(
        self,
        start_date: datetime,
        queue: dict[str, ReleaseQueue],
    ) -> list[WorkflowTask]:
        """Return the list of WorkflowTask object of every workflow entry in
        this schedule.

        :param start_date: A start date that is passed to each workflow entry.
        :param queue: (dict[str, ReleaseQueue]) A mapping of alias and release
            queue. A missing alias gets a new empty ReleaseQueue.

        :rtype: list[WorkflowTask]
        """
        workflow_tasks: list[WorkflowTask] = []

        for workflow in self.workflows:
            # NOTE: Schedule-level extras replace the entry-level extras.
            if self.extras:
                workflow.extras = self.extras

            if workflow.alias not in queue:
                queue[workflow.alias] = ReleaseQueue()

            workflow_tasks.extend(workflow.tasks(start_date, queue=queue))

        return workflow_tasks

    def pending(
        self,
        *,
        stop: Optional[datetime] = None,
        audit: type[Audit] | None = None,
        parent_run_id: Optional[str] = None,
    ) -> Result:  # pragma: no cov
        """Pending this schedule tasks with the schedule package.

        :param stop: A datetime value that use to stop running schedule. It
            defaults to now plus the `stop_boundary_delta` config.
        :param audit: An audit class that use on the workflow task release for
            writing its release audit context.
        :param parent_run_id: A parent workflow running ID for this release.

        :rtype: Result
        """
        audit: type[Audit] = audit or get_audit(extras=self.extras)
        result: Result = Result().set_parent_run_id(parent_run_id)

        # NOTE: Create the start and stop datetime.
        start_date: datetime = datetime.now(
            tz=dynamic("tz", extras=self.extras)
        )
        stop_date: datetime = stop or (
            start_date + dynamic("stop_boundary_delta", extras=self.extras)
        )

        # IMPORTANT: Create main mapping of queue and thread object.
        queue: dict[str, ReleaseQueue] = {}
        threads: ReleaseThreads = {}

        # NOTE: Tasks start from the beginning of the next minute.
        start_date_waiting: datetime = start_date.replace(
            second=0, microsecond=0
        ) + timedelta(minutes=1)

        scheduler_pending(
            tasks=self.tasks(start_date_waiting, queue=queue),
            stop=stop_date,
            queue=queue,
            threads=threads,
            result=result,
            audit=audit,
        )

        return result.catch(status=SUCCESS)
|
378
|
-
|
379
|
-
|
380
|
-
ResultOrCancel = Union[type[CancelJob], Result]
|
381
|
-
ReturnResultOrCancel = Callable[P, ResultOrCancel]
|
382
|
-
DecoratorCancelJob = Callable[[ReturnResultOrCancel], ReturnResultOrCancel]
|
383
|
-
|
384
|
-
|
385
|
-
def catch_exceptions(
    cancel_on_failure: bool = False,
    parent_run_id: Optional[str] = None,
) -> DecoratorCancelJob:
    """Build a decorator that guards a scheduler job against exceptions.

    :param cancel_on_failure: A flag that makes the wrapped job return
        `CancelJob` on error instead of raising the error.
    :param parent_run_id: A parent running ID. When it is set, the error is
        written to the trace of a Result that is created with this ID.

    :rtype: DecoratorCancelJob
    """

    def decorator(
        func: ReturnResultOrCancel,
    ) -> ReturnResultOrCancel:  # pragma: no cov

        @wraps(func)
        def wrapper(*args: P.args, **kwargs: P.kwargs) -> ResultOrCancel:
            try:
                outcome = func(*args, **kwargs)
            except Exception as err:
                # NOTE: Trace the error only when a parent run ID is known.
                if parent_run_id:
                    trace = Result(parent_run_id=parent_run_id).trace
                    trace.exception(str(err))

                if not cancel_on_failure:
                    raise err
                return CancelJob
            return outcome

        return wrapper

    return decorator
|
423
|
-
|
424
|
-
|
425
|
-
class ReleaseThread(TypedDict):
    """TypedDict for one release thread entry that the scheduler tracks."""

    # The thread that runs the release; `monitor` sets it to None once the
    # thread is no longer alive.
    thread: Optional[Thread]
    # The datetime at which `schedule_task` registered this entry.
    start_date: datetime
    # The release date that this thread handles.
    release_date: datetime
|
431
|
-
|
432
|
-
|
433
|
-
ReleaseThreads = dict[str, ReleaseThread]
|
434
|
-
|
435
|
-
|
436
|
-
def schedule_task(
    tasks: list[WorkflowTask],
    stop: datetime,
    queue: dict[str, ReleaseQueue],
    threads: ReleaseThreads,
    audit: type[Audit],
    *,
    parent_run_id: Optional[str] = None,
    extras: Optional[DictData] = None,
) -> ResultOrCancel:
    """Schedule task function that generate thread of workflow task release
    method in background. This function do the same logic as the workflow poke
    method, but it runs with map of schedules and the on values.

        This schedule task start runs every minute at ':02' second, and it does
    not allow you to run with offset time.

    :param tasks: A list of WorkflowTask object.
    :param stop: A stop datetime object that force stop running scheduler. A
        naive value is read in the configured timezone; an aware value is
        compared as the instant it represents.
    :param queue: A mapping of alias name and ReleaseQueue object.
    :param threads: A mapping of thread name and ReleaseThread entry. It is
        filled in place with every thread that this call starts.
    :param audit: An audit class that want to make audit object.
    :param parent_run_id: A parent workflow running ID for this release.
    :param extras: An extra parameter that want to override the core config.

    :raise ScheduleException: If the first release date in a queue is older
        than the current minute.

    :rtype: ResultOrCancel
    :return: `CancelJob` when the current time passed the stop datetime,
        otherwise a success Result with the `task_date` context.
    """
    result: Result = Result().set_parent_run_id(parent_run_id)
    tz = dynamic("tz", extras=extras)
    current_date: datetime = datetime.now(tz=tz)

    # NOTE: Attach the configured timezone only to a naive stop value.
    #   Replacing the tzinfo of an aware value would shift its instant.
    stop_date: datetime = (
        stop.replace(tzinfo=tz) if stop.utcoffset() is None else stop
    )
    if current_date > stop_date:
        return CancelJob

    # NOTE: A release is due only when it matches the current minute.
    current_release: datetime = current_date.replace(second=0, microsecond=0)

    # IMPORTANT:
    #       Filter workflow & on that should to run with `workflow_release`
    #   function. It will duplicate running with different schedule value
    #   because I use current time in this condition.
    #
    #       For example, if a queue has a time release be '00:02:00' that should
    #   to run and its schedule has '*/2 * * * *' and '*/35 * * * *'.
    #   This condition make this function create 2 threading tasks.
    #
    #   '00:02:00'  --> '*/2 * * * *'   --> run
    #               --> '*/35 * * * *'  --> skip
    #
    for task in tasks:

        # NOTE: Get the ReleaseQueue with an alias of the WorkflowTask.
        q: ReleaseQueue = queue[task.alias]

        # NOTE: Start adding queue and move the runner date in the WorkflowTask.
        task.queue(stop, q, audit=audit)

        # NOTE: Get incoming datetime queue.
        result.trace.debug(
            f"[WORKFLOW]: Queue: {task.alias!r} : {list(q.queue)}"
        )

        # VALIDATE: Check the queue is empty or not.
        if not q.is_queued:
            result.trace.warning(
                f"[WORKFLOW]: Queue is empty for : {task.alias!r} : "
                f"{task.runner.cron}"
            )
            continue

        # VALIDATE: Check this task is the first release in the queue or not.
        if (first_date := q.queue[0].date) > current_release:  # pragma: no cov
            result.trace.debug(
                f"[WORKFLOW]: Skip schedule "
                f"{first_date:%Y-%m-%d %H:%M:%S} for : {task.alias!r}"
            )
            continue
        elif first_date < current_release:  # pragma: no cov
            raise ScheduleException(
                "The first release date from queue should not less than "
                "current release date."
            )

        # NOTE: Pop the latest release and push it to running.
        release: Release = heappop(q.queue)
        heappush(q.running, release)

        result.trace.info(
            f"[WORKFLOW]: Start thread: '{task.alias}|"
            f"{release.date:%Y%m%d%H%M}'"
        )

        # NOTE: Create thread name that able to tracking with observe schedule
        #   job.
        thread_name: str = f"{task.alias}|{release.date:%Y%m%d%H%M}"
        thread: Thread = Thread(
            # NOTE: Pass the parent run ID so that an error raised inside the
            #   release thread is traced instead of being dropped silently.
            target=catch_exceptions(
                cancel_on_failure=True,
                parent_run_id=parent_run_id,
            )(task.release),
            kwargs={
                "release": release,
                "queue": q,
                "audit": audit,
            },
            name=thread_name,
            daemon=True,
        )

        threads[thread_name] = {
            "thread": thread,
            "start_date": datetime.now(tz=tz),
            "release_date": release.date,
        }

        thread.start()

        # NOTE(review): assumed to run once per started thread -- confirm
        #   against the upstream file layout.
        delay()

    result.trace.debug(
        f"[SCHEDULE]: End schedule task that run since "
        f"{current_date:%Y-%m-%d %H:%M:%S} {'=' * 30}"
    )
    return result.catch(status=SUCCESS, context={"task_date": current_date})
|
557
|
-
|
558
|
-
|
559
|
-
def monitor(
    threads: ReleaseThreads,
    parent_run_id: Optional[str] = None,
) -> None:  # pragma: no cov
    """Release the thread object of every tracked entry that finished.

    The entry itself stays in the mapping; only its `thread` value is set to
    None when the thread is no longer alive.

    :param threads: A mapping of thread name and ReleaseThread entry.
    :param parent_run_id: A parent workflow running ID for this release.

    :type threads: ReleaseThreads
    """
    result: Result = Result().set_parent_run_id(parent_run_id)
    result.trace.debug("[MONITOR]: Start checking long running schedule task.")

    # NOTE: Iterate over a snapshot of the names, not the live mapping.
    for name in list(threads.keys()):
        entry: ReleaseThread = threads[name]
        worker = entry["thread"]

        # NOTE: Skip an entry that was already cleared or is still running.
        if not worker or worker.is_alive():
            continue

        entry["thread"] = None
|
583
|
-
|
584
|
-
|
585
|
-
def scheduler_pending(
    tasks: list[WorkflowTask],
    stop: datetime,
    queue: dict[str, ReleaseQueue],
    threads: ReleaseThreads,
    result: Result,
    audit: type[Audit],
) -> Result:  # pragma: no cov
    """Run the schedule loop until the control job no longer exists.

    :param tasks: A list of WorkflowTask object.
    :param stop: A stop datetime object that force stop running scheduler.
    :param queue: A mapping of alias name and ReleaseQueue object.
    :param threads: A mapping of thread name and ReleaseThread entry.
    :param result: A result object.
    :param audit: An audit class that want to make audit object.

    :raise ImportError: If the `schedule` package is not installed.

    :rtype: Result
    """
    try:
        from schedule import Scheduler
    except ImportError:
        raise ImportError(
            "Should install schedule package before use this method."
        ) from None

    scheduler: Scheduler = Scheduler()
    parent_id = result.parent_run_id

    # NOTE: The control job runs `schedule_task` every minute at :02 seconds.
    #   The wrapper returns CancelJob on error, which drops the job.
    control_job = catch_exceptions(
        cancel_on_failure=True,
        parent_run_id=parent_id,
    )(schedule_task)
    scheduler.every(1).minutes.at(":02").do(
        control_job,
        tasks=tasks,
        stop=stop,
        queue=queue,
        threads=threads,
        audit=audit,
        parent_run_id=parent_id,
    ).tag("control")

    # NOTE: The monitor job checks finished threads every 5 minutes at :10
    #   seconds.
    scheduler.every(5).minutes.at(":10").do(
        monitor,
        threads=threads,
        parent_run_id=parent_id,
    ).tag("monitor")

    # NOTE: Start running schedule
    result.trace.info(
        f"[SCHEDULE]: Schedule with stopper: {stop:%Y-%m-%d %H:%M:%S}"
    )

    while True:
        scheduler.run_pending()
        time.sleep(1)

        # NOTE: Keep looping while the control job still exists.
        if scheduler.get_jobs("control"):
            continue

        scheduler.clear("monitor")

        # NOTE: Wait until every tracked thread was released by the monitor.
        while any(entry["thread"] for entry in threads.values()):
            result.trace.warning(
                "[SCHEDULE]: Waiting schedule release thread that still "
                "running in background."
            )
            delay(10)
            monitor(threads, parent_run_id=result.parent_run_id)

        break

    result.trace.warning(
        f"[SCHEDULE]: Queue: {[list(queue[wf].queue) for wf in queue]}"
    )
    thread_summaries = [
        {
            "name": name,
            "start_date": entry["start_date"],
            "release_date": entry["release_date"],
        }
        for name, entry in threads.items()
    ]
    return result.catch(status=SUCCESS, context={"threads": thread_summaries})
|
684
|
-
|
685
|
-
|
686
|
-
def schedule_control(
    schedules: list[str],
    stop: Optional[datetime] = None,
    *,
    extras: DictData | None = None,
    audit: type[Audit] | None = None,
    parent_run_id: Optional[str] = None,
) -> Result:  # pragma: no cov
    """Load a chunk of schedules, build their tasks, and run them with the
    `scheduler_pending` function.

    :param schedules: A list of schedule names that are loaded with
        `Schedule.from_conf`.
    :param stop: A datetime value that use to stop running schedule. It
        defaults to now plus the `stop_boundary_delta` config.
    :param extras: An extra parameters that want to override core config.
    :param audit: An audit class that use on the workflow task release for
        writing its release audit context.
    :param parent_run_id: A parent workflow running ID for this release.

    :rtype: Result
    """
    audit: type[Audit] = audit or get_audit(extras=extras)
    result: Result = Result.construct_with_rs_or_id(parent_run_id=parent_run_id)

    # NOTE: Create the start and stop datetime.
    start_date: datetime = datetime.now(tz=dynamic("tz", extras=extras))
    if stop:
        stop_date: datetime = stop
    else:
        stop_date = start_date + dynamic("stop_boundary_delta", extras=extras)

    # IMPORTANT: Create main mapping of queue and thread object.
    queue: dict[str, ReleaseQueue] = {}
    threads: ReleaseThreads = {}

    # NOTE: Tasks start from the beginning of the next minute.
    next_minute: datetime = start_date.replace(second=0, microsecond=0)
    start_date_waiting: datetime = next_minute + timedelta(minutes=1)

    tasks: list[WorkflowTask] = []
    for name in schedules:
        schedule: Schedule = Schedule.from_conf(name, extras=extras)
        tasks.extend(schedule.tasks(start_date_waiting, queue=queue))

    scheduler_pending(
        tasks=tasks,
        stop=stop_date,
        queue=queue,
        threads=threads,
        result=result,
        audit=audit,
    )

    return result.catch(status=SUCCESS, context={"schedules": schedules})
|
744
|
-
|
745
|
-
|
746
|
-
def schedule_runner(
    stop: Optional[datetime] = None,
    *,
    max_process: int | None = None,
    extras: DictData | None = None,
    excluded: list[str] | None = None,
) -> Result:  # pragma: no cov
    """Schedule runner function is the multiprocess controller function for
    split the setting schedule to the `schedule_control` function on the
    process pool. It chunks schedule configs that are found with
    `Loader.finds(Schedule)` by the `max_schedule_per_process` config value.

    :param stop: A stop datetime object that force stop running scheduler.
    :param max_process: (int) The maximum process that want to run this func.
    :param extras: An extra parameter that want to override core config.
    :param excluded: A list of schedule name that want to exclude from finding.

        The current workflow logic that split to process will be below diagram:

        MAIN ==> process 01 ==> schedule ==> thread 01 --> 01
                                         ==> thread 01 --> 02
                            ==> schedule ==> thread 02 --> 01
                                         ==> thread 02 --> 02
                            ==> ...
             ==> process 02 ==> ...

    :raise WorkflowException: If any `schedule_control` process raised.

    :rtype: Result
    :return: A success Result whose context merges the `schedules` and
        `threads` values of every process result.
    """
    result: Result = Result()
    context: DictData = {"schedules": [], "threads": []}

    with ProcessPoolExecutor(
        max_workers=dynamic(
            "max_schedule_process", f=max_process, extras=extras
        ),
    ) as executor:

        # NOTE: One `schedule_control` process per chunk of schedule names;
        #   the first item of each found entry is used as the schedule name.
        futures: list[Future] = [
            executor.submit(
                schedule_control,
                schedules=[load[0] for load in loader],
                stop=stop,
                extras=extras,
                parent_run_id=result.parent_run_id,
            )
            for loader in batch(
                Loader.finds(Schedule, excluded=excluded),
                n=dynamic("max_schedule_per_process", extras=extras),
            )
        ]

        for future in as_completed(futures):

            # NOTE: Raise error when it has any error from schedule_control.
            if err := future.exception():
                result.trace.error(str(err))
                raise WorkflowException(str(err)) from err

            rs: Result = future.result(timeout=1)

            # NOTE: The key must match the `schedules` key that the context
            #   was created with; `schedule` raised a KeyError here.
            context["schedules"].extend(rs.context.get("schedules", []))
            context["threads"].extend(rs.context.get("threads", []))

    return result.catch(status=SUCCESS, context=context)
|