ddeutil-workflow 0.0.9__py3-none-any.whl → 0.0.10__py3-none-any.whl
This diff compares publicly available package versions that have been released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/api.py +16 -16
- ddeutil/workflow/cli.py +105 -22
- ddeutil/workflow/cron.py +116 -26
- ddeutil/workflow/exceptions.py +3 -0
- ddeutil/workflow/log.py +66 -59
- ddeutil/workflow/on.py +10 -4
- ddeutil/workflow/pipeline.py +267 -223
- ddeutil/workflow/repeat.py +66 -39
- ddeutil/workflow/route.py +59 -38
- ddeutil/workflow/scheduler.py +355 -187
- ddeutil/workflow/stage.py +15 -11
- ddeutil/workflow/utils.py +142 -6
- {ddeutil_workflow-0.0.9.dist-info → ddeutil_workflow-0.0.10.dist-info}/METADATA +17 -108
- ddeutil_workflow-0.0.10.dist-info/RECORD +21 -0
- ddeutil_workflow-0.0.10.dist-info/entry_points.txt +2 -0
- ddeutil/workflow/loader.py +0 -132
- ddeutil_workflow-0.0.9.dist-info/RECORD +0 -22
- ddeutil_workflow-0.0.9.dist-info/entry_points.txt +0 -2
- {ddeutil_workflow-0.0.9.dist-info → ddeutil_workflow-0.0.10.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.9.dist-info → ddeutil_workflow-0.0.10.dist-info}/WHEEL +0 -0
- {ddeutil_workflow-0.0.9.dist-info → ddeutil_workflow-0.0.10.dist-info}/top_level.txt +0 -0
ddeutil/workflow/scheduler.py
CHANGED
@@ -5,6 +5,8 @@
 # ------------------------------------------------------------------------------
 from __future__ import annotations
 
+import copy
+import json
 import logging
 import os
 import time
@@ -15,49 +17,189 @@ from datetime import datetime, timedelta
 from functools import wraps
 from heapq import heappush
 from threading import Thread
+from typing import Optional
 from zoneinfo import ZoneInfo
 
-from
-from
-from
-from
-
-
-from
+from dotenv import load_dotenv
+from pydantic import BaseModel, Field
+from pydantic.functional_validators import model_validator
+from typing_extensions import Self
+
+try:
+    from schedule import CancelJob, Scheduler
+except ImportError:
+    raise ImportError(
+        "Should install schedule package before use this module."
+    ) from None
+
+from .__types import DictData
+from .cron import CronRunner
+from .exceptions import WorkflowException
+from .log import FileLog, Log, get_logger
+from .on import On
+from .pipeline import Pipeline
+from .utils import (
+    Loader,
     Result,
     batch,
     delay,
     get_diff_sec,
     param2template,
 )
-
-
-
-load_dotenv("../../../.env")
-logging.basicConfig(
-    level=logging.DEBUG,
-    format=(
-        "%(asctime)s.%(msecs)03d (%(name)-10s, %(process)-5d, %(thread)-5d) "
-        "[%(levelname)-7s] %(message)-120s (%(filename)s:%(lineno)s)"
-    ),
-    handlers=[logging.StreamHandler()],
-    datefmt="%Y-%m-%d %H:%M:%S",
-)
+
+load_dotenv()
+logger = get_logger("ddeutil.workflow")
 logging.getLogger("schedule").setLevel(logging.INFO)
 
-
+
+__all__ = (
+    "PipelineSchedule",
+    "Schedule",
+    "workflow",
+)
+
+
+class PipelineSchedule(BaseModel):
+    """Pipeline schedule Pydantic Model."""
+
+    name: str = Field(description="A pipeline name.")
+    on: list[On] = Field(
+        default_factory=list,
+        description="An override On instance value.",
+    )
+    params: DictData = Field(
+        default_factory=dict,
+        description="A parameters that want to use to pipeline execution.",
+    )
+
+    @model_validator(mode="before")
+    def __prepare__values(cls, values: DictData) -> DictData:
+        """Prepare incoming values before validating with model fields."""
+
+        values["name"] = values["name"].replace(" ", "_")
+
+        cls.__bypass_on(values)
+        return values
+
+    @classmethod
+    def __bypass_on(cls, data: DictData, externals: DictData | None = None):
+        """Bypass the on data to loaded config data."""
+        if on := data.pop("on", []):
+
+            if isinstance(on, str):
+                on = [on]
+
+            if any(not isinstance(n, (dict, str)) for n in on):
+                raise TypeError("The ``on`` key should be list of str or dict")
+
+            # NOTE: Pass on value to Loader and keep on model object to on field
+            data["on"] = [
+                (
+                    Loader(n, externals=(externals or {})).data
+                    if isinstance(n, str)
+                    else n
+                )
+                for n in on
+            ]
+        return data
+
+
+class Schedule(BaseModel):
+    """Schedule Pydantic Model that use to run with scheduler package. It does
+    not equal the on value in Pipeline model but it use same logic to running
+    release date with crontab interval.
+    """
+
+    desc: Optional[str] = Field(
+        default=None,
+        description=(
+            "A schedule description that can be string of markdown content."
+        ),
+    )
+    pipelines: list[PipelineSchedule] = Field(
+        default_factory=list,
+        description="A list of PipelineSchedule models.",
+    )
+
+    @classmethod
+    def from_loader(
+        cls,
+        name: str,
+        externals: DictData | None = None,
+    ) -> Self:
+        loader: Loader = Loader(name, externals=(externals or {}))
+
+        # NOTE: Validate the config type match with current connection model
+        if loader.type != cls:
+            raise ValueError(f"Type {loader.type} does not match with {cls}")
+
+        loader_data: DictData = copy.deepcopy(loader.data)
+
+        # NOTE: Add name to loader data
+        loader_data["name"] = name.replace(" ", "_")
+
+        return cls.model_validate(obj=loader_data)
+
+    def tasks(
+        self,
+        start_date: datetime,
+        queue: dict[str, list[datetime]],
+        running: dict[str, list[datetime]],
+        externals: DictData | None = None,
+    ) -> list[PipelineTask]:
+        """Generate Task from the current datetime.
+
+        :param start_date: A start date that get from the workflow schedule.
+        :param queue:
+        :param running:
+        :param externals: An external parameters that pass to the Loader object.
+        :rtype: list[PipelineTask]
+        """
+
+        # NOTE: Create pair of pipeline and on.
+        pipeline_tasks: list[PipelineTask] = []
+        externals: DictData = externals or {}
+
+        for pipe in self.pipelines:
+            pipeline: Pipeline = Pipeline.from_loader(
+                pipe.name, externals=externals
+            )
+
+            # NOTE: Create default list of release datetime.
+            queue[pipe.name]: list[datetime] = []
+            running[pipe.name]: list[datetime] = []
+
+            for on in pipeline.on:
+                on_gen = on.generate(start_date)
+                next_running_date = on_gen.next
+                while next_running_date in queue[pipe.name]:
+                    next_running_date = on_gen.next
+
+                heappush(queue[pipe.name], next_running_date)
+
+                pipeline_tasks.append(
+                    PipelineTask(
+                        pipeline=pipeline,
+                        on=on,
+                        params=pipe.params,
+                        queue=queue,
+                        running=running,
+                    ),
+                )
+
+        return pipeline_tasks
 
 
 def catch_exceptions(cancel_on_failure=False):
     """Catch exception error from scheduler job."""
 
-    def catch_exceptions_decorator(
-        @wraps(
+    def catch_exceptions_decorator(func):
+        @wraps(func)
         def wrapper(*args, **kwargs):
             try:
-                return
+                return func(*args, **kwargs)
             except Exception as err:
-
+                logger.exception(err)
                 if cancel_on_failure:
                     return CancelJob
 
@@ -66,136 +208,156 @@ def catch_exceptions(cancel_on_failure=False):
     return catch_exceptions_decorator
 
 
-
+def catch_exceptions_method(cancel_on_failure=False):
+    """Catch exception error from scheduler job."""
+
+    def catch_exceptions_decorator(func):
+        @wraps(func)
+        def wrapper(self, *args, **kwargs):
+            try:
+                return func(self, *args, **kwargs)
+            except Exception as err:
+                logger.exception(err)
+                if cancel_on_failure:
+                    return CancelJob
+
+        return wrapper
+
+    return catch_exceptions_decorator
+
+
+@dataclass(frozen=True)
 class PipelineTask:
+    """Pipeline task dataclass that use to keep mapping data and objects for
+    passing in multithreading task.
+    """
+
     pipeline: Pipeline
     on: On
+    params: DictData
     queue: list[datetime]
     running: list[datetime]
 
+    @catch_exceptions_method(cancel_on_failure=True)
+    def release(self, log: Log | None = None) -> None:
+        """Pipeline release, it will use with the same logic of
+        `pipeline.release` method.
 
-
-
+        :param log: A log object.
+        """
+        tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
+        log: Log = log or FileLog
+        pipeline: Pipeline = self.pipeline
+        on: On = self.on
 
+        gen: CronRunner = on.generate(
+            datetime.now(tz=tz).replace(second=0, microsecond=0)
+        )
+        cron_tz: ZoneInfo = gen.tz
 
-
-
-    *,
-    log: Log | None = None,
-) -> None:
-    """Pipeline release, it will use with the same logic of `pipeline.release`
-    method.
+        # NOTE: get next schedule time that generate from now.
+        next_time: datetime = gen.next
 
-
-
-
-
-
-    on: On = task.on
+        # NOTE: get next utils it does not running.
+        while log.is_pointed(
+            pipeline.name, next_time, queue=self.running[pipeline.name]
+        ):
+            next_time: datetime = gen.next
 
-
-
-
-
+        logger.debug(
+            f"({pipeline.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} : "
+            f"{next_time:%Y-%m-%d %H:%M:%S}"
+        )
+        heappush(self.running[pipeline.name], next_time)
 
-
-
-
+        if get_diff_sec(next_time, tz=cron_tz) > 55:
+            logger.debug(
+                f"({pipeline.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} "
+                f": Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
+            )
 
-
-
-
-
-    heappush(task.running[pipeline.name], next_running_time)
+            # NOTE: Add this next running datetime that not in period to queue
+            # and remove it to running.
+            self.running[pipeline.name].remove(next_time)
+            heappush(self.queue[pipeline.name], next_time)
 
-
+            time.sleep(0.2)
+            return
 
-
-    logging.debug(
+        logger.debug(
             f"({pipeline.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} : "
-        f"
+            f"Closely to run >> {next_time:%Y-%m-%d %H:%M:%S}"
         )
 
-    # NOTE:
-
-
+        # NOTE: Release when the time is nearly to schedule time.
+        while (duration := get_diff_sec(next_time, tz=tz)) > (15 + 5):
+            logger.debug(
+                f"({pipeline.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} "
+                f": Sleep until: {duration}"
+            )
+            time.sleep(15)
+
         time.sleep(0.5)
-    return
 
-
-
-
-
+        # NOTE: Release parameter that use to change if params has
+        # templating.
+        release_params: DictData = {
+            "release": {
+                "logical_date": next_time,
+            },
+        }
 
-
-
-
-
-
+        # WARNING: Re-create pipeline object that use new running pipeline
+        # ID.
+        runner: Pipeline = pipeline.get_running_id(run_id=pipeline.new_run_id)
+        rs: Result = runner.execute(
+            params=param2template(self.params, release_params),
+        )
+        logger.debug(
+            f"({runner.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} : "
+            f"End release - {next_time:%Y-%m-%d %H:%M:%S}"
        )
-        time.sleep(15)
-
-    time.sleep(0.5)
-
-    # NOTE: Release parameter that use to change if params has
-    # templating.
-    release_params: DictData = {
-        "release": {
-            "logical_date": next_running_time,
-        },
-    }
-
-    # WARNING: Re-create pipeline object that use new running pipeline
-    # ID.
-    runner: Pipeline = pipeline.get_running_id(run_id=pipeline.new_run_id)
-    rs: Result = runner.execute(
-        # FIXME: replace fix parameters on this execution process.
-        params=param2template(
-            {"asat-dt": "${{ release.logical_date }}"}, release_params
-        ),
-    )
-    logging.debug(
-        f"({runner.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} : "
-        f"End release"
-    )
 
-
+        del runner
+
+        # NOTE: Set parent ID on this result.
+        rs.set_parent_run_id(pipeline.run_id)
+
+        # NOTE: Save result to log object saving.
+        rs_log: Log = log.model_validate(
+            {
+                "name": pipeline.name,
+                "on": str(on.cronjob),
+                "release": next_time,
+                "context": rs.context,
+                "parent_run_id": rs.run_id,
+                "run_id": rs.run_id,
+            }
+        )
+        rs_log.save(excluded=None)
 
-
-
+        # NOTE: remove this release date from running
+        self.running[pipeline.name].remove(next_time)
 
-
-
-
-
-
-    future_running_time: datetime = gen.next
-    while (
-        future_running_time in task.running[pipeline.name]
-        or future_running_time in task.queue[pipeline.name]
-        or future_running_time < finish_time
-    ):
+        # IMPORTANT:
+        # Add the next running datetime to pipeline queue
+        finish_time: datetime = datetime.now(tz=cron_tz).replace(
+            second=0, microsecond=0
+        )
         future_running_time: datetime = gen.next
+        while (
+            future_running_time in self.running[pipeline.name]
+            or future_running_time in self.queue[pipeline.name]
+            or future_running_time < finish_time
+        ):
+            future_running_time: datetime = gen.next
 
-
-
-    # NOTE: Set parent ID on this result.
-    rs.set_parent_run_id(pipeline.run_id)
+        heappush(self.queue[pipeline.name], future_running_time)
+        logger.debug(f"[CORE]: {'-' * 100}")
 
-    # NOTE: Save result to log object saving.
-    rs_log: Log = log.model_validate(
-        {
-            "name": pipeline.name,
-            "on": str(on.cronjob),
-            "release": next_running_time,
-            "context": rs.context,
-            "parent_run_id": rs.run_id,
-            "run_id": rs.run_id,
-        }
-    )
-    rs_log.save()
 
-
+def queue2str(queue: list[datetime]) -> Iterator[str]:
+    return (f"{q:%Y-%m-%d %H:%M:%S}" for q in queue)
 
 
 @catch_exceptions(cancel_on_failure=True)
@@ -208,14 +370,20 @@ def workflow_task(
     the threading in background.
 
     This workflow task will start every minute at :02 second.
+
+    :param pipeline_tasks:
+    :param stop:
+    :param threads:
+    :rtype: CancelJob | None
     """
+    tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
     start_date: datetime = datetime.now(tz=tz)
-    start_date_minute = start_date.replace(second=0, microsecond=0)
+    start_date_minute: datetime = start_date.replace(second=0, microsecond=0)
 
     if start_date > stop:
-
+        logger.info("[WORKFLOW]: Stop this schedule with datetime stopper.")
         while len(threads) > 0:
-
+            logger.warning(
                 "[WORKFLOW]: Waiting pipeline release thread that still "
                 "running in background."
            )
@@ -238,7 +406,7 @@ def workflow_task(
    for task in pipeline_tasks:

        # NOTE: Get incoming datetime queue.
-
+        logger.debug(
            f"[WORKFLOW]: Current queue: {task.pipeline.name!r} : "
            f"{list(queue2str(task.queue[task.pipeline.name]))}"
        )
@@ -255,15 +423,14 @@ def workflow_task(
            task.on.next(current_running_time)
            != task.queue[task.pipeline.name][0]
        ):
-
+            logger.debug(
                f"[WORKFLOW]: Skip schedule "
                f"{current_running_time:%Y-%m-%d %H:%M:%S} "
                f"for : {task.pipeline.name!r} : {task.on.cronjob}"
            )
            continue
        elif len(task.queue[task.pipeline.name]) == 0:
-
-            logging.warning(
+            logger.warning(
                f"[WORKFLOW]: Queue is empty for : {task.pipeline.name!r} : "
                f"{task.on.cronjob}"
            )
@@ -272,13 +439,14 @@ def workflow_task(
        # NOTE: Remove this datetime from queue.
        task.queue[task.pipeline.name].pop(0)

+        # NOTE: Create thread name that able to tracking with observe schedule
+        # job.
        thread_name: str = (
            f"{task.pipeline.name}|{str(task.on.cronjob)}|"
            f"{current_running_time:%Y%m%d%H%M}"
        )
        pipe_thread: Thread = Thread(
-            target=
-            args=(task,),
+            target=task.release,
            name=thread_name,
            daemon=True,
        )
@@ -289,7 +457,7 @@ def workflow_task(

    delay()

-
+    logger.debug(f"[WORKFLOW]: {'=' * 100}")


def workflow_long_running_task(threads: dict[str, Thread]) -> None:
@@ -297,8 +465,11 @@ def workflow_long_running_task(threads: dict[str, Thread]) -> None:
    control.

    :param threads: A mapping of Thread object and its name.
+    :rtype: None
    """
-
+    logger.debug(
+        "[MONITOR]: Start checking long running pipeline release task."
+    )
    snapshot_threads = list(threads.keys())
    for t_name in snapshot_threads:

@@ -308,16 +479,18 @@ def workflow_long_running_task(threads: dict[str, Thread]) -> None:


def workflow_control(
-
-
+    schedules: list[str],
+    stop: datetime | None = None,
    externals: DictData | None = None,
) -> list[str]:
    """Workflow scheduler control.

-    :param
-    :param
+    :param schedules: A list of pipeline names that want to schedule running.
+    :param stop: An datetime value that use to stop running schedule.
    :param externals: An external parameters that pass to Loader.
+    :rtype: list[str]
    """
+    tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
    schedule: Scheduler = Scheduler()
    start_date: datetime = datetime.now(tz=tz)

@@ -333,35 +506,28 @@ def workflow_control(
        second=0, microsecond=0
    )

-    # NOTE: Create pair of pipeline and on.
+    # NOTE: Create pair of pipeline and on from schedule model.
    pipeline_tasks: list[PipelineTask] = []
-
-
-
-
-
-        wf_queue[name]: list[datetime] = []
-        wf_running[name]: list[datetime] = []
-
-        for on in pipeline.on:
-
-            on_gen = on.generate(start_date_waiting)
-            next_running_date = on_gen.next
-            while next_running_date in wf_queue[name]:
-                next_running_date = on_gen.next
-
-            heappush(wf_queue[name], next_running_date)
-            pipeline_tasks.append(
-                PipelineTask(
-                    pipeline=pipeline, on=on, queue=wf_queue, running=wf_running
-                ),
-            )
+    for name in schedules:
+        sch: Schedule = Schedule.from_loader(name, externals=externals)
+        pipeline_tasks.extend(
+            sch.tasks(start_date_waiting, wf_queue, wf_running, externals)
+        )

    # NOTE: This schedule job will start every minute at :02 seconds.
    schedule.every(1).minutes.at(":02").do(
        workflow_task,
        pipeline_tasks=pipeline_tasks,
-        stop=
+        stop=stop
+        or (
+            start_date
+            + timedelta(
+                **json.loads(
+                    os.getenv("WORKFLOW_APP_STOP_BOUNDARY_DELTA")
+                    or '{"minutes": 5, "seconds": 20}'
+                )
+            )
+        ),
        threads=thread_releases,
    ).tag("control")

@@ -372,37 +538,42 @@ def workflow_control(
    ).tag("monitor")

    # NOTE: Start running schedule
-
+    logger.info(f"[WORKFLOW]: Start schedule: {schedules}")
    while True:
        schedule.run_pending()
        time.sleep(1)
        if not schedule.get_jobs("control"):
            schedule.clear("monitor")
-
+            logger.warning(
                f"[WORKFLOW]: Pipeline release thread: {thread_releases}"
            )
-
+            logger.warning("[WORKFLOW]: Does not have any schedule jobs !!!")
            break

-
-
-
+    logger.warning(
+        f"Queue: {[list(queue2str(wf_queue[wf])) for wf in wf_queue]}"
+    )
+    logger.warning(
+        f"Running: {[list(queue2str(wf_running[wf])) for wf in wf_running]}"
+    )
+    return schedules


def workflow(
-
+    stop: datetime | None = None,
    externals: DictData | None = None,
    excluded: list[str] | None = None,
-):
+) -> list[str]:
    """Workflow application that running multiprocessing schedule with chunk of
    pipelines that exists in config path.

-    :param
+    :param stop:
    :param excluded:
    :param externals:
+    :rtype: list[str]

    This function will get all pipelines that include on value that was
-    created in config path and chuck it with
+    created in config path and chuck it with WORKFLOW_APP_SCHEDULE_PER_PROCESS
    value to multiprocess executor pool.

    The current workflow logic:
@@ -417,36 +588,33 @@ def workflow(
            pipeline task 02 02
    ==> ...
    """
-    excluded: list = excluded or []
+    excluded: list[str] = excluded or []

-    with ProcessPoolExecutor(
+    with ProcessPoolExecutor(
+        max_workers=int(os.getenv("WORKFLOW_APP_PROCESS_WORKER") or "2"),
+    ) as executor:
        futures: list[Future] = [
            executor.submit(
                workflow_control,
-
-
+                schedules=[load[0] for load in loader],
+                stop=stop,
                externals=(externals or {}),
            )
            for loader in batch(
-
-
-                ("pipe-scheduling", None),
-                # ("pipe-scheduling-minute", None),
-                ],
-                n=1,
+                Loader.finds(Schedule, excluded=excluded),
+                n=int(os.getenv("WORKFLOW_APP_SCHEDULE_PER_PROCESS") or "100"),
            )
        ]

        results: list[str] = []
        for future in as_completed(futures):
            if err := future.exception():
-
+                logger.error(str(err))
                raise WorkflowException(str(err)) from err
            results.extend(future.result(timeout=1))
        return results


if __name__ == "__main__":
-    # TODO: Define input arguments that want to manage this application.
    workflow_rs: list[str] = workflow()
-
+    logger.info(f"Application run success: {workflow_rs}")