ddeutil-workflow 0.0.9__py3-none-any.whl → 0.0.11__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in their public registry.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +3 -2
- ddeutil/workflow/api.py +99 -31
- ddeutil/workflow/cli.py +105 -22
- ddeutil/workflow/cron.py +116 -26
- ddeutil/workflow/exceptions.py +8 -5
- ddeutil/workflow/job.py +572 -0
- ddeutil/workflow/log.py +73 -66
- ddeutil/workflow/on.py +10 -4
- ddeutil/workflow/repeat.py +68 -39
- ddeutil/workflow/route.py +194 -44
- ddeutil/workflow/scheduler.py +1020 -229
- ddeutil/workflow/stage.py +27 -23
- ddeutil/workflow/utils.py +145 -9
- ddeutil_workflow-0.0.11.dist-info/METADATA +178 -0
- ddeutil_workflow-0.0.11.dist-info/RECORD +21 -0
- {ddeutil_workflow-0.0.9.dist-info → ddeutil_workflow-0.0.11.dist-info}/WHEEL +1 -1
- ddeutil_workflow-0.0.11.dist-info/entry_points.txt +2 -0
- ddeutil/workflow/loader.py +0 -132
- ddeutil/workflow/pipeline.py +0 -1142
- ddeutil_workflow-0.0.9.dist-info/METADATA +0 -273
- ddeutil_workflow-0.0.9.dist-info/RECORD +0 -22
- ddeutil_workflow-0.0.9.dist-info/entry_points.txt +0 -2
- {ddeutil_workflow-0.0.9.dist-info → ddeutil_workflow-0.0.11.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.9.dist-info → ddeutil_workflow-0.0.11.dist-info}/top_level.txt +0 -0
ddeutil/workflow/scheduler.py
CHANGED
@@ -5,59 +5,815 @@
 # ------------------------------------------------------------------------------
 from __future__ import annotations
 
+import copy
+import json
 import logging
 import os
 import time
 from collections.abc import Iterator
-from concurrent.futures import
-
+from concurrent.futures import (
+    Future,
+    ProcessPoolExecutor,
+    ThreadPoolExecutor,
+    as_completed,
+)
+from dataclasses import dataclass, field
 from datetime import datetime, timedelta
 from functools import wraps
 from heapq import heappush
+from queue import Queue
+from textwrap import dedent
 from threading import Thread
+from typing import Optional
 from zoneinfo import ZoneInfo
 
-from
-from
-from
-from
-
-
-from
+from dotenv import load_dotenv
+from pydantic import BaseModel, Field
+from pydantic.functional_validators import field_validator, model_validator
+from typing_extensions import Self
+
+try:
+    from schedule import CancelJob
+except ImportError:
+    CancelJob = None
+
+from .__types import DictData
+from .cron import CronRunner
+from .exceptions import JobException, WorkflowException
+from .job import Job
+from .log import FileLog, Log, get_logger
+from .on import On
+from .utils import (
+    Loader,
+    Param,
     Result,
     batch,
     delay,
+    gen_id,
     get_diff_sec,
+    has_template,
     param2template,
 )
-
-
-
-load_dotenv("../../../.env")
-logging.basicConfig(
-    level=logging.DEBUG,
-    format=(
-        "%(asctime)s.%(msecs)03d (%(name)-10s, %(process)-5d, %(thread)-5d) "
-        "[%(levelname)-7s] %(message)-120s (%(filename)s:%(lineno)s)"
-    ),
-    handlers=[logging.StreamHandler()],
-    datefmt="%Y-%m-%d %H:%M:%S",
-)
+
+load_dotenv()
+logger = get_logger("ddeutil.workflow")
 logging.getLogger("schedule").setLevel(logging.INFO)
 
-
+
+__all__ = (
+    "Workflow",
+    "WorkflowSchedule",
+    "WorkflowTask",
+    "Schedule",
+    "workflow_runner",
+    "workflow_task",
+)
+
+
+class Workflow(BaseModel):
+    """Workflow Model this is the main future of this project because it use to
+    be workflow data for running everywhere that you want or using it to
+    scheduler task in background. It use lightweight coding line from Pydantic
+    Model and enhance execute method on it.
+    """
+
+    name: str = Field(description="A workflow name.")
+    desc: Optional[str] = Field(
+        default=None,
+        description=(
+            "A workflow description that can be string of markdown content."
+        ),
+    )
+    params: dict[str, Param] = Field(
+        default_factory=dict,
+        description="A parameters that want to use on this workflow.",
+    )
+    on: list[On] = Field(
+        default_factory=list,
+        description="A list of On instance for this workflow schedule.",
+    )
+    jobs: dict[str, Job] = Field(
+        default_factory=dict,
+        description="A mapping of job ID and job model that already loaded.",
+    )
+    run_id: Optional[str] = Field(
+        default=None,
+        description="A running workflow ID.",
+        repr=False,
+        exclude=True,
+    )
+
+    @property
+    def new_run_id(self) -> str:
+        """Running ID of this workflow that always generate new unique value."""
+        return gen_id(self.name, unique=True)
+
+    @classmethod
+    def from_loader(
+        cls,
+        name: str,
+        externals: DictData | None = None,
+    ) -> Self:
+        """Create Workflow instance from the Loader object that only receive
+        an input workflow name. The loader object will use this workflow name to
+        searching configuration data of this workflow model in conf path.
+
+        :param name: A workflow name that want to pass to Loader object.
+        :param externals: An external parameters that want to pass to Loader
+            object.
+        :rtype: Self
+        """
+        loader: Loader = Loader(name, externals=(externals or {}))
+
+        # NOTE: Validate the config type match with current connection model
+        if loader.type != cls:
+            raise ValueError(f"Type {loader.type} does not match with {cls}")
+
+        loader_data: DictData = copy.deepcopy(loader.data)
+
+        # NOTE: Add name to loader data
+        loader_data["name"] = name.replace(" ", "_")
+
+        # NOTE: Prepare `on` data
+        cls.__bypass_on(loader_data)
+        return cls.model_validate(obj=loader_data)
+
+    @classmethod
+    def __bypass_on(cls, data: DictData, externals: DictData | None = None):
+        """Bypass the on data to loaded config data."""
+        if on := data.pop("on", []):
+            if isinstance(on, str):
+                on = [on]
+            if any(not isinstance(i, (dict, str)) for i in on):
+                raise TypeError("The ``on`` key should be list of str or dict")
+
+            # NOTE: Pass on value to Loader and keep on model object to on field
+            data["on"] = [
+                (
+                    Loader(n, externals=(externals or {})).data
+                    if isinstance(n, str)
+                    else n
+                )
+                for n in on
+            ]
+        return data
+
+    @model_validator(mode="before")
+    def __prepare_params(cls, values: DictData) -> DictData:
+        """Prepare the params key."""
+        # NOTE: Prepare params type if it passing with only type value.
+        if params := values.pop("params", {}):
+            values["params"] = {
+                p: (
+                    {"type": params[p]}
+                    if isinstance(params[p], str)
+                    else params[p]
+                )
+                for p in params
+            }
+        return values
+
+    @field_validator("desc", mode="after")
+    def ___prepare_desc(cls, value: str) -> str:
+        """Prepare description string that was created on a template."""
+        return dedent(value)
+
+    @model_validator(mode="after")
+    def __validate_jobs_need_and_prepare_running_id(self):
+        """Validate each need job in any jobs should exists."""
+        for job in self.jobs:
+            if not_exist := [
+                need for need in self.jobs[job].needs if need not in self.jobs
+            ]:
+                raise WorkflowException(
+                    f"This needed jobs: {not_exist} do not exist in this "
+                    f"workflow, {self.name!r}"
+                )
+
+            # NOTE: update a job id with its job id from workflow template
+            self.jobs[job].id = job
+
+        if self.run_id is None:
+            self.run_id = self.new_run_id
+
+        # VALIDATE: Validate workflow name should not dynamic with params
+        #   template.
+        if has_template(self.name):
+            raise ValueError(
+                f"Workflow name should not has any template, please check, "
+                f"{self.name!r}."
+            )
+
+        return self
+
+    def get_running_id(self, run_id: str) -> Self:
+        """Return Workflow model object that changing workflow running ID with
+        an input running ID.
+
+        :param run_id: A replace workflow running ID.
+        :rtype: Self
+        """
+        return self.model_copy(update={"run_id": run_id})
+
+    def job(self, name: str) -> Job:
+        """Return Job model that exists on this workflow.
+
+        :param name: A job name that want to get from a mapping of job models.
+        :type name: str
+
+        :rtype: Job
+        :returns: A job model that exists on this workflow by input name.
+        """
+        if name not in self.jobs:
+            raise ValueError(
+                f"A Job {name!r} does not exists in this workflow, "
+                f"{self.name!r}"
+            )
+        return self.jobs[name]
+
+    def parameterize(self, params: DictData) -> DictData:
+        """Prepare parameters before passing to execution process. This method
+        will create jobs key to params mapping that will keep any result from
+        job execution.
+
+        :param params: A parameter mapping that receive from workflow execution.
+        :rtype: DictData
+        """
+        # VALIDATE: Incoming params should have keys that set on this workflow.
+        if check_key := tuple(
+            f"{k!r}"
+            for k in self.params
+            if (k not in params and self.params[k].required)
+        ):
+            raise WorkflowException(
+                f"Required Param on this workflow setting does not set: "
+                f"{', '.join(check_key)}."
+            )
+
+        # NOTE: mapping type of param before adding it to params variable.
+        return {
+            "params": (
+                params
+                | {
+                    k: self.params[k].receive(params[k])
+                    for k in params
+                    if k in self.params
+                }
+            ),
+            "jobs": {},
+        }
+
+    def release(
+        self,
+        on: On,
+        params: DictData,
+        queue: list[datetime],
+        *,
+        waiting_sec: int = 60,
+        sleep_interval: int = 15,
+        log: Log = None,
+    ) -> Result:
+        """Start running workflow with the on schedule in period of 30 minutes.
+        That mean it will still running at background 30 minutes until the
+        schedule matching with its time.
+
+        This method allow workflow use log object to save the execution
+        result to log destination like file log to local `/logs` directory.
+
+        :param on: An on schedule value.
+        :param params: A workflow parameter that pass to execute method.
+        :param queue: A list of release time that already running.
+        :param waiting_sec: A second period value that allow workflow execute.
+        :param sleep_interval: A second value that want to waiting until time
+            to execute.
+        :param log: A log object that want to save execution result.
+        :rtype: Result
+        """
+        logger.debug(
+            f"({self.run_id}) [CORE]: {self.name!r}: {on.cronjob} : run with "
+            f"queue id: {id(queue)}"
+        )
+        log: Log = log or FileLog
+        tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
+        gen: CronRunner = on.generate(
+            datetime.now(tz=tz).replace(second=0, microsecond=0)
+            + timedelta(seconds=1)
+        )
+        cron_tz: ZoneInfo = gen.tz
+
+        # NOTE: get next schedule time that generate from now.
+        next_time: datetime = gen.next
+
+        # NOTE: get next utils it does not logger.
+        while log.is_pointed(self.name, next_time, queue=queue):
+            next_time: datetime = gen.next
+
+        # NOTE: push this next running time to log queue
+        heappush(queue, next_time)
+
+        # VALIDATE: Check the different time between the next schedule time and
+        #   now that less than waiting period (second unit).
+        if get_diff_sec(next_time, tz=cron_tz) > waiting_sec:
+            logger.debug(
+                f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+                f"Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
+            )
+
+            # NOTE: Remove next datetime from queue.
+            queue.remove(next_time)
+
+            time.sleep(0.15)
+            return Result(
+                status=0,
+                context={
+                    "params": params,
+                    "poking": {"skipped": [str(on.cronjob)], "run": []},
+                },
+            )
+
+        logger.debug(
+            f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+            f"Closely to run >> {next_time:%Y-%m-%d %H:%M:%S}"
+        )
+
+        # NOTE: Release when the time is nearly to schedule time.
+        while (duration := get_diff_sec(next_time, tz=cron_tz)) > (
+            sleep_interval + 5
+        ):
+            logger.debug(
+                f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+                f"Sleep until: {duration}"
+            )
+            time.sleep(sleep_interval)
+
+        time.sleep(0.5)
+
+        # NOTE: Release parameter that use to change if params has
+        #   templating.
+        release_params: DictData = {
+            "release": {
+                "logical_date": next_time,
+            },
+        }
+
+        # WARNING: Re-create workflow object that use new running workflow
+        #   ID.
+        runner: Self = self.get_running_id(run_id=self.new_run_id)
+        rs: Result = runner.execute(
+            params=param2template(params, release_params),
+        )
+        logger.debug(
+            f"({runner.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+            f"End release {next_time:%Y-%m-%d %H:%M:%S}"
+        )
+
+        # NOTE: Delete a copied workflow instance for saving memory.
+        del runner
+
+        rs.set_parent_run_id(self.run_id)
+        rs_log: Log = log.model_validate(
+            {
+                "name": self.name,
+                "on": str(on.cronjob),
+                "release": next_time,
+                "context": rs.context,
+                "parent_run_id": rs.run_id,
+                "run_id": rs.run_id,
+            }
+        )
+        # NOTE: Saving execution result to destination of the input log object.
+        rs_log.save(excluded=None)
+
+        queue.remove(next_time)
+        time.sleep(0.05)
+        return Result(
+            status=0,
+            context={
+                "params": params,
+                "poking": {"skipped": [], "run": [str(on.cronjob)]},
+            },
+        )
+
+    def poke(
+        self,
+        params: DictData | None = None,
+        *,
+        log: Log | None = None,
+    ) -> list[Result]:
+        """Poke workflow with threading executor pool for executing with all its
+        schedules that was set on the `on` value. This method will observe its
+        schedule that nearing to run with the ``self.release()`` method.
+
+        :param params: A parameters that want to pass to the release method.
+        :param log: A log object that want to use on this poking process.
+        :rtype: list[Result]
+        """
+        logger.info(
+            f"({self.run_id}) [POKING]: Start Poking: {self.name!r} ..."
+        )
+
+        # NOTE: If this workflow does not set the on schedule, it will return
+        #   empty result.
+        if len(self.on) == 0:
+            return []
+
+        params: DictData = params or {}
+        queue: list[datetime] = []
+        results: list[Result] = []
+
+        worker: int = int(os.getenv("WORKFLOW_CORE_MAX_NUM_POKING") or "4")
+        with ThreadPoolExecutor(max_workers=worker) as executor:
+            # TODO: If I want to run infinite loop.
+            futures: list[Future] = []
+            for on in self.on:
+                futures.append(
+                    executor.submit(
+                        self.release,
+                        on,
+                        params=params,
+                        log=log,
+                        queue=queue,
+                    )
+                )
+                delay(second=0.15)
+
+            # WARNING: This poking method does not allow to use fail-fast logic
+            #   to catching parallel execution result.
+            for future in as_completed(futures):
+                results.append(future.result(timeout=60))
+
+        if len(queue) > 0:
+            logger.error(
+                f"({self.run_id}) [POKING]: Log Queue does empty when poking "
+                f"process was finishing."
+            )
+
+        return results
+
+    def execute_job(
+        self,
+        job: str,
+        params: DictData,
+    ) -> Result:
+        """Job Executor that use on workflow executor.
+
+        :param job: A job ID that want to execute.
+        :param params: A params that was parameterized from workflow execution.
+        :rtype: Result
+        """
+        # VALIDATE: check a job ID that exists in this workflow or not.
+        if job not in self.jobs:
+            raise WorkflowException(
+                f"The job ID: {job} does not exists on {self.name!r} workflow."
+            )
+        try:
+            logger.info(f"({self.run_id}) [WORKFLOW]: Start execute: {job!r}")
+
+            # IMPORTANT:
+            #   Change any job running IDs to this workflow running ID.
+            job_obj: Job = self.jobs[job].get_running_id(self.run_id)
+            j_rs: Result = job_obj.execute(params=params)
+
+        except JobException as err:
+            raise WorkflowException(f"{job}: JobException: {err}") from None
+
+        return Result(
+            status=j_rs.status,
+            context={job: job_obj.set_outputs(j_rs.context)},
+        )
+
+    def execute(
+        self,
+        params: DictData | None = None,
+        *,
+        timeout: int = 60,
+    ) -> Result:
+        """Execute workflow with passing dynamic parameters to any jobs that
+        included in the workflow.
+
+        :param params: An input parameters that use on workflow execution that
+            will parameterize before using it.
+        :param timeout: A workflow execution time out in second unit that use
+            for limit time of execution and waiting job dependency.
+        :rtype: Result
+
+        See Also:
+        ---
+
+            The result of execution process for each jobs and stages on this
+        workflow will keeping in dict which able to catch out with all jobs and
+        stages by dot annotation.
+
+            For example, when I want to use the output from previous stage, I
+        can access it with syntax:
+
+            ... ${job-name}.stages.${stage-id}.outputs.${key}
+
+        """
+        logger.info(f"({self.run_id}) [CORE]: Start Execute: {self.name!r} ...")
+        params: DictData = params or {}
+        ts: float = time.monotonic()
+
+        # NOTE: It should not do anything if it does not have job.
+        if not self.jobs:
+            logger.warning(
+                f"({self.run_id}) [WORKFLOW]: This workflow: {self.name!r} "
+                f"does not have any jobs"
+            )
+            return Result(status=0, context=params)
+
+        # NOTE: Create a job queue that keep the job that want to running after
+        #   it dependency condition.
+        jq: Queue = Queue()
+        for job_id in self.jobs:
+            jq.put(job_id)
+
+        # NOTE: Create result context that will pass this context to any
+        #   execution dependency.
+        context: DictData = self.parameterize(params)
+        try:
+            worker: int = int(os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "2"))
+            (
+                self.__exec_non_threading(context, ts, jq, timeout=timeout)
+                if worker == 1
+                else self.__exec_threading(
+                    context, ts, jq, worker=worker, timeout=timeout
+                )
+            )
+            return Result(status=0, context=context)
+        except WorkflowException as err:
+            context.update(
+                {"error_message": f"{err.__class__.__name__}: {err}"}
+            )
+            return Result(status=1, context=context)
+
+    def __exec_threading(
+        self,
+        context: DictData,
+        ts: float,
+        job_queue: Queue,
+        *,
+        worker: int = 2,
+        timeout: int = 600,
+    ) -> DictData:
+        """Workflow threading execution.
+
+        :param context: A context workflow data that want to downstream passing.
+        :param ts: A start timestamp that use for checking execute time should
+            timeout.
+        :param timeout: A second value unit that bounding running time.
+        :param worker: A number of threading executor pool size.
+        :rtype: DictData
+        """
+        not_time_out_flag: bool = True
+        logger.debug(
+            f"({self.run_id}): [CORE]: Run {self.name} with threading job "
+            f"executor"
+        )
+
+        # IMPORTANT: The job execution can run parallel and waiting by
+        #   needed.
+        with ThreadPoolExecutor(max_workers=worker) as executor:
+            futures: list[Future] = []
+
+            while not job_queue.empty() and (
+                not_time_out_flag := ((time.monotonic() - ts) < timeout)
+            ):
+                job_id: str = job_queue.get()
+                job: Job = self.jobs[job_id]
+
+                if any(need not in context["jobs"] for need in job.needs):
+                    job_queue.put(job_id)
+                    time.sleep(0.25)
+                    continue
+
+                futures.append(
+                    executor.submit(
+                        self.execute_job,
+                        job_id,
+                        params=copy.deepcopy(context),
+                    ),
+                )
+                job_queue.task_done()
+
+            # NOTE: Wait for all items to finish processing
+            job_queue.join()
+
+            for future in as_completed(futures):
+                if err := future.exception():
+                    logger.error(f"{err}")
+                    raise WorkflowException(f"{err}")
+
+                # NOTE: Update job result to workflow result.
+                context["jobs"].update(future.result(timeout=20).conext)
+
+        if not_time_out_flag:
+            return context
+
+        # NOTE: Raise timeout error.
+        logger.warning(
+            f"({self.run_id}) [WORKFLOW]: Execution of workflow, {self.name!r} "
+            f", was timeout"
+        )
+        raise WorkflowException(
+            f"Execution of workflow: {self.name} was timeout"
+        )
+
+    def __exec_non_threading(
+        self,
+        context: DictData,
+        ts: float,
+        job_queue: Queue,
+        *,
+        timeout: int = 600,
+    ) -> DictData:
+        """Workflow non-threading execution that use sequential job running
+        and waiting previous run successful.
+
+        :param context: A context workflow data that want to downstream passing.
+        :param ts: A start timestamp that use for checking execute time should
+            timeout.
+        :param timeout: A second value unit that bounding running time.
+        :rtype: DictData
+        """
+        not_time_out_flag: bool = True
+        logger.debug(
+            f"({self.run_id}) [CORE]: Run {self.name} with non-threading job "
+            f"executor"
+        )
+
+        while not job_queue.empty() and (
+            not_time_out_flag := ((time.monotonic() - ts) < timeout)
+        ):
+            job_id: str = job_queue.get()
+            job: Job = self.jobs[job_id]
+
+            # NOTE:
+            if any(need not in context["jobs"] for need in job.needs):
+                job_queue.put(job_id)
+                time.sleep(0.25)
+                continue
+
+            # NOTE: Start job execution.
+            job_rs = self.execute_job(job_id, params=copy.deepcopy(context))
+            context["jobs"].update(job_rs.context)
+            job_queue.task_done()
+
+        # NOTE: Wait for all items to finish processing
+        job_queue.join()
+
+        if not_time_out_flag:
+            return context
+
+        # NOTE: Raise timeout error.
+        logger.warning(
+            f"({self.run_id}) [WORKFLOW]: Execution of workflow was timeout"
+        )
+        raise WorkflowException(
+            f"Execution of workflow: {self.name} was timeout"
+        )
+
+
+class WorkflowSchedule(BaseModel):
+    """Workflow schedule Pydantic Model."""
+
+    name: str = Field(description="A workflow name.")
+    on: list[On] = Field(
+        default_factory=list,
+        description="An override On instance value.",
+    )
+    params: DictData = Field(
+        default_factory=dict,
+        description="A parameters that want to use to workflow execution.",
+    )
+
+    @model_validator(mode="before")
+    def __prepare__values(cls, values: DictData) -> DictData:
+        """Prepare incoming values before validating with model fields."""
+
+        values["name"] = values["name"].replace(" ", "_")
+
+        cls.__bypass_on(values)
+        return values
+
+    @classmethod
+    def __bypass_on(cls, data: DictData, externals: DictData | None = None):
+        """Bypass the on data to loaded config data."""
+        if on := data.pop("on", []):
+
+            if isinstance(on, str):
+                on = [on]
+
+            if any(not isinstance(n, (dict, str)) for n in on):
+                raise TypeError("The ``on`` key should be list of str or dict")
+
+            # NOTE: Pass on value to Loader and keep on model object to on field
+            data["on"] = [
+                (
+                    Loader(n, externals=(externals or {})).data
+                    if isinstance(n, str)
+                    else n
+                )
+                for n in on
+            ]
+        return data
+
+
+class Schedule(BaseModel):
+    """Schedule Pydantic Model that use to run with scheduler package. It does
+    not equal the on value in Workflow model but it use same logic to running
+    release date with crontab interval.
+    """
+
+    desc: Optional[str] = Field(
+        default=None,
+        description=(
+            "A schedule description that can be string of markdown content."
+        ),
+    )
+    workflows: list[WorkflowSchedule] = Field(
+        default_factory=list,
+        description="A list of WorkflowSchedule models.",
+    )
+
+    @classmethod
+    def from_loader(
+        cls,
+        name: str,
+        externals: DictData | None = None,
+    ) -> Self:
+        loader: Loader = Loader(name, externals=(externals or {}))
+
+        # NOTE: Validate the config type match with current connection model
+        if loader.type != cls:
+            raise ValueError(f"Type {loader.type} does not match with {cls}")
+
+        loader_data: DictData = copy.deepcopy(loader.data)
+
+        # NOTE: Add name to loader data
+        loader_data["name"] = name.replace(" ", "_")
+
+        return cls.model_validate(obj=loader_data)
+
+    def tasks(
+        self,
+        start_date: datetime,
+        queue: dict[str, list[datetime]],
+        running: dict[str, list[datetime]],
+        *,
+        externals: DictData | None = None,
+    ) -> list[WorkflowTask]:
+        """Generate Task from the current datetime.
+
+        :param start_date: A start date that get from the workflow schedule.
+        :param queue: A mapping of name and list of datetime for queue.
+        :param running: A mapping of name and list of datetime for running.
+        :param externals: An external parameters that pass to the Loader object.
+        :rtype: list[WorkflowTask]
+        """
+
+        # NOTE: Create pair of workflow and on.
+        workflow_tasks: list[WorkflowTask] = []
+        externals: DictData = externals or {}
+
+        for wfs in self.workflows:
+            wf: Workflow = Workflow.from_loader(wfs.name, externals=externals)
+
+            # NOTE: Create default list of release datetime.
+            queue[wfs.name]: list[datetime] = []
+            running[wfs.name]: list[datetime] = []
+
+            for on in wf.on:
+                on_gen = on.generate(start_date)
+                next_running_date = on_gen.next
+                while next_running_date in queue[wfs.name]:
+                    next_running_date = on_gen.next
+
+                heappush(queue[wfs.name], next_running_date)
+
+                workflow_tasks.append(
+                    WorkflowTask(
+                        workflow=wf,
+                        on=on,
+                        params=wfs.params,
+                        queue=queue,
+                        running=running,
+                    ),
+                )
+
+        return workflow_tasks
 
 
 def catch_exceptions(cancel_on_failure=False):
     """Catch exception error from scheduler job."""
 
-    def catch_exceptions_decorator(
-        @wraps(
+    def catch_exceptions_decorator(func):
+        @wraps(func)
         def wrapper(*args, **kwargs):
             try:
-                return
+                return func(*args, **kwargs)
             except Exception as err:
-
+                logger.exception(err)
                 if cancel_on_failure:
                     return CancelJob
 
@@ -66,157 +822,188 @@ def catch_exceptions(cancel_on_failure=False):
     return catch_exceptions_decorator
 
 
-
-
-    pipeline: Pipeline
-    on: On
-    queue: list[datetime]
-    running: list[datetime]
+def catch_exceptions_method(cancel_on_failure=False):
+    """Catch exception error from scheduler job."""
 
+    def catch_exceptions_decorator(func):
+        @wraps(func)
+        def wrapper(self, *args, **kwargs):
+            try:
+                return func(self, *args, **kwargs)
+            except Exception as err:
+                logger.exception(err)
+                if cancel_on_failure:
+                    return CancelJob
 
-
-    return (f"{q:%Y-%m-%d %H:%M:%S}" for q in queue)
+        return wrapper
 
+    return catch_exceptions_decorator
 
-def pipeline_release(
-    task: PipelineTask,
-    *,
-    log: Log | None = None,
-) -> None:
-    """Pipeline release, it will use with the same logic of `pipeline.release`
-    method.
 
-
-
+@dataclass(frozen=True)
+class WorkflowTask:
+    """Workflow task dataclass that use to keep mapping data and objects for
+    passing in multithreading task.
     """
-    log: Log = log or FileLog
-    pipeline: Pipeline = task.pipeline
-    on: On = task.on
 
-
-
-    )
-
+    workflow: Workflow
+    on: On
+    params: DictData = field(compare=False, hash=False)
+    queue: list[datetime] = field(compare=False, hash=False)
+    running: list[datetime] = field(compare=False, hash=False)
+
+    @catch_exceptions_method(cancel_on_failure=True)
+    def release(self, log: Log | None = None) -> None:
+        """Workflow release, it will use with the same logic of
+        `workflow.release` method.
+
+        :param log: A log object.
+        """
+        tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
+        log: Log = log or FileLog
+        wf: Workflow = self.workflow
+        on: On = self.on
+
+        gen: CronRunner = on.generate(
+            datetime.now(tz=tz).replace(second=0, microsecond=0)
+        )
+        cron_tz: ZoneInfo = gen.tz
 
-
-
-    next_running_time: datetime = gen.next
+        # NOTE: get next schedule time that generate from now.
+        next_time: datetime = gen.next
 
-
-
-
-    )
-    heappush(task.running[pipeline.name], next_running_time)
+        # NOTE: get next utils it does not running.
+        while log.is_pointed(wf.name, next_time, queue=self.running[wf.name]):
+            next_time: datetime = gen.next
 
-
+        logger.debug(
+            f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} : "
+            f"{next_time:%Y-%m-%d %H:%M:%S}"
+        )
+        heappush(self.running[wf.name], next_time)
 
-
-
-
-
+        if get_diff_sec(next_time, tz=cron_tz) > 55:
+            logger.debug(
+                f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} "
+                f": Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
+            )
+
+            # NOTE: Add this next running datetime that not in period to queue
+            #   and remove it to running.
+            self.running[wf.name].remove(next_time)
+            heappush(self.queue[wf.name], next_time)
+
+            time.sleep(0.2)
+            return
+
+        logger.debug(
+            f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} : "
+            f"Closely to run >> {next_time:%Y-%m-%d %H:%M:%S}"
        )
 
-    # NOTE:
-
-
+        # NOTE: Release when the time is nearly to schedule time.
+        while (duration := get_diff_sec(next_time, tz=tz)) > (15 + 5):
+            logger.debug(
+                f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} "
+                f": Sleep until: {duration}"
+            )
+            time.sleep(15)
+
         time.sleep(0.5)
-    return
 
-
-
-
-
+        # NOTE: Release parameter that use to change if params has
+        #   templating.
+        release_params: DictData = {
+            "release": {
+                "logical_date": next_time,
+            },
+        }
 
-
-
-
-
-
+        # WARNING: Re-create workflow object that use new running workflow
+        #   ID.
+        runner: Workflow = wf.get_running_id(run_id=wf.new_run_id)
+        rs: Result = runner.execute(
+            params=param2template(self.params, release_params),
+        )
+        logger.debug(
+            f"({runner.run_id}) [CORE]: {wf.name!r} : {on.cronjob} : "
+            f"End release - {next_time:%Y-%m-%d %H:%M:%S}"
        )
-        time.sleep(15)
-
-        time.sleep(0.5)
-
-    # NOTE: Release parameter that use to change if params has
-    #   templating.
-    release_params: DictData = {
-        "release": {
-            "logical_date": next_running_time,
-        },
-    }
-
-    # WARNING: Re-create pipeline object that use new running pipeline
-    #   ID.
-    runner: Pipeline = pipeline.get_running_id(run_id=pipeline.new_run_id)
-    rs: Result = runner.execute(
-        # FIXME: replace fix parameters on this execution process.
-        params=param2template(
-            {"asat-dt": "${{ release.logical_date }}"}, release_params
-        ),
-    )
-    logging.debug(
-        f"({runner.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} : "
-        f"End release"
-    )
 
-
+        del runner
+
+        # NOTE: Set parent ID on this result.
+        rs.set_parent_run_id(wf.run_id)
+
+        # NOTE: Save result to log object saving.
+        rs_log: Log = log.model_validate(
+            {
+                "name": wf.name,
+                "on": str(on.cronjob),
+                "release": next_time,
+                "context": rs.context,
+                "parent_run_id": rs.run_id,
+                "run_id": rs.run_id,
+            }
+        )
+        rs_log.save(excluded=None)
 
-
-
+        # NOTE: remove this release date from running
+        self.running[wf.name].remove(next_time)
 
-
-
-
-
-
-    future_running_time: datetime = gen.next
-    while (
-        future_running_time in task.running[pipeline.name]
-        or future_running_time in task.queue[pipeline.name]
-        or future_running_time < finish_time
-    ):
+        # IMPORTANT:
+        #   Add the next running datetime to workflow queue
+        finish_time: datetime = datetime.now(tz=cron_tz).replace(
+            second=0, microsecond=0
+        )
        future_running_time: datetime = gen.next
+        while (
+            future_running_time in self.running[wf.name]
+            or future_running_time in self.queue[wf.name]
+            or future_running_time < finish_time
+        ):
+            future_running_time: datetime = gen.next
 
-
+        heappush(self.queue[wf.name], future_running_time)
+        logger.debug(f"[CORE]: {'-' * 100}")
 
-
-
+    def __eq__(self, other):
+        if isinstance(other, WorkflowTask):
+            return (
+                self.workflow.name == other.workflow.name
+                and self.on.cronjob == other.on.cronjob
+            )
 
-    # NOTE: Save result to log object saving.
-    rs_log: Log = log.model_validate(
-        {
-            "name": pipeline.name,
-            "on": str(on.cronjob),
-            "release": next_running_time,
-            "context": rs.context,
-            "parent_run_id": rs.run_id,
-            "run_id": rs.run_id,
-        }
-    )
-    rs_log.save()
 
-
+def queue2str(queue: list[datetime]) -> Iterator[str]:
+    return (f"{q:%Y-%m-%d %H:%M:%S}" for q in queue)
 
 
 @catch_exceptions(cancel_on_failure=True)
 def workflow_task(
-
+    workflow_tasks: list[WorkflowTask],
     stop: datetime,
     threads: dict[str, Thread],
 ) -> CancelJob | None:
-    """Workflow task generator that create release pair of
+    """Workflow task generator that create release pair of workflow and on to
     the threading in background.
 
     This workflow task will start every minute at :02 second.
+
+    :param workflow_tasks:
+    :param stop:
+    :param threads:
+    :rtype: CancelJob | None
     """
+    tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
     start_date: datetime = datetime.now(tz=tz)
-    start_date_minute = start_date.replace(second=0, microsecond=0)
+    start_date_minute: datetime = start_date.replace(second=0, microsecond=0)
 
-    if start_date > stop:
-
+    if start_date > stop.replace(tzinfo=tz):
+        logger.info("[WORKFLOW]: Stop this schedule with datetime stopper.")
        while len(threads) > 0:
-
-                "[WORKFLOW]: Waiting
+            logger.warning(
+                "[WORKFLOW]: Waiting workflow release thread that still "
                "running in background."
            )
            time.sleep(15)
@@ -224,72 +1011,72 @@ def workflow_task(
        return CancelJob
 
    # IMPORTANT:
-    #   Filter
+    #   Filter workflow & on that should to run with `workflow_release`
    #   function. It will deplicate running with different schedule value
    #   because I use current time in this condition.
    #
-    #   For example, if a
+    #   For example, if a workflow A queue has '00:02:00' time that
    #   should to run and its schedule has '*/2 * * * *' and '*/35 * * * *'.
    #   This condition will release with 2 threading job.
    #
    #   '00:02:00'  --> '*/2 * * * *'   --> running
    #               --> '*/35 * * * *'  --> skip
    #
-    for task in
+    for task in workflow_tasks:
 
        # NOTE: Get incoming datetime queue.
-
-            f"[WORKFLOW]: Current queue: {task.
-            f"{list(queue2str(task.queue[task.
+        logger.debug(
+            f"[WORKFLOW]: Current queue: {task.workflow.name!r} : "
+            f"{list(queue2str(task.queue[task.workflow.name]))}"
        )
 
        # NOTE: Create minute unit value for any scheduler datetime that
-        #   checking a
+        #   checking a workflow task should run in this datetime.
        current_running_time: datetime = start_date_minute.astimezone(
            tz=ZoneInfo(task.on.tz)
        )
        if (
-            len(task.queue[task.
-            and current_running_time != task.queue[task.
+            len(task.queue[task.workflow.name]) > 0
+            and current_running_time != task.queue[task.workflow.name][0]
        ) or (
            task.on.next(current_running_time)
-            != task.queue[task.
+            != task.queue[task.workflow.name][0]
        ):
-
+            logger.debug(
                f"[WORKFLOW]: Skip schedule "
                f"{current_running_time:%Y-%m-%d %H:%M:%S} "
-                f"for : {task.
+                f"for : {task.workflow.name!r} : {task.on.cronjob}"
            )
            continue
-        elif len(task.queue[task.
-
-            f"[WORKFLOW]: Queue is empty for : {task.pipeline.name!r} : "
+        elif len(task.queue[task.workflow.name]) == 0:
+            logger.warning(
+                f"[WORKFLOW]: Queue is empty for : {task.workflow.name!r} : "
                f"{task.on.cronjob}"
            )
            continue
 
        # NOTE: Remove this datetime from queue.
-        task.queue[task.
+        task.queue[task.workflow.name].pop(0)
 
+        # NOTE: Create thread name that able to tracking with observe schedule
+        #   job.
        thread_name: str = (
-            f"{task.
+            f"{task.workflow.name}|{str(task.on.cronjob)}|"
            f"{current_running_time:%Y%m%d%H%M}"
        )
-
-            target=
-            args=(task,),
+        wf_thread: Thread = Thread(
+            target=task.release,
            name=thread_name,
            daemon=True,
        )
 
-        threads[thread_name] =
+        threads[thread_name] = wf_thread
 
-
+        wf_thread.start()
 
        delay()
 
-
+    logger.debug(f"[WORKFLOW]: {'=' * 100}")
 
 
 def workflow_long_running_task(threads: dict[str, Thread]) -> None:
@@ -297,8 +1084,11 @@ def workflow_long_running_task(threads: dict[str, Thread]) -> None:
    control.
 
    :param threads: A mapping of Thread object and its name.
+    :rtype: None
    """
-
+    logger.debug(
+        "[MONITOR]: Start checking long running workflow release task."
+    )
    snapshot_threads = list(threads.keys())
    for t_name in snapshot_threads:
 
@@ -308,22 +1098,31 @@ def workflow_long_running_task(threads: dict[str, Thread]) -> None:
 
 
 def workflow_control(
-
-
+    schedules: list[str],
+    stop: datetime | None = None,
    externals: DictData | None = None,
 ) -> list[str]:
    """Workflow scheduler control.
 
-    :param
-    :param
+    :param schedules: A list of workflow names that want to schedule running.
+    :param stop: An datetime value that use to stop running schedule.
    :param externals: An external parameters that pass to Loader.
+    :rtype: list[str]
    """
+    try:
+        from schedule import Scheduler
+    except ImportError:
+        raise ImportError(
+            "Should install schedule package before use this module."
+        ) from None
+
+    tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
    schedule: Scheduler = Scheduler()
    start_date: datetime = datetime.now(tz=tz)
 
    # NOTE: Design workflow queue caching.
    # ---
-    # {"
+    # {"workflow-name": [<release-datetime>, <release-datetime>, ...]}
    #
    wf_queue: dict[str, list[datetime]] = {}
    wf_running: dict[str, list[datetime]] = {}
@@ -333,35 +1132,30 @@ def workflow_control(
        second=0, microsecond=0
    )
 
-    # NOTE: Create pair of
-
-
-
-
-
-
-
-
-
-        for on in pipeline.on:
-
-            on_gen = on.generate(start_date_waiting)
-            next_running_date = on_gen.next
-            while next_running_date in wf_queue[name]:
-                next_running_date = on_gen.next
-
-            heappush(wf_queue[name], next_running_date)
-            pipeline_tasks.append(
-                PipelineTask(
-                    pipeline=pipeline, on=on, queue=wf_queue, running=wf_running
-                ),
-            )
+    # NOTE: Create pair of workflow and on from schedule model.
+    workflow_tasks: list[WorkflowTask] = []
+    for name in schedules:
+        sch: Schedule = Schedule.from_loader(name, externals=externals)
+        workflow_tasks.extend(
+            sch.tasks(
+                start_date_waiting, wf_queue, wf_running, externals=externals
+            ),
+        )
 
    # NOTE: This schedule job will start every minute at :02 seconds.
    schedule.every(1).minutes.at(":02").do(
        workflow_task,
-
-        stop=
+        workflow_tasks=workflow_tasks,
+        stop=stop
+        or (
+            start_date
+            + timedelta(
+                **json.loads(
+                    os.getenv("WORKFLOW_APP_STOP_BOUNDARY_DELTA")
+                    or '{"minutes": 5, "seconds": 20}'
+                )
+            )
+        ),
        threads=thread_releases,
    ).tag("control")
 
@@ -372,81 +1166,78 @@ def workflow_control(
    ).tag("monitor")
 
    # NOTE: Start running schedule
-
+    logger.info(f"[WORKFLOW]: Start schedule: {schedules}")
    while True:
        schedule.run_pending()
        time.sleep(1)
        if not schedule.get_jobs("control"):
            schedule.clear("monitor")
-
-                f"[WORKFLOW]:
+            logger.warning(
+                f"[WORKFLOW]: Workflow release thread: {thread_releases}"
            )
-
+            logger.warning("[WORKFLOW]: Does not have any schedule jobs !!!")
            break
 
-
-
-
+    logger.warning(
+        f"Queue: {[list(queue2str(wf_queue[wf])) for wf in wf_queue]}"
+    )
+    logger.warning(
+        f"Running: {[list(queue2str(wf_running[wf])) for wf in wf_running]}"
+    )
+    return schedules
 
 
-def
-
+def workflow_runner(
+    stop: datetime | None = None,
    externals: DictData | None = None,
    excluded: list[str] | None = None,
-):
+) -> list[str]:
    """Workflow application that running multiprocessing schedule with chunk of
-
+    workflows that exists in config path.
 
-    :param
+    :param stop:
    :param excluded:
    :param externals:
+    :rtype: list[str]
 
-    This function will get all
-    created in config path and chuck it with
+        This function will get all workflows that include on value that was
+    created in config path and chuck it with WORKFLOW_APP_SCHEDULE_PER_PROCESS
    value to multiprocess executor pool.
 
    The current workflow logic:
    ---
        PIPELINES ==> process 01 ==> schedule 1 minute --> thread of release
-
+                                                           workflow task 01 01
                                                       --> thread of release
-
+                                                           workflow task 01 02
                  ==> process 02 ==> schedule 1 minute --> thread of release
-
+                                                           workflow task 02 01
                                                       --> thread of release
-
+                                                           workflow task 02 02
                  ==> ...
    """
-    excluded: list = excluded or []
+    excluded: list[str] = excluded or []
 
-    with ProcessPoolExecutor(
+    with ProcessPoolExecutor(
+        max_workers=int(os.getenv("WORKFLOW_APP_PROCESS_WORKER") or "2"),
+    ) as executor:
        futures: list[Future] = [
            executor.submit(
                workflow_control,
-
-
+                schedules=[load[0] for load in loader],
+                stop=stop,
                externals=(externals or {}),
            )
            for loader in batch(
-
-
-                ("pipe-scheduling", None),
-                # ("pipe-scheduling-minute", None),
-                ],
-                n=1,
+                Loader.finds(Schedule, excluded=excluded),
+                n=int(os.getenv("WORKFLOW_APP_SCHEDULE_PER_PROCESS") or "100"),
            )
        ]
 
        results: list[str] = []
        for future in as_completed(futures):
            if err := future.exception():
-
+                logger.error(str(err))
                raise WorkflowException(str(err)) from err
            results.extend(future.result(timeout=1))
        return results
-
-
-if __name__ == "__main__":
-    # TODO: Define input arguments that want to manage this application.
-    workflow_rs: list[str] = workflow()
-    logging.info(f"Application run success: {workflow_rs}")