ddeutil-workflow 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__cron.py +29 -2
- ddeutil/workflow/__init__.py +9 -4
- ddeutil/workflow/conf.py +49 -40
- ddeutil/workflow/exceptions.py +4 -0
- ddeutil/workflow/job.py +58 -45
- ddeutil/workflow/on.py +4 -2
- ddeutil/workflow/scheduler.py +117 -947
- ddeutil/workflow/stage.py +92 -66
- ddeutil/workflow/utils.py +61 -43
- ddeutil/workflow/workflow.py +1084 -0
- {ddeutil_workflow-0.0.18.dist-info → ddeutil_workflow-0.0.20.dist-info}/METADATA +12 -12
- ddeutil_workflow-0.0.20.dist-info/RECORD +22 -0
- {ddeutil_workflow-0.0.18.dist-info → ddeutil_workflow-0.0.20.dist-info}/WHEEL +1 -1
- ddeutil_workflow-0.0.18.dist-info/RECORD +0 -21
- {ddeutil_workflow-0.0.18.dist-info → ddeutil_workflow-0.0.20.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.18.dist-info → ddeutil_workflow-0.0.20.dist-info}/entry_points.txt +0 -0
- {ddeutil_workflow-0.0.18.dist-info → ddeutil_workflow-0.0.20.dist-info}/top_level.txt +0 -0
ddeutil/workflow/scheduler.py
CHANGED
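The headline change in this file: the Workflow model and the WorkflowTaskData dataclass were moved out of scheduler.py into the new ddeutil/workflow/workflow.py module (the +1084-line file in the list above), and scheduler.py now imports them from there. A minimal sketch of what that move implies for import paths; the exact top-level re-exports in ddeutil/workflow/__init__.py are an assumption here:

    # 0.0.18 (before): the model lived in the scheduler module.
    # from ddeutil.workflow.scheduler import Workflow, WorkflowTaskData

    # 0.0.20 (after): the model lives in its own module, which scheduler.py
    # itself now imports (see the `from .workflow import ...` hunk below).
    from ddeutil.workflow.workflow import Workflow, WorkflowTaskData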
@@ -27,21 +27,15 @@ import time
 from concurrent.futures import (
     Future,
     ProcessPoolExecutor,
-    ThreadPoolExecutor,
     as_completed,
 )
-from dataclasses import field
 from datetime import datetime, timedelta
 from functools import wraps
-from heapq import heappush
-from queue import Queue
 from textwrap import dedent
 from threading import Thread
 from typing import Callable, Optional
-from zoneinfo import ZoneInfo
 
 from pydantic import BaseModel, Field
-from pydantic.dataclasses import dataclass
 from pydantic.functional_validators import field_validator, model_validator
 from typing_extensions import Self
 
@@ -57,21 +51,15 @@ except ImportError:  # pragma: no cov
 
 from .__cron import CronRunner
 from .__types import DictData, TupleStr
-from .conf import FileLog, Loader, Log, config, get_logger
-from .exceptions import JobException, WorkflowException
-from .job import Job
+from .conf import Loader, config, get_logger
+from .exceptions import WorkflowException
 from .on import On
 from .utils import (
-    Param,
-    Result,
     batch,
     delay,
-    gen_id,
-    get_diff_sec,
-    has_template,
-    param2template,
    queue2str,
 )
+from .workflow import Workflow, WorkflowTaskData
 
 P = ParamSpec("P")
 logger = get_logger("ddeutil.workflow")
@@ -81,717 +69,15 @@ logging.getLogger("schedule").setLevel(logging.INFO)
 
 
 __all__: TupleStr = (
-    "Workflow",
-    "WorkflowTaskData",
     "Schedule",
     "ScheduleWorkflow",
-    "workflow_task",
+    "workflow_task_release",
     "workflow_monitor",
     "workflow_control",
     "workflow_runner",
 )
 
 
-class Workflow(BaseModel):
-    """Workflow Pydantic Model this is the main future of this project because
-    it use to be workflow data for running everywhere that you want or using it
-    to scheduler task in background. It use lightweight coding line from
-    Pydantic Model and enhance execute method on it.
-    """
-
-    name: str = Field(description="A workflow name.")
-    desc: Optional[str] = Field(
-        default=None,
-        description=(
-            "A workflow description that can be string of markdown content."
-        ),
-    )
-    params: dict[str, Param] = Field(
-        default_factory=dict,
-        description="A parameters that need to use on this workflow.",
-    )
-    on: list[On] = Field(
-        default_factory=list,
-        description="A list of On instance for this workflow schedule.",
-    )
-    jobs: dict[str, Job] = Field(
-        default_factory=dict,
-        description="A mapping of job ID and job model that already loaded.",
-    )
-    run_id: Optional[str] = Field(
-        default=None,
-        description=(
-            "A running workflow ID that is able to change after initialize."
-        ),
-        repr=False,
-        exclude=True,
-    )
-
-    @property
-    def new_run_id(self) -> str:
-        """Running ID of this workflow that always generate new unique value.
-
-        :rtype: str
-        """
-        return gen_id(self.name, unique=True)
-
-    @classmethod
-    def from_loader(
-        cls,
-        name: str,
-        externals: DictData | None = None,
-    ) -> Self:
-        """Create Workflow instance from the Loader object that only receive
-        an input workflow name. The loader object will use this workflow name to
-        searching configuration data of this workflow model in conf path.
-
-        :param name: A workflow name that want to pass to Loader object.
-        :param externals: An external parameters that want to pass to Loader
-            object.
-        :rtype: Self
-        """
-        loader: Loader = Loader(name, externals=(externals or {}))
-
-        # NOTE: Validate the config type match with current connection model
-        if loader.type != cls:
-            raise ValueError(f"Type {loader.type} does not match with {cls}")
-
-        loader_data: DictData = copy.deepcopy(loader.data)
-
-        # NOTE: Add name to loader data
-        loader_data["name"] = name.replace(" ", "_")
-
-        # NOTE: Prepare `on` data
-        cls.__bypass_on(loader_data)
-        return cls.model_validate(obj=loader_data)
-
-    @classmethod
-    def __bypass_on(
-        cls,
-        data: DictData,
-        externals: DictData | None = None,
-    ) -> DictData:
-        """Bypass the on data to loaded config data.
-
-        :param data:
-        :param externals:
-        :rtype: DictData
-        """
-        if on := data.pop("on", []):
-            if isinstance(on, str):
-                on = [on]
-            if any(not isinstance(i, (dict, str)) for i in on):
-                raise TypeError("The ``on`` key should be list of str or dict")
-
-            # NOTE: Pass on value to Loader and keep on model object to on field
-            data["on"] = [
-                (
-                    Loader(n, externals=(externals or {})).data
-                    if isinstance(n, str)
-                    else n
-                )
-                for n in on
-            ]
-        return data
-
-    @model_validator(mode="before")
-    def __prepare_model_before__(cls, values: DictData) -> DictData:
-        """Prepare the params key."""
-        # NOTE: Prepare params type if it passing with only type value.
-        if params := values.pop("params", {}):
-            values["params"] = {
-                p: (
-                    {"type": params[p]}
-                    if isinstance(params[p], str)
-                    else params[p]
-                )
-                for p in params
-            }
-        return values
-
-    @field_validator("desc", mode="after")
-    def __dedent_desc__(cls, value: str) -> str:
-        """Prepare description string that was created on a template.
-
-        :param value: A description string value that want to dedent.
-        :rtype: str
-        """
-        return dedent(value)
-
-    @model_validator(mode="after")
-    def __validate_jobs_need_and_prepare_running_id(self) -> Self:
-        """Validate each need job in any jobs should exists.
-
-        :rtype: Self
-        """
-        for job in self.jobs:
-            if not_exist := [
-                need for need in self.jobs[job].needs if need not in self.jobs
-            ]:
-                raise WorkflowException(
-                    f"The needed jobs: {not_exist} do not found in "
-                    f"{self.name!r}."
-                )
-
-            # NOTE: update a job id with its job id from workflow template
-            self.jobs[job].id = job
-
-        if self.run_id is None:
-            self.run_id = self.new_run_id
-
-        # VALIDATE: Validate workflow name should not dynamic with params
-        #   template.
-        if has_template(self.name):
-            raise ValueError(
-                f"Workflow name should not has any template, please check, "
-                f"{self.name!r}."
-            )
-
-        return self
-
-    def get_running_id(self, run_id: str) -> Self:
-        """Return Workflow model object that changing workflow running ID with
-        an input running ID.
-
-        :param run_id: A replace workflow running ID.
-        :rtype: Self
-        """
-        return self.model_copy(update={"run_id": run_id})
-
-    def job(self, name: str) -> Job:
-        """Return this workflow's job that already created on this job field.
-
-        :param name: A job name that want to get from a mapping of job models.
-        :type name: str
-
-        :rtype: Job
-        :returns: A job model that exists on this workflow by input name.
-        """
-        if name not in self.jobs:
-            raise ValueError(
-                f"A Job {name!r} does not exists in this workflow, "
-                f"{self.name!r}"
-            )
-        return self.jobs[name]
-
-    def parameterize(self, params: DictData) -> DictData:
-        """Prepare a passing parameters before use it in execution process.
-        This method will validate keys of an incoming params with this object
-        necessary params field and then create a jobs key to result mapping
-        that will keep any execution result from its job.
-
-            ... {
-            ...     "params": <an-incoming-params>,
-            ...     "jobs": {}
-            ... }
-
-        :param params: A parameter mapping that receive from workflow execution.
-        :type params: DictData
-        :rtype: DictData
-        """
-        # VALIDATE: Incoming params should have keys that set on this workflow.
-        if check_key := tuple(
-            f"{k!r}"
-            for k in self.params
-            if (k not in params and self.params[k].required)
-        ):
-            raise WorkflowException(
-                f"Required Param on this workflow setting does not set: "
-                f"{', '.join(check_key)}."
-            )
-
-        # NOTE: Mapping type of param before adding it to the ``params`` key.
-        return {
-            "params": (
-                params
-                | {
-                    k: self.params[k].receive(params[k])
-                    for k in params
-                    if k in self.params
-                }
-            ),
-            "jobs": {},
-        }
-
-    def release(
-        self,
-        on: On,
-        params: DictData,
-        queue: list[datetime],
-        *,
-        waiting_sec: int = 60,
-        sleep_interval: int = 15,
-        log: Log = None,
-    ) -> Result:
-        """Start running workflow with the on schedule in period of 30 minutes.
-        That mean it will still running at background 30 minutes until the
-        schedule matching with its time.
-
-        This method allow workflow use log object to save the execution
-        result to log destination like file log to local `/logs` directory.
-
-        :param on: An on schedule value.
-        :param params: A workflow parameter that pass to execute method.
-        :param queue: A list of release time that already running.
-        :param waiting_sec: A second period value that allow workflow execute.
-        :param sleep_interval: A second value that want to waiting until time
-            to execute.
-        :param log: A log object that want to save execution result.
-        :rtype: Result
-        """
-        logger.debug(
-            f"({self.run_id}) [CORE]: {self.name!r}: {on.cronjob} : run with "
-            f"queue id: {id(queue)}"
-        )
-        log: Log = log or FileLog
-        gen: CronRunner = on.generate(
-            datetime.now(tz=config.tz).replace(second=0, microsecond=0)
-            + timedelta(seconds=1)
-        )
-        cron_tz: ZoneInfo = gen.tz
-
-        # NOTE: get next schedule time that generate from now.
-        next_time: datetime = gen.next
-
-        # NOTE: While-loop to getting next until it does not logger.
-        while log.is_pointed(self.name, next_time, queue=queue):
-            next_time: datetime = gen.next
-
-        # NOTE: Heap-push this next running time to log queue list.
-        heappush(queue, next_time)
-
-        # VALIDATE: Check the different time between the next schedule time and
-        #   now that less than waiting period (second unit).
-        if get_diff_sec(next_time, tz=cron_tz) > waiting_sec:
-            logger.debug(
-                f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
-                f"Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
-            )
-
-            # NOTE: Remove next datetime from queue.
-            queue.remove(next_time)
-
-            time.sleep(0.15)
-            return Result(
-                status=0,
-                context={
-                    "params": params,
-                    "release": {"status": "skipped", "cron": [str(on.cronjob)]},
-                },
-            )
-
-        logger.debug(
-            f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
-            f"Closely to run >> {next_time:%Y-%m-%d %H:%M:%S}"
-        )
-
-        # NOTE: Release when the time is nearly to schedule time.
-        while (duration := get_diff_sec(next_time, tz=cron_tz)) > (
-            sleep_interval + 5
-        ):  # pragma: no cov
-            logger.debug(
-                f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
-                f"Sleep until: {duration}"
-            )
-            time.sleep(sleep_interval)
-
-        time.sleep(0.5)
-
-        # NOTE: Release parameter that use to change if params has
-        #   templating.
-        release_params: DictData = {
-            "release": {
-                "logical_date": next_time,
-            },
-        }
-
-        # WARNING: Re-create workflow object that use new running workflow
-        #   ID.
-        runner: Self = self.get_running_id(run_id=self.new_run_id)
-        rs: Result = runner.execute(
-            params=param2template(params, release_params),
-        )
-        logger.debug(
-            f"({runner.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
-            f"End release {next_time:%Y-%m-%d %H:%M:%S}"
-        )
-
-        # NOTE: Delete a copied workflow instance for saving memory.
-        del runner
-
-        rs.set_parent_run_id(self.run_id)
-        rs_log: Log = log.model_validate(
-            {
-                "name": self.name,
-                "on": str(on.cronjob),
-                "release": next_time,
-                "context": rs.context,
-                "parent_run_id": rs.run_id,
-                "run_id": rs.run_id,
-            }
-        )
-        # NOTE: Saving execution result to destination of the input log object.
-        rs_log.save(excluded=None)
-
-        queue.remove(next_time)
-        time.sleep(0.05)
-        return Result(
-            status=0,
-            context={
-                "params": params,
-                "release": {"status": "run", "cron": [str(on.cronjob)]},
-            },
-        )
-
-    def poke(
-        self,
-        params: DictData | None = None,
-        *,
-        log: Log | None = None,
-    ) -> list[Result]:
-        """Poke workflow with threading executor pool for executing with all its
-        schedules that was set on the `on` value. This method will observe its
-        schedule that nearing to run with the ``self.release()`` method.
-
-        :param params: A parameters that want to pass to the release method.
-        :param log: A log object that want to use on this poking process.
-        :rtype: list[Result]
-        """
-        logger.info(
-            f"({self.run_id}) [POKING]: Start Poking: {self.name!r} ..."
-        )
-
-        # NOTE: If this workflow does not set the on schedule, it will return
-        #   empty result.
-        if len(self.on) == 0:
-            return []
-
-        params: DictData = params or {}
-        queue: list[datetime] = []
-        results: list[Result] = []
-
-        with ThreadPoolExecutor(
-            max_workers=config.max_poking_pool_worker,
-            thread_name_prefix="wf_poking_",
-        ) as executor:
-            futures: list[Future] = []
-            for on in self.on:
-                futures.append(
-                    executor.submit(
-                        self.release,
-                        on,
-                        params=params,
-                        log=log,
-                        queue=queue,
-                    )
-                )
-                delay(second=0.15)
-
-            # WARNING: This poking method does not allow to use fail-fast logic
-            #   to catching parallel execution result.
-            for future in as_completed(futures):
-                results.append(future.result(timeout=60))
-
-        if len(queue) > 0:  # pragma: no cov
-            logger.error(
-                f"({self.run_id}) [POKING]: Log Queue does empty when poking "
-                f"process was finishing."
-            )
-
-        return results
-
-    def execute_job(
-        self,
-        job_id: str,
-        params: DictData,
-        *,
-        raise_error: bool = True,
-    ) -> Result:
-        """Workflow Job execution with passing dynamic parameters from the
-        workflow execution to the target job.
-
-        This execution is the minimum level of execution of this workflow
-        model. It different with ``self.execute`` because this method run only
-        one job and return with context of this job data.
-
-        :param job_id: A job ID that want to execute.
-        :param params: A params that was parameterized from workflow execution.
-        :param raise_error: A flag that raise error instead catching to result
-            if it get exception from job execution.
-        :rtype: Result
-        """
-        # VALIDATE: check a job ID that exists in this workflow or not.
-        if job_id not in self.jobs:
-            raise WorkflowException(
-                f"The job ID: {job_id} does not exists in {self.name!r} "
-                f"workflow."
-            )
-
-        logger.info(f"({self.run_id}) [WORKFLOW]: Start execute: {job_id!r}")
-
-        # IMPORTANT:
-        #   Change any job running IDs to this workflow running ID.
-        #
-        try:
-            job: Job = self.jobs[job_id].get_running_id(self.run_id)
-            job.set_outputs(
-                job.execute(params=params).context,
-                to=params,
-            )
-        except JobException as err:
-            logger.error(
-                f"({self.run_id}) [WORKFLOW]: {err.__class__.__name__}: {err}"
-            )
-            if raise_error:
-                raise WorkflowException(
-                    f"Get job execution error {job_id}: JobException: {err}"
-                ) from None
-            else:
-                raise NotImplementedError() from None
-
-        return Result(status=0, context=params)
-
-    def execute(
-        self,
-        params: DictData | None = None,
-        *,
-        timeout: int = 60,
-    ) -> Result:
-        """Execute workflow with passing a dynamic parameters to all jobs that
-        included in this workflow model with ``jobs`` field.
-
-        The result of execution process for each jobs and stages on this
-        workflow will keeping in dict which able to catch out with all jobs and
-        stages by dot annotation.
-
-        For example, when I want to use the output from previous stage, I
-        can access it with syntax:
-
-            ... ${job-name}.stages.${stage-id}.outputs.${key}
-
-        :param params: An input parameters that use on workflow execution that
-            will parameterize before using it. Default is None.
-        :type params: DictData | None
-        :param timeout: A workflow execution time out in second unit that use
-            for limit time of execution and waiting job dependency. Default is
-            60 seconds.
-        :type timeout: int
-        :rtype: Result
-        """
-        logger.info(f"({self.run_id}) [CORE]: Start Execute: {self.name!r} ...")
-
-        # NOTE: I use this condition because this method allow passing empty
-        #   params and I do not want to create new dict object.
-        params: DictData = {} if params is None else params
-        ts: float = time.monotonic()
-        rs: Result = Result()
-
-        # NOTE: It should not do anything if it does not have job.
-        if not self.jobs:
-            logger.warning(
-                f"({self.run_id}) [WORKFLOW]: This workflow: {self.name!r} "
-                f"does not have any jobs"
-            )
-            return rs.catch(status=0, context=params)
-
-        # NOTE: Create a job queue that keep the job that want to running after
-        #   it dependency condition.
-        jq: Queue = Queue()
-        for job_id in self.jobs:
-            jq.put(job_id)
-
-        # NOTE: Create data context that will pass to any job executions
-        #   on this workflow.
-        #
-        #   {
-        #       'params': <input-params>,
-        #       'jobs': {},
-        #   }
-        #
-        context: DictData = self.parameterize(params)
-        status: int = 0
-        try:
-            if config.max_job_parallel == 1:
-                self.__exec_non_threading(
-                    context=context,
-                    ts=ts,
-                    job_queue=jq,
-                    timeout=timeout,
-                )
-            else:
-                self.__exec_threading(
-                    context=context,
-                    ts=ts,
-                    job_queue=jq,
-                    worker=config.max_job_parallel,
-                    timeout=timeout,
-                )
-        except WorkflowException as err:
-            context.update(
-                {
-                    "error": err,
-                    "error_message": f"{err.__class__.__name__}: {err}",
-                },
-            )
-            status = 1
-        return rs.catch(status=status, context=context)
-
-    def __exec_threading(
-        self,
-        context: DictData,
-        ts: float,
-        job_queue: Queue,
-        *,
-        worker: int = 2,
-        timeout: int = 600,
-    ) -> DictData:
-        """Workflow execution by threading strategy.
-
-            If a job need dependency, it will check dependency job ID from
-        context data before allow it run.
-
-        :param context: A context workflow data that want to downstream passing.
-        :param ts: A start timestamp that use for checking execute time should
-            timeout.
-        :param job_queue: A job queue object.
-        :param timeout: A second value unit that bounding running time.
-        :param worker: A number of threading executor pool size.
-        :rtype: DictData
-        """
-        not_time_out_flag: bool = True
-        logger.debug(
-            f"({self.run_id}): [CORE]: Run {self.name} with threading job "
-            f"executor"
-        )
-
-        # IMPORTANT: The job execution can run parallel and waiting by
-        #   needed.
-        with ThreadPoolExecutor(max_workers=worker) as executor:
-            futures: list[Future] = []
-
-            while not job_queue.empty() and (
-                not_time_out_flag := ((time.monotonic() - ts) < timeout)
-            ):
-                job_id: str = job_queue.get()
-                job: Job = self.jobs[job_id]
-
-                if any(need not in context["jobs"] for need in job.needs):
-                    job_queue.task_done()
-                    job_queue.put(job_id)
-                    time.sleep(0.25)
-                    continue
-
-                # NOTE: Start workflow job execution with deep copy context data
-                #   before release.
-                #
-                #   {
-                #       'params': <input-params>,
-                #       'jobs': {},
-                #   }
-                futures.append(
-                    executor.submit(
-                        self.execute_job,
-                        job_id,
-                        params=context,
-                    ),
-                )
-
-                # NOTE: Mark this job queue done.
-                job_queue.task_done()
-
-            # NOTE: Wait for all items to finish processing
-            job_queue.join()
-
-            for future in as_completed(futures, timeout=1800):
-                if err := future.exception():
-                    logger.error(f"({self.run_id}) [CORE]: {err}")
-                    raise WorkflowException(f"{err}")
-                try:
-                    future.result(timeout=60)
-                except TimeoutError as err:  # pragma: no cove
-                    raise WorkflowException(
-                        "Timeout when getting result from future"
-                    ) from err
-
-        if not_time_out_flag:
-            return context
-
-        # NOTE: Raise timeout error.
-        logger.warning(  # pragma: no cov
-            f"({self.run_id}) [WORKFLOW]: Execution of workflow, {self.name!r} "
-            f", was timeout"
-        )
-        raise WorkflowException(  # pragma: no cov
-            f"Execution of workflow: {self.name} was timeout"
-        )
-
-    def __exec_non_threading(
-        self,
-        context: DictData,
-        ts: float,
-        job_queue: Queue,
-        *,
-        timeout: int = 600,
-    ) -> DictData:
-        """Workflow execution with non-threading strategy that use sequential
-        job running and waiting previous job was run successful.
-
-            If a job need dependency, it will check dependency job ID from
-        context data before allow it run.
-
-        :param context: A context workflow data that want to downstream passing.
-        :param ts: A start timestamp that use for checking execute time should
-            timeout.
-        :param timeout: A second value unit that bounding running time.
-        :rtype: DictData
-        """
-        not_time_out_flag: bool = True
-        logger.debug(
-            f"({self.run_id}) [CORE]: Run {self.name} with non-threading job "
-            f"executor"
-        )
-
-        while not job_queue.empty() and (
-            not_time_out_flag := ((time.monotonic() - ts) < timeout)
-        ):
-            job_id: str = job_queue.get()
-            job: Job = self.jobs[job_id]
-
-            # NOTE: Waiting dependency job run successful before release.
-            if any(need not in context["jobs"] for need in job.needs):
-                job_queue.task_done()
-                job_queue.put(job_id)
-                time.sleep(0.05)
-                continue
-
-            # NOTE: Start workflow job execution with deep copy context data
-            #   before release. This job execution process will running until
-            #   done before checking all execution timeout or not.
-            #
-            #   {
-            #       'params': <input-params>,
-            #       'jobs': {},
-            #   }
-            self.execute_job(job_id=job_id, params=context)
-
-            # NOTE: Mark this job queue done.
-            job_queue.task_done()
-
-        # NOTE: Wait for all items to finish processing
-        job_queue.join()
-
-        if not_time_out_flag:
-            return context
-
-        # NOTE: Raise timeout error.
-        logger.warning(  # pragma: no cov
-            f"({self.run_id}) [WORKFLOW]: Execution of workflow was timeout"
-        )
-        raise WorkflowException(  # pragma: no cov
-            f"Execution of workflow: {self.name} was timeout"
-        )
-
-
 class ScheduleWorkflow(BaseModel):
     """Schedule Workflow Pydantic model that use to keep workflow model for the
     Schedule model. it should not use Workflow model directly because on the
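Everything removed in the hunk above is the whole Workflow model, deleted here and re-homed in ddeutil/workflow/workflow.py; its method signatures (from_loader, execute, execute_job, release, poke) are visible verbatim in the removed lines. A minimal usage sketch based on those signatures, assuming a workflow named "my-workflow" exists in the configuration path that Loader searches:

    from ddeutil.workflow.workflow import Workflow

    # Load the model from config by name, then execute all of its jobs.
    wf: Workflow = Workflow.from_loader("my-workflow", externals={})
    rs = wf.execute(params={}, timeout=60)

    # Result carries a status code plus the downstream context mapping.
    print(rs.status, rs.context)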
@@ -799,6 +85,10 @@ class ScheduleWorkflow(BaseModel):
     model.
     """
 
+    alias: Optional[str] = Field(
+        default=None,
+        description="An alias name of workflow.",
+    )
     name: str = Field(description="A workflow name.")
     on: list[On] = Field(
         default_factory=list,
@@ -806,7 +96,7 @@ class ScheduleWorkflow(BaseModel):
     )
     params: DictData = Field(
         default_factory=dict,
-        description="A parameters that want to use
+        description="A parameters that want to use in workflow execution.",
     )
 
     @model_validator(mode="before")
@@ -817,16 +107,17 @@ class ScheduleWorkflow(BaseModel):
         """
         values["name"] = values["name"].replace(" ", "_")
 
+        if not values.get("alias"):
+            values["alias"] = values["name"]
+
         cls.__bypass_on(values)
         return values
 
     @classmethod
-    def __bypass_on(
-        cls,
-        data: DictData,
-        externals: DictData | None = None,
-    ) -> DictData:
-        """Bypass the on data to loaded config data.
+    def __bypass_on(cls, data: DictData) -> DictData:
+        """Bypass and prepare the on data to loaded config data.
+
+        :param data: A data that want to validate for model initialization.
 
         :rtype: DictData
         """
@@ -841,15 +132,32 @@ class ScheduleWorkflow(BaseModel):
             # NOTE: Pass on value to Loader and keep on model object to on
             #   field.
             data["on"] = [
-                (
-                    Loader(n, externals=(externals or {})).data
-                    if isinstance(n, str)
-                    else n
-                )
+                Loader(n, externals={}).data if isinstance(n, str) else n
                 for n in on
             ]
         return data
 
+    @field_validator("on", mode="after")
+    def __on_no_dup__(cls, value: list[On]) -> list[On]:
+        """Validate the on fields should not contain duplicate values and if it
+        contain every minute value, it should has only one on value.
+
+        :rtype: list[On]
+        """
+        set_ons: set[str] = {str(on.cronjob) for on in value}
+        if len(set_ons) != len(value):
+            raise ValueError(
+                "The on fields should not contain duplicate on value."
+            )
+
+        # WARNING:
+        # if '* * * * *' in set_ons and len(set_ons) > 1:
+        #     raise ValueError(
+        #         "If it has every minute cronjob on value, it should has only "
+        #         "one value in the on field."
+        #     )
+        return value
+
 
 class Schedule(BaseModel):
     """Schedule Pydantic Model that use to run with scheduler package. It does
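The new __on_no_dup__ validator compares the crontab string of every On entry and rejects duplicates (the every-minute restriction stays commented out for now). A sketch of the failure mode; both on names here are hypothetical Loader config entries assumed to resolve to the same crontab:

    from pydantic import ValidationError

    try:
        ScheduleWorkflow(
            name="my_workflow",
            on=["every_day_at_00_00", "every_day_at_00_00"],
        )
    except ValidationError as err:
        # Both entries produce the same cronjob string, so validation fails.
        print(err)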
@@ -868,6 +176,15 @@ class Schedule(BaseModel):
         description="A list of ScheduleWorkflow models.",
     )
 
+    @field_validator("desc", mode="after")
+    def __dedent_desc__(cls, value: str) -> str:
+        """Prepare description string that was created on a template.
+
+        :param value: A description string value that want to dedent.
+        :rtype: str
+        """
+        return dedent(value)
+
     @classmethod
     def from_loader(
         cls,
@@ -881,6 +198,7 @@ class Schedule(BaseModel):
         :param name: A schedule name that want to pass to Loader object.
         :param externals: An external parameters that want to pass to Loader
             object.
+
         :rtype: Self
         """
         loader: Loader = Loader(name, externals=(externals or {}))
@@ -900,7 +218,6 @@ class Schedule(BaseModel):
         self,
         start_date: datetime,
         queue: dict[str, list[datetime]],
-        running: dict[str, list[datetime]],
         *,
         externals: DictData | None = None,
     ) -> list[WorkflowTaskData]:
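With running gone from the signature, tasks() now needs only the shared queue mapping, which the next hunk keys by each ScheduleWorkflow alias; every returned WorkflowTaskData carries its own CronRunner. A hedged call sketch, assuming a schedule named "my-schedule" exists in the config path:

    from datetime import datetime, timedelta

    schedule = Schedule.from_loader("my-schedule", externals={})
    queue: dict[str, list[datetime]] = {}  # keyed by alias in 0.0.20

    tasks = schedule.tasks(
        (datetime.now() + timedelta(minutes=1)).replace(second=0),
        queue=queue,
    )
    for task in tasks:
        print(task.alias, task.runner.cron)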
@@ -909,44 +226,44 @@ class Schedule(BaseModel):
 
         :param start_date: A start date that get from the workflow schedule.
         :param queue: A mapping of name and list of datetime for queue.
-        :param running: A mapping of name and list of datetime for running.
         :param externals: An external parameters that pass to the Loader object.
 
         :rtype: list[WorkflowTaskData]
+        :return: Return the list of WorkflowTaskData object from the specific
+            input datetime that mapping with the on field.
         """
 
         # NOTE: Create pair of workflow and on.
         workflow_tasks: list[WorkflowTaskData] = []
-
+        extras: DictData = externals or {}
+
+        for sch_wf in self.workflows:
 
-        for wfs in self.workflows:
-            wf: Workflow = Workflow.from_loader(wfs.name, externals=externals)
+            wf: Workflow = Workflow.from_loader(sch_wf.name, externals=extras)
 
             # NOTE: Create default list of release datetime.
-
-
+            if sch_wf.alias not in queue:
+                queue[sch_wf.alias]: list[datetime] = []
 
-            #
-            #   Schedule object.
-
+            # IMPORTANT: Create the default 'on' value if it does not passing
+            #   the on field to the Schedule object.
+            ons: list[On] = wf.on.copy() if len(sch_wf.on) == 0 else sch_wf.on
 
-            for on in
-                on_gen: CronRunner = on.generate(start_date)
-                next_running_date = on_gen.next
+            for on in ons:
 
-
-
+                # NOTE: Create CronRunner instance from the start_date param.
+                runner: CronRunner = on.generate(start_date)
+                next_running_date = runner.next
 
-
-
+                while next_running_date in queue[sch_wf.alias]:
+                    next_running_date = runner.next
 
                 workflow_tasks.append(
                     WorkflowTaskData(
+                        alias=sch_wf.alias,
                         workflow=wf,
-                        on=on,
-                        params=
-                        queue=queue,
-                        running=running,
+                        runner=runner,
+                        params=sch_wf.params,
                     ),
                 )
 
@@ -967,10 +284,10 @@ def catch_exceptions(cancel_on_failure: bool = False) -> DecoratorCancelJob:
     :rtype: DecoratorCancelJob
     """
 
-    def decorator(func: ReturnCancelJob) -> ReturnCancelJob:
+    def decorator(func: ReturnCancelJob) -> ReturnCancelJob:  # pragma: no cov
         try:
             # NOTE: Check the function that want to handle is method or not.
-            if inspect.ismethod(func):
+            if inspect.ismethod(func):
 
                 @wraps(func)
                 def wrapper(self, *args, **kwargs):
@@ -984,7 +301,7 @@ def catch_exceptions(cancel_on_failure: bool = False) -> DecoratorCancelJob:
 
             return wrapper
 
-        except Exception as err:
+        except Exception as err:
             logger.exception(err)
             if cancel_on_failure:
                 return CancelJob
@@ -993,158 +310,12 @@ def catch_exceptions(cancel_on_failure: bool = False) -> DecoratorCancelJob:
     return decorator
 
 
-@dataclass(frozen=True)
-class WorkflowTaskData:
-    """Workflow task dataclass that use to keep mapping data and objects for
-    passing in multithreading task.
-    """
-
-    workflow: Workflow
-    on: On
-    params: DictData = field(compare=False, hash=False)
-    queue: dict[str, list[datetime]] = field(compare=False, hash=False)
-    running: dict[str, list[datetime]] = field(compare=False, hash=False)
-
-    @catch_exceptions(cancel_on_failure=True)
-    def release(
-        self,
-        log: Log | None = None,
-        *,
-        waiting_sec: int = 60,
-        sleep_interval: int = 15,
-    ) -> None:  # pragma: no cov
-        """Workflow release, it will use with the same logic of
-        `workflow.release` method.
-
-        :param log: A log object for saving result logging from workflow
-            execution process.
-        :param waiting_sec: A second period value that allow workflow execute.
-        :param sleep_interval: A second value that want to waiting until time
-            to execute.
-        """
-        log: Log = log or FileLog
-        wf: Workflow = self.workflow
-        on: On = self.on
-
-        gen: CronRunner = on.generate(
-            datetime.now(tz=config.tz).replace(second=0, microsecond=0)
-        )
-        cron_tz: ZoneInfo = gen.tz
-
-        # NOTE: get next schedule time that generate from now.
-        next_time: datetime = gen.next
-
-        # NOTE: get next utils it does not running.
-        while log.is_pointed(wf.name, next_time, queue=self.running[wf.name]):
-            next_time: datetime = gen.next
-
-        logger.debug(
-            f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} : "
-            f"{next_time:%Y-%m-%d %H:%M:%S}"
-        )
-        heappush(self.running[wf.name], next_time)
-
-        if get_diff_sec(next_time, tz=cron_tz) > waiting_sec:
-            logger.debug(
-                f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} "
-                f": Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
-            )
-
-            # NOTE: Add this next running datetime that not in period to queue
-            #   and remove it to running.
-            self.running[wf.name].remove(next_time)
-            heappush(self.queue[wf.name], next_time)
-
-            time.sleep(0.2)
-            return
-
-        logger.debug(
-            f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} : "
-            f"Closely to run >> {next_time:%Y-%m-%d %H:%M:%S}"
-        )
-
-        # NOTE: Release when the time is nearly to schedule time.
-        while (duration := get_diff_sec(next_time, tz=config.tz)) > (
-            sleep_interval + 5
-        ):
-            logger.debug(
-                f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} "
-                f": Sleep until: {duration}"
-            )
-            time.sleep(15)
-
-        time.sleep(0.5)
-
-        # NOTE: Release parameter that use to change if params has
-        #   templating.
-        release_params: DictData = {
-            "release": {
-                "logical_date": next_time,
-            },
-        }
-
-        # WARNING:
-        #   Re-create workflow object that use new running workflow ID.
-        #
-        runner: Workflow = wf.get_running_id(run_id=wf.new_run_id)
-        rs: Result = runner.execute(
-            params=param2template(self.params, release_params),
-        )
-        logger.debug(
-            f"({runner.run_id}) [CORE]: {wf.name!r} : {on.cronjob} : "
-            f"End release - {next_time:%Y-%m-%d %H:%M:%S}"
-        )
-
-        del runner
-
-        # NOTE: Set parent ID on this result.
-        rs.set_parent_run_id(wf.run_id)
-
-        # NOTE: Save result to log object saving.
-        rs_log: Log = log.model_validate(
-            {
-                "name": wf.name,
-                "on": str(on.cronjob),
-                "release": next_time,
-                "context": rs.context,
-                "parent_run_id": rs.run_id,
-                "run_id": rs.run_id,
-            }
-        )
-        rs_log.save(excluded=None)
-
-        # NOTE: remove this release date from running
-        self.running[wf.name].remove(next_time)
-
-        # IMPORTANT:
-        #   Add the next running datetime to workflow queue
-        finish_time: datetime = datetime.now(tz=cron_tz).replace(
-            second=0, microsecond=0
-        )
-        future_running_time: datetime = gen.next
-        while (
-            future_running_time in self.running[wf.name]
-            or future_running_time in self.queue[wf.name]
-            or future_running_time < finish_time
-        ):  # pragma: no cov
-            future_running_time: datetime = gen.next
-
-        heappush(self.queue[wf.name], future_running_time)
-        logger.debug(f"[CORE]: {'-' * 100}")
-
-    def __eq__(self, other) -> bool:
-        if isinstance(other, WorkflowTaskData):
-            return (
-                self.workflow.name == other.workflow.name
-                and self.on.cronjob == other.on.cronjob
-            )
-        return NotImplemented
-
-
 @catch_exceptions(cancel_on_failure=True)  # pragma: no cov
-def workflow_task(
+def workflow_task_release(
     workflow_tasks: list[WorkflowTaskData],
     stop: datetime,
+    queue,
+    running,
     threads: dict[str, Thread],
 ) -> CancelJob | None:
     """Workflow task generator that create release pair of workflow and on to
@@ -1154,13 +325,14 @@ def workflow_task(
 
     :param workflow_tasks:
     :param stop: A stop datetime object that force stop running scheduler.
+    :param queue:
+    :param running:
     :param threads:
     :rtype: CancelJob | None
     """
-
-    start_date_minute: datetime = start_date.replace(second=0, microsecond=0)
+    current_date: datetime = datetime.now(tz=config.tz)
 
-    if
+    if current_date > stop.replace(tzinfo=config.tz):
         logger.info("[WORKFLOW]: Stop this schedule with datetime stopper.")
         while len(threads) > 0:
             logger.warning(
@@ -1188,45 +360,43 @@ def workflow_task(
         # NOTE: Get incoming datetime queue.
         logger.debug(
             f"[WORKFLOW]: Current queue: {task.workflow.name!r} : "
-            f"{list(queue2str(
+            f"{list(queue2str(queue[task.alias]))}"
         )
 
-        # NOTE: Create minute unit value for any scheduler datetime that
-        #   checking a workflow task should run in this datetime.
-        current_running_time: datetime = start_date_minute.astimezone(
-            tz=ZoneInfo(task.on.tz)
-        )
         if (
-            len(
-            and
-        ) or (
-            task.on.next(current_running_time)
-            != task.queue[task.workflow.name][0]
+            len(queue[task.alias]) > 0
+            and task.runner.date != queue[task.alias][0]
         ):
             logger.debug(
                 f"[WORKFLOW]: Skip schedule "
-                f"{
-                f"for : {task.workflow.name!r} : {task.
+                f"{task.runner.date:%Y-%m-%d %H:%M:%S} "
+                f"for : {task.workflow.name!r} : {task.runner.cron}"
             )
            continue
-
+
+        elif len(queue[task.alias]) == 0:
             logger.warning(
                 f"[WORKFLOW]: Queue is empty for : {task.workflow.name!r} : "
-                f"{task.
+                f"{task.runner.cron}"
             )
             continue
 
         # NOTE: Remove this datetime from queue.
-
+        queue[task.alias].pop(0)
 
         # NOTE: Create thread name that able to tracking with observe schedule
         #   job.
         thread_name: str = (
-            f"{task.workflow.name}|{str(task.
-            f"{
+            f"{task.workflow.name}|{str(task.runner.cron)}|"
+            f"{task.runner.date:%Y%m%d%H%M}"
         )
+
         wf_thread: Thread = Thread(
-            target=task.release,
+            target=catch_exceptions(cancel_on_failure=True)(task.release),
+            kwargs={
+                "queue": queue,
+                "running": running,
+            },
             name=thread_name,
             daemon=True,
         )
@@ -1277,7 +447,7 @@ def workflow_control(
             "Should install schedule package before use this module."
         ) from None
 
-
+    scheduler: Scheduler = Scheduler()
     start_date: datetime = datetime.now(tz=config.tz)
 
     # NOTE: Design workflow queue caching.
@@ -1285,7 +455,6 @@ def workflow_control(
    #   {"workflow-name": [<release-datetime>, <release-datetime>, ...]}
    #
    wf_queue: dict[str, list[datetime]] = {}
-    wf_running: dict[str, list[datetime]] = {}
    thread_releases: dict[str, Thread] = {}
 
    start_date_waiting: datetime = (start_date + timedelta(minutes=1)).replace(
@@ -1295,31 +464,33 @@ def workflow_control(
     # NOTE: Create pair of workflow and on from schedule model.
     workflow_tasks: list[WorkflowTaskData] = []
     for name in schedules:
-
+        schedule: Schedule = Schedule.from_loader(name, externals=externals)
+
+        # NOTE: Create a workflow task data instance from schedule object.
         workflow_tasks.extend(
-
+            schedule.tasks(
                 start_date_waiting,
                 queue=wf_queue,
-                running=wf_running,
                 externals=externals,
             ),
         )
 
     # NOTE: This schedule job will start every minute at :02 seconds.
     (
-
+        scheduler.every(1)
         .minutes.at(":02")
         .do(
-            workflow_task,
+            workflow_task_release,
             workflow_tasks=workflow_tasks,
             stop=(stop or (start_date + config.stop_boundary_delta)),
+            queue=wf_queue,
             threads=thread_releases,
         )
         .tag("control")
     )
 
     # NOTE: Checking zombie task with schedule job will start every 5 minute.
-
+    scheduler.every(5).minutes.at(":10").do(
         workflow_monitor,
         threads=thread_releases,
     ).tag("monitor")
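The control loop now owns a local Scheduler from the schedule package and wires two tagged jobs: the release job every minute at second :02 and the monitor job every 5 minutes at second :10. A self-contained sketch of that chaining pattern, with placeholder job bodies:

    import time
    from schedule import Scheduler

    scheduler = Scheduler()
    scheduler.every(1).minutes.at(":02").do(lambda: print("release")).tag("control")
    scheduler.every(5).minutes.at(":10").do(lambda: print("monitor")).tag("monitor")

    while True:
        scheduler.run_pending()
        time.sleep(1)
        # Mirror of the shutdown rule in the next hunk: once the control job
        # is gone, clear the monitor job and stop looping.
        if not scheduler.get_jobs("control"):
            scheduler.clear("monitor")
            break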
@@ -1327,10 +498,12 @@ def workflow_control(
     # NOTE: Start running schedule
     logger.info(f"[WORKFLOW]: Start schedule: {schedules}")
     while True:
-
+        scheduler.run_pending()
         time.sleep(1)
-
-
+
+        # NOTE: Break the scheduler when the control job does not exists.
+        if not scheduler.get_jobs("control"):
+            scheduler.clear("monitor")
             logger.warning(
                 f"[WORKFLOW]: Workflow release thread: {thread_releases}"
             )
@@ -1340,9 +513,6 @@ def workflow_control(
     logger.warning(
         f"Queue: {[list(queue2str(wf_queue[wf])) for wf in wf_queue]}"
     )
-    logger.warning(
-        f"Running: {[list(queue2str(wf_running[wf])) for wf in wf_running]}"
-    )
     return schedules
 
 
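The final hunk below redraws the workflow_runner docstring diagram: each worker process runs its own schedule control loop, and each loop fans workflow tasks out to release threads. A rough sketch of that process fan-out shape using the ProcessPoolExecutor import this module keeps; the run_control stand-in and schedule names are hypothetical:

    from concurrent.futures import ProcessPoolExecutor, as_completed

    def run_control(names: list[str]) -> list[str]:
        # Stand-in for workflow_control(schedules=...), the per-process loop.
        return names

    with ProcessPoolExecutor(max_workers=2) as executor:
        futures = [
            executor.submit(run_control, names)
            for names in (["schedule-01"], ["schedule-02"])
        ]
        for future in as_completed(futures):
            print(future.result())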
@@ -1367,14 +537,14 @@ def workflow_runner(
 
     The current workflow logic that split to process will be below diagram:
 
-        PIPELINES ==> process 01 ==> schedule
-
-
-
-        ==> process 02 ==> schedule
-
-
-
+        PIPELINES ==> process 01 ==> schedule --> thread of release
+                                                      workflow task 01 01
+                                                  --> thread of release
+                                                      workflow task 01 02
+                      ==> process 02 ==> schedule --> thread of release
+                                                      workflow task 02 01
+                                                  --> thread of release
+                                                      workflow task 02 02
                       ==> ...
     """
     excluded: list[str] = excluded or []