ddeutil-workflow 0.0.10__py3-none-any.whl → 0.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +3 -2
- ddeutil/workflow/api.py +84 -16
- ddeutil/workflow/cli.py +14 -14
- ddeutil/workflow/exceptions.py +6 -6
- ddeutil/workflow/job.py +572 -0
- ddeutil/workflow/log.py +10 -10
- ddeutil/workflow/repeat.py +4 -2
- ddeutil/workflow/route.py +165 -36
- ddeutil/workflow/scheduler.py +733 -110
- ddeutil/workflow/stage.py +12 -12
- ddeutil/workflow/utils.py +4 -4
- {ddeutil_workflow-0.0.10.dist-info → ddeutil_workflow-0.0.11.dist-info}/METADATA +66 -70
- ddeutil_workflow-0.0.11.dist-info/RECORD +21 -0
- {ddeutil_workflow-0.0.10.dist-info → ddeutil_workflow-0.0.11.dist-info}/WHEEL +1 -1
- ddeutil/workflow/pipeline.py +0 -1186
- ddeutil_workflow-0.0.10.dist-info/RECORD +0 -21
- {ddeutil_workflow-0.0.10.dist-info → ddeutil_workflow-0.0.11.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.10.dist-info → ddeutil_workflow-0.0.11.dist-info}/entry_points.txt +0 -0
- {ddeutil_workflow-0.0.10.dist-info → ddeutil_workflow-0.0.11.dist-info}/top_level.txt +0 -0
ddeutil/workflow/scheduler.py
CHANGED
@@ -11,39 +11,47 @@ import logging
 import os
 import time
 from collections.abc import Iterator
-from concurrent.futures import
-
+from concurrent.futures import (
+    Future,
+    ProcessPoolExecutor,
+    ThreadPoolExecutor,
+    as_completed,
+)
+from dataclasses import dataclass, field
 from datetime import datetime, timedelta
 from functools import wraps
 from heapq import heappush
+from queue import Queue
+from textwrap import dedent
 from threading import Thread
 from typing import Optional
 from zoneinfo import ZoneInfo

 from dotenv import load_dotenv
 from pydantic import BaseModel, Field
-from pydantic.functional_validators import model_validator
+from pydantic.functional_validators import field_validator, model_validator
 from typing_extensions import Self

 try:
-    from schedule import CancelJob
+    from schedule import CancelJob
 except ImportError:
-
-        "Should install schedule package before use this module."
-    ) from None
+    CancelJob = None

 from .__types import DictData
 from .cron import CronRunner
-from .exceptions import WorkflowException
+from .exceptions import JobException, WorkflowException
+from .job import Job
 from .log import FileLog, Log, get_logger
 from .on import On
-from .pipeline import Pipeline
 from .utils import (
     Loader,
+    Param,
     Result,
     batch,
     delay,
+    gen_id,
     get_diff_sec,
+    has_template,
     param2template,
 )

@@ -53,23 +61,630 @@ logging.getLogger("schedule").setLevel(logging.INFO)


 __all__ = (
-    "
+    "Workflow",
+    "WorkflowSchedule",
+    "WorkflowTask",
     "Schedule",
-    "
+    "workflow_runner",
+    "workflow_task",
 )


-class
-    """
+class Workflow(BaseModel):
+    """Workflow Model this is the main future of this project because it use to
+    be workflow data for running everywhere that you want or using it to
+    scheduler task in background. It use lightweight coding line from Pydantic
+    Model and enhance execute method on it.
+    """

-    name: str = Field(description="A
+    name: str = Field(description="A workflow name.")
+    desc: Optional[str] = Field(
+        default=None,
+        description=(
+            "A workflow description that can be string of markdown content."
+        ),
+    )
+    params: dict[str, Param] = Field(
+        default_factory=dict,
+        description="A parameters that want to use on this workflow.",
+    )
+    on: list[On] = Field(
+        default_factory=list,
+        description="A list of On instance for this workflow schedule.",
+    )
+    jobs: dict[str, Job] = Field(
+        default_factory=dict,
+        description="A mapping of job ID and job model that already loaded.",
+    )
+    run_id: Optional[str] = Field(
+        default=None,
+        description="A running workflow ID.",
+        repr=False,
+        exclude=True,
+    )
+
+    @property
+    def new_run_id(self) -> str:
+        """Running ID of this workflow that always generate new unique value."""
+        return gen_id(self.name, unique=True)
+
+    @classmethod
+    def from_loader(
+        cls,
+        name: str,
+        externals: DictData | None = None,
+    ) -> Self:
+        """Create Workflow instance from the Loader object that only receive
+        an input workflow name. The loader object will use this workflow name to
+        searching configuration data of this workflow model in conf path.
+
+        :param name: A workflow name that want to pass to Loader object.
+        :param externals: An external parameters that want to pass to Loader
+            object.
+        :rtype: Self
+        """
+        loader: Loader = Loader(name, externals=(externals or {}))
+
+        # NOTE: Validate the config type match with current connection model
+        if loader.type != cls:
+            raise ValueError(f"Type {loader.type} does not match with {cls}")
+
+        loader_data: DictData = copy.deepcopy(loader.data)
+
+        # NOTE: Add name to loader data
+        loader_data["name"] = name.replace(" ", "_")
+
+        # NOTE: Prepare `on` data
+        cls.__bypass_on(loader_data)
+        return cls.model_validate(obj=loader_data)
+
+    @classmethod
+    def __bypass_on(cls, data: DictData, externals: DictData | None = None):
+        """Bypass the on data to loaded config data."""
+        if on := data.pop("on", []):
+            if isinstance(on, str):
+                on = [on]
+            if any(not isinstance(i, (dict, str)) for i in on):
+                raise TypeError("The ``on`` key should be list of str or dict")
+
+            # NOTE: Pass on value to Loader and keep on model object to on field
+            data["on"] = [
+                (
+                    Loader(n, externals=(externals or {})).data
+                    if isinstance(n, str)
+                    else n
+                )
+                for n in on
+            ]
+        return data
+
+    @model_validator(mode="before")
+    def __prepare_params(cls, values: DictData) -> DictData:
+        """Prepare the params key."""
+        # NOTE: Prepare params type if it passing with only type value.
+        if params := values.pop("params", {}):
+            values["params"] = {
+                p: (
+                    {"type": params[p]}
+                    if isinstance(params[p], str)
+                    else params[p]
+                )
+                for p in params
+            }
+        return values
+
+    @field_validator("desc", mode="after")
+    def ___prepare_desc(cls, value: str) -> str:
+        """Prepare description string that was created on a template."""
+        return dedent(value)
+
+    @model_validator(mode="after")
+    def __validate_jobs_need_and_prepare_running_id(self):
+        """Validate each need job in any jobs should exists."""
+        for job in self.jobs:
+            if not_exist := [
+                need for need in self.jobs[job].needs if need not in self.jobs
+            ]:
+                raise WorkflowException(
+                    f"This needed jobs: {not_exist} do not exist in this "
+                    f"workflow, {self.name!r}"
+                )
+
+            # NOTE: update a job id with its job id from workflow template
+            self.jobs[job].id = job
+
+        if self.run_id is None:
+            self.run_id = self.new_run_id
+
+        # VALIDATE: Validate workflow name should not dynamic with params
+        # template.
+        if has_template(self.name):
+            raise ValueError(
+                f"Workflow name should not has any template, please check, "
+                f"{self.name!r}."
+            )
+
+        return self
+
+    def get_running_id(self, run_id: str) -> Self:
+        """Return Workflow model object that changing workflow running ID with
+        an input running ID.
+
+        :param run_id: A replace workflow running ID.
+        :rtype: Self
+        """
+        return self.model_copy(update={"run_id": run_id})
+
+    def job(self, name: str) -> Job:
+        """Return Job model that exists on this workflow.
+
+        :param name: A job name that want to get from a mapping of job models.
+        :type name: str
+
+        :rtype: Job
+        :returns: A job model that exists on this workflow by input name.
+        """
+        if name not in self.jobs:
+            raise ValueError(
+                f"A Job {name!r} does not exists in this workflow, "
+                f"{self.name!r}"
+            )
+        return self.jobs[name]
+
+    def parameterize(self, params: DictData) -> DictData:
+        """Prepare parameters before passing to execution process. This method
+        will create jobs key to params mapping that will keep any result from
+        job execution.
+
+        :param params: A parameter mapping that receive from workflow execution.
+        :rtype: DictData
+        """
+        # VALIDATE: Incoming params should have keys that set on this workflow.
+        if check_key := tuple(
+            f"{k!r}"
+            for k in self.params
+            if (k not in params and self.params[k].required)
+        ):
+            raise WorkflowException(
+                f"Required Param on this workflow setting does not set: "
+                f"{', '.join(check_key)}."
+            )
+
+        # NOTE: mapping type of param before adding it to params variable.
+        return {
+            "params": (
+                params
+                | {
+                    k: self.params[k].receive(params[k])
+                    for k in params
+                    if k in self.params
+                }
+            ),
+            "jobs": {},
+        }
+
+    def release(
+        self,
+        on: On,
+        params: DictData,
+        queue: list[datetime],
+        *,
+        waiting_sec: int = 60,
+        sleep_interval: int = 15,
+        log: Log = None,
+    ) -> Result:
+        """Start running workflow with the on schedule in period of 30 minutes.
+        That mean it will still running at background 30 minutes until the
+        schedule matching with its time.
+
+        This method allow workflow use log object to save the execution
+        result to log destination like file log to local `/logs` directory.
+
+        :param on: An on schedule value.
+        :param params: A workflow parameter that pass to execute method.
+        :param queue: A list of release time that already running.
+        :param waiting_sec: A second period value that allow workflow execute.
+        :param sleep_interval: A second value that want to waiting until time
+            to execute.
+        :param log: A log object that want to save execution result.
+        :rtype: Result
+        """
+        logger.debug(
+            f"({self.run_id}) [CORE]: {self.name!r}: {on.cronjob} : run with "
+            f"queue id: {id(queue)}"
+        )
+        log: Log = log or FileLog
+        tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
+        gen: CronRunner = on.generate(
+            datetime.now(tz=tz).replace(second=0, microsecond=0)
+            + timedelta(seconds=1)
+        )
+        cron_tz: ZoneInfo = gen.tz
+
+        # NOTE: get next schedule time that generate from now.
+        next_time: datetime = gen.next
+
+        # NOTE: get next utils it does not logger.
+        while log.is_pointed(self.name, next_time, queue=queue):
+            next_time: datetime = gen.next
+
+        # NOTE: push this next running time to log queue
+        heappush(queue, next_time)
+
+        # VALIDATE: Check the different time between the next schedule time and
+        # now that less than waiting period (second unit).
+        if get_diff_sec(next_time, tz=cron_tz) > waiting_sec:
+            logger.debug(
+                f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+                f"Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
+            )
+
+            # NOTE: Remove next datetime from queue.
+            queue.remove(next_time)
+
+            time.sleep(0.15)
+            return Result(
+                status=0,
+                context={
+                    "params": params,
+                    "poking": {"skipped": [str(on.cronjob)], "run": []},
+                },
+            )
+
+        logger.debug(
+            f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+            f"Closely to run >> {next_time:%Y-%m-%d %H:%M:%S}"
+        )
+
+        # NOTE: Release when the time is nearly to schedule time.
+        while (duration := get_diff_sec(next_time, tz=cron_tz)) > (
+            sleep_interval + 5
+        ):
+            logger.debug(
+                f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+                f"Sleep until: {duration}"
+            )
+            time.sleep(sleep_interval)
+
+        time.sleep(0.5)
+
+        # NOTE: Release parameter that use to change if params has
+        # templating.
+        release_params: DictData = {
+            "release": {
+                "logical_date": next_time,
+            },
+        }
+
+        # WARNING: Re-create workflow object that use new running workflow
+        # ID.
+        runner: Self = self.get_running_id(run_id=self.new_run_id)
+        rs: Result = runner.execute(
+            params=param2template(params, release_params),
+        )
+        logger.debug(
+            f"({runner.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+            f"End release {next_time:%Y-%m-%d %H:%M:%S}"
+        )
+
+        # NOTE: Delete a copied workflow instance for saving memory.
+        del runner
+
+        rs.set_parent_run_id(self.run_id)
+        rs_log: Log = log.model_validate(
+            {
+                "name": self.name,
+                "on": str(on.cronjob),
+                "release": next_time,
+                "context": rs.context,
+                "parent_run_id": rs.run_id,
+                "run_id": rs.run_id,
+            }
+        )
+        # NOTE: Saving execution result to destination of the input log object.
+        rs_log.save(excluded=None)
+
+        queue.remove(next_time)
+        time.sleep(0.05)
+        return Result(
+            status=0,
+            context={
+                "params": params,
+                "poking": {"skipped": [], "run": [str(on.cronjob)]},
+            },
+        )
+
+    def poke(
+        self,
+        params: DictData | None = None,
+        *,
+        log: Log | None = None,
+    ) -> list[Result]:
+        """Poke workflow with threading executor pool for executing with all its
+        schedules that was set on the `on` value. This method will observe its
+        schedule that nearing to run with the ``self.release()`` method.
+
+        :param params: A parameters that want to pass to the release method.
+        :param log: A log object that want to use on this poking process.
+        :rtype: list[Result]
+        """
+        logger.info(
+            f"({self.run_id}) [POKING]: Start Poking: {self.name!r} ..."
+        )
+
+        # NOTE: If this workflow does not set the on schedule, it will return
+        # empty result.
+        if len(self.on) == 0:
+            return []
+
+        params: DictData = params or {}
+        queue: list[datetime] = []
+        results: list[Result] = []
+
+        worker: int = int(os.getenv("WORKFLOW_CORE_MAX_NUM_POKING") or "4")
+        with ThreadPoolExecutor(max_workers=worker) as executor:
+            # TODO: If I want to run infinite loop.
+            futures: list[Future] = []
+            for on in self.on:
+                futures.append(
+                    executor.submit(
+                        self.release,
+                        on,
+                        params=params,
+                        log=log,
+                        queue=queue,
+                    )
+                )
+                delay(second=0.15)
+
+            # WARNING: This poking method does not allow to use fail-fast logic
+            # to catching parallel execution result.
+            for future in as_completed(futures):
+                results.append(future.result(timeout=60))
+
+        if len(queue) > 0:
+            logger.error(
+                f"({self.run_id}) [POKING]: Log Queue does empty when poking "
+                f"process was finishing."
+            )
+
+        return results
+
+    def execute_job(
+        self,
+        job: str,
+        params: DictData,
+    ) -> Result:
+        """Job Executor that use on workflow executor.
+
+        :param job: A job ID that want to execute.
+        :param params: A params that was parameterized from workflow execution.
+        :rtype: Result
+        """
+        # VALIDATE: check a job ID that exists in this workflow or not.
+        if job not in self.jobs:
+            raise WorkflowException(
+                f"The job ID: {job} does not exists on {self.name!r} workflow."
+            )
+        try:
+            logger.info(f"({self.run_id}) [WORKFLOW]: Start execute: {job!r}")
+
+            # IMPORTANT:
+            #       Change any job running IDs to this workflow running ID.
+            job_obj: Job = self.jobs[job].get_running_id(self.run_id)
+            j_rs: Result = job_obj.execute(params=params)
+
+        except JobException as err:
+            raise WorkflowException(f"{job}: JobException: {err}") from None
+
+        return Result(
+            status=j_rs.status,
+            context={job: job_obj.set_outputs(j_rs.context)},
+        )
+
+    def execute(
+        self,
+        params: DictData | None = None,
+        *,
+        timeout: int = 60,
+    ) -> Result:
+        """Execute workflow with passing dynamic parameters to any jobs that
+        included in the workflow.
+
+        :param params: An input parameters that use on workflow execution that
+            will parameterize before using it.
+        :param timeout: A workflow execution time out in second unit that use
+            for limit time of execution and waiting job dependency.
+        :rtype: Result
+
+        See Also:
+        ---
+
+            The result of execution process for each jobs and stages on this
+        workflow will keeping in dict which able to catch out with all jobs and
+        stages by dot annotation.
+
+            For example, when I want to use the output from previous stage, I
+        can access it with syntax:
+
+            ... ${job-name}.stages.${stage-id}.outputs.${key}
+
+        """
+        logger.info(f"({self.run_id}) [CORE]: Start Execute: {self.name!r} ...")
+        params: DictData = params or {}
+        ts: float = time.monotonic()
+
+        # NOTE: It should not do anything if it does not have job.
+        if not self.jobs:
+            logger.warning(
+                f"({self.run_id}) [WORKFLOW]: This workflow: {self.name!r} "
+                f"does not have any jobs"
+            )
+            return Result(status=0, context=params)
+
+        # NOTE: Create a job queue that keep the job that want to running after
+        # it dependency condition.
+        jq: Queue = Queue()
+        for job_id in self.jobs:
+            jq.put(job_id)
+
+        # NOTE: Create result context that will pass this context to any
+        # execution dependency.
+        context: DictData = self.parameterize(params)
+        try:
+            worker: int = int(os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "2"))
+            (
+                self.__exec_non_threading(context, ts, jq, timeout=timeout)
+                if worker == 1
+                else self.__exec_threading(
+                    context, ts, jq, worker=worker, timeout=timeout
+                )
+            )
+            return Result(status=0, context=context)
+        except WorkflowException as err:
+            context.update(
+                {"error_message": f"{err.__class__.__name__}: {err}"}
+            )
+            return Result(status=1, context=context)
+
+    def __exec_threading(
+        self,
+        context: DictData,
+        ts: float,
+        job_queue: Queue,
+        *,
+        worker: int = 2,
+        timeout: int = 600,
+    ) -> DictData:
+        """Workflow threading execution.
+
+        :param context: A context workflow data that want to downstream passing.
+        :param ts: A start timestamp that use for checking execute time should
+            timeout.
+        :param timeout: A second value unit that bounding running time.
+        :param worker: A number of threading executor pool size.
+        :rtype: DictData
+        """
+        not_time_out_flag: bool = True
+        logger.debug(
+            f"({self.run_id}): [CORE]: Run {self.name} with threading job "
+            f"executor"
+        )
+
+        # IMPORTANT: The job execution can run parallel and waiting by
+        # needed.
+        with ThreadPoolExecutor(max_workers=worker) as executor:
+            futures: list[Future] = []
+
+            while not job_queue.empty() and (
+                not_time_out_flag := ((time.monotonic() - ts) < timeout)
+            ):
+                job_id: str = job_queue.get()
+                job: Job = self.jobs[job_id]
+
+                if any(need not in context["jobs"] for need in job.needs):
+                    job_queue.put(job_id)
+                    time.sleep(0.25)
+                    continue
+
+                futures.append(
+                    executor.submit(
+                        self.execute_job,
+                        job_id,
+                        params=copy.deepcopy(context),
+                    ),
+                )
+                job_queue.task_done()
+
+            # NOTE: Wait for all items to finish processing
+            job_queue.join()
+
+            for future in as_completed(futures):
+                if err := future.exception():
+                    logger.error(f"{err}")
+                    raise WorkflowException(f"{err}")
+
+                # NOTE: Update job result to workflow result.
+                context["jobs"].update(future.result(timeout=20).conext)
+
+        if not_time_out_flag:
+            return context
+
+        # NOTE: Raise timeout error.
+        logger.warning(
+            f"({self.run_id}) [WORKFLOW]: Execution of workflow, {self.name!r} "
+            f", was timeout"
+        )
+        raise WorkflowException(
+            f"Execution of workflow: {self.name} was timeout"
+        )
+
+    def __exec_non_threading(
+        self,
+        context: DictData,
+        ts: float,
+        job_queue: Queue,
+        *,
+        timeout: int = 600,
+    ) -> DictData:
+        """Workflow non-threading execution that use sequential job running
+        and waiting previous run successful.
+
+        :param context: A context workflow data that want to downstream passing.
+        :param ts: A start timestamp that use for checking execute time should
+            timeout.
+        :param timeout: A second value unit that bounding running time.
+        :rtype: DictData
+        """
+        not_time_out_flag: bool = True
+        logger.debug(
+            f"({self.run_id}) [CORE]: Run {self.name} with non-threading job "
+            f"executor"
+        )
+
+        while not job_queue.empty() and (
+            not_time_out_flag := ((time.monotonic() - ts) < timeout)
+        ):
+            job_id: str = job_queue.get()
+            job: Job = self.jobs[job_id]
+
+            # NOTE:
+            if any(need not in context["jobs"] for need in job.needs):
+                job_queue.put(job_id)
+                time.sleep(0.25)
+                continue
+
+            # NOTE: Start job execution.
+            job_rs = self.execute_job(job_id, params=copy.deepcopy(context))
+            context["jobs"].update(job_rs.context)
+            job_queue.task_done()
+
+        # NOTE: Wait for all items to finish processing
+        job_queue.join()
+
+        if not_time_out_flag:
+            return context
+
+        # NOTE: Raise timeout error.
+        logger.warning(
+            f"({self.run_id}) [WORKFLOW]: Execution of workflow was timeout"
+        )
+        raise WorkflowException(
+            f"Execution of workflow: {self.name} was timeout"
+        )
+
+
+class WorkflowSchedule(BaseModel):
+    """Workflow schedule Pydantic Model."""
+
+    name: str = Field(description="A workflow name.")
     on: list[On] = Field(
         default_factory=list,
         description="An override On instance value.",
     )
     params: DictData = Field(
         default_factory=dict,
-        description="A parameters that want to use to
+        description="A parameters that want to use to workflow execution.",
     )

     @model_validator(mode="before")

@@ -106,7 +721,7 @@ class PipelineSchedule(BaseModel):

 class Schedule(BaseModel):
     """Schedule Pydantic Model that use to run with scheduler package. It does
-    not equal the on value in
+    not equal the on value in Workflow model but it use same logic to running
     release date with crontab interval.
     """

@@ -116,9 +731,9 @@ class Schedule(BaseModel):
             "A schedule description that can be string of markdown content."
         ),
     )
-
+    workflows: list[WorkflowSchedule] = Field(
         default_factory=list,
-        description="A list of
+        description="A list of WorkflowSchedule models.",
     )

     @classmethod

@@ -145,49 +760,48 @@ class Schedule(BaseModel):
         start_date: datetime,
         queue: dict[str, list[datetime]],
         running: dict[str, list[datetime]],
+        *,
         externals: DictData | None = None,
-    ) -> list[
+    ) -> list[WorkflowTask]:
         """Generate Task from the current datetime.

         :param start_date: A start date that get from the workflow schedule.
-        :param queue:
-        :param running:
+        :param queue: A mapping of name and list of datetime for queue.
+        :param running: A mapping of name and list of datetime for running.
         :param externals: An external parameters that pass to the Loader object.
-        :rtype: list[
+        :rtype: list[WorkflowTask]
         """

-        # NOTE: Create pair of
-
+        # NOTE: Create pair of workflow and on.
+        workflow_tasks: list[WorkflowTask] = []
         externals: DictData = externals or {}

-        for
-
-                pipe.name, externals=externals
-            )
+        for wfs in self.workflows:
+            wf: Workflow = Workflow.from_loader(wfs.name, externals=externals)

             # NOTE: Create default list of release datetime.
-            queue[
-            running[
+            queue[wfs.name]: list[datetime] = []
+            running[wfs.name]: list[datetime] = []

-            for on in
+            for on in wf.on:
                 on_gen = on.generate(start_date)
                 next_running_date = on_gen.next
-                while next_running_date in queue[
+                while next_running_date in queue[wfs.name]:
                     next_running_date = on_gen.next

-                heappush(queue[
+                heappush(queue[wfs.name], next_running_date)

-
-
-
+                workflow_tasks.append(
+                    WorkflowTask(
+                        workflow=wf,
                         on=on,
-                        params=
+                        params=wfs.params,
                         queue=queue,
                         running=running,
                     ),
                 )

-        return
+        return workflow_tasks


 def catch_exceptions(cancel_on_failure=False):

@@ -227,27 +841,27 @@ def catch_exceptions_method(cancel_on_failure=False):


 @dataclass(frozen=True)
-class
-    """
+class WorkflowTask:
+    """Workflow task dataclass that use to keep mapping data and objects for
     passing in multithreading task.
     """

-
+    workflow: Workflow
     on: On
-    params: DictData
-    queue: list[datetime]
-    running: list[datetime]
+    params: DictData = field(compare=False, hash=False)
+    queue: list[datetime] = field(compare=False, hash=False)
+    running: list[datetime] = field(compare=False, hash=False)

     @catch_exceptions_method(cancel_on_failure=True)
     def release(self, log: Log | None = None) -> None:
-        """
-        `
+        """Workflow release, it will use with the same logic of
+        `workflow.release` method.

         :param log: A log object.
         """
         tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
         log: Log = log or FileLog
-
+        wf: Workflow = self.workflow
         on: On = self.on

         gen: CronRunner = on.generate(

@@ -259,40 +873,38 @@ class PipelineTask:
         next_time: datetime = gen.next

         # NOTE: get next utils it does not running.
-        while log.is_pointed(
-            pipeline.name, next_time, queue=self.running[pipeline.name]
-        ):
+        while log.is_pointed(wf.name, next_time, queue=self.running[wf.name]):
             next_time: datetime = gen.next

         logger.debug(
-            f"({
+            f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} : "
             f"{next_time:%Y-%m-%d %H:%M:%S}"
         )
-        heappush(self.running[
+        heappush(self.running[wf.name], next_time)

         if get_diff_sec(next_time, tz=cron_tz) > 55:
             logger.debug(
-                f"({
+                f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} "
                 f": Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
             )

             # NOTE: Add this next running datetime that not in period to queue
             # and remove it to running.
-            self.running[
-            heappush(self.queue[
+            self.running[wf.name].remove(next_time)
+            heappush(self.queue[wf.name], next_time)

             time.sleep(0.2)
             return

         logger.debug(
-            f"({
+            f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} : "
             f"Closely to run >> {next_time:%Y-%m-%d %H:%M:%S}"
         )

         # NOTE: Release when the time is nearly to schedule time.
         while (duration := get_diff_sec(next_time, tz=tz)) > (15 + 5):
             logger.debug(
-                f"({
+                f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} "
                 f": Sleep until: {duration}"
             )
             time.sleep(15)

@@ -307,26 +919,26 @@ class PipelineTask:
             },
         }

-        # WARNING: Re-create
+        # WARNING: Re-create workflow object that use new running workflow
         # ID.
-        runner:
+        runner: Workflow = wf.get_running_id(run_id=wf.new_run_id)
         rs: Result = runner.execute(
             params=param2template(self.params, release_params),
         )
         logger.debug(
-            f"({runner.run_id}) [CORE]: {
+            f"({runner.run_id}) [CORE]: {wf.name!r} : {on.cronjob} : "
             f"End release - {next_time:%Y-%m-%d %H:%M:%S}"
         )

         del runner

         # NOTE: Set parent ID on this result.
-        rs.set_parent_run_id(
+        rs.set_parent_run_id(wf.run_id)

         # NOTE: Save result to log object saving.
         rs_log: Log = log.model_validate(
             {
-                "name":
+                "name": wf.name,
                 "on": str(on.cronjob),
                 "release": next_time,
                 "context": rs.context,

@@ -337,24 +949,31 @@ class PipelineTask:
         rs_log.save(excluded=None)

         # NOTE: remove this release date from running
-        self.running[
+        self.running[wf.name].remove(next_time)

         # IMPORTANT:
-        # Add the next running datetime to
+        # Add the next running datetime to workflow queue
         finish_time: datetime = datetime.now(tz=cron_tz).replace(
             second=0, microsecond=0
         )
         future_running_time: datetime = gen.next
         while (
-            future_running_time in self.running[
-            or future_running_time in self.queue[
+            future_running_time in self.running[wf.name]
+            or future_running_time in self.queue[wf.name]
             or future_running_time < finish_time
         ):
             future_running_time: datetime = gen.next

-        heappush(self.queue[
+        heappush(self.queue[wf.name], future_running_time)
         logger.debug(f"[CORE]: {'-' * 100}")

+    def __eq__(self, other):
+        if isinstance(other, WorkflowTask):
+            return (
+                self.workflow.name == other.workflow.name
+                and self.on.cronjob == other.on.cronjob
+            )
+

 def queue2str(queue: list[datetime]) -> Iterator[str]:
     return (f"{q:%Y-%m-%d %H:%M:%S}" for q in queue)

@@ -362,16 +981,16 @@ def queue2str(queue: list[datetime]) -> Iterator[str]:

 @catch_exceptions(cancel_on_failure=True)
 def workflow_task(
-
+    workflow_tasks: list[WorkflowTask],
     stop: datetime,
     threads: dict[str, Thread],
 ) -> CancelJob | None:
-    """Workflow task generator that create release pair of
+    """Workflow task generator that create release pair of workflow and on to
     the threading in background.

     This workflow task will start every minute at :02 second.

-    :param
+    :param workflow_tasks:
     :param stop:
     :param threads:
     :rtype: CancelJob | None

@@ -380,11 +999,11 @@ def workflow_task(
     start_date: datetime = datetime.now(tz=tz)
     start_date_minute: datetime = start_date.replace(second=0, microsecond=0)

-    if start_date > stop:
+    if start_date > stop.replace(tzinfo=tz):
         logger.info("[WORKFLOW]: Stop this schedule with datetime stopper.")
         while len(threads) > 0:
             logger.warning(
-                "[WORKFLOW]: Waiting
+                "[WORKFLOW]: Waiting workflow release thread that still "
                 "running in background."
             )
             time.sleep(15)

@@ -392,68 +1011,68 @@ def workflow_task(
         return CancelJob

     # IMPORTANT:
-    #       Filter
+    #       Filter workflow & on that should to run with `workflow_release`
     #       function. It will deplicate running with different schedule value
     #       because I use current time in this condition.
     #
-    #       For example, if a
+    #       For example, if a workflow A queue has '00:02:00' time that
     #       should to run and its schedule has '*/2 * * * *' and '*/35 * * * *'.
     #       This condition will release with 2 threading job.
     #
     #       '00:02:00'  --> '*/2 * * * *'   --> running
     #                   --> '*/35 * * * *'  --> skip
     #
-    for task in
+    for task in workflow_tasks:

         # NOTE: Get incoming datetime queue.
         logger.debug(
-            f"[WORKFLOW]: Current queue: {task.
-            f"{list(queue2str(task.queue[task.
+            f"[WORKFLOW]: Current queue: {task.workflow.name!r} : "
+            f"{list(queue2str(task.queue[task.workflow.name]))}"
         )

         # NOTE: Create minute unit value for any scheduler datetime that
-        # checking a
+        # checking a workflow task should run in this datetime.
         current_running_time: datetime = start_date_minute.astimezone(
             tz=ZoneInfo(task.on.tz)
         )
         if (
-            len(task.queue[task.
-            and current_running_time != task.queue[task.
+            len(task.queue[task.workflow.name]) > 0
+            and current_running_time != task.queue[task.workflow.name][0]
         ) or (
             task.on.next(current_running_time)
-            != task.queue[task.
+            != task.queue[task.workflow.name][0]
         ):
             logger.debug(
                 f"[WORKFLOW]: Skip schedule "
                 f"{current_running_time:%Y-%m-%d %H:%M:%S} "
-                f"for : {task.
+                f"for : {task.workflow.name!r} : {task.on.cronjob}"
             )
             continue
-        elif len(task.queue[task.
+        elif len(task.queue[task.workflow.name]) == 0:
             logger.warning(
-                f"[WORKFLOW]: Queue is empty for : {task.
+                f"[WORKFLOW]: Queue is empty for : {task.workflow.name!r} : "
                 f"{task.on.cronjob}"
             )
             continue

         # NOTE: Remove this datetime from queue.
-        task.queue[task.
+        task.queue[task.workflow.name].pop(0)

         # NOTE: Create thread name that able to tracking with observe schedule
         # job.
         thread_name: str = (
-            f"{task.
+            f"{task.workflow.name}|{str(task.on.cronjob)}|"
             f"{current_running_time:%Y%m%d%H%M}"
         )
-
+        wf_thread: Thread = Thread(
             target=task.release,
             name=thread_name,
             daemon=True,
         )

-        threads[thread_name] =
+        threads[thread_name] = wf_thread

-
+        wf_thread.start()

         delay()

@@ -468,7 +1087,7 @@ def workflow_long_running_task(threads: dict[str, Thread]) -> None:
     :rtype: None
     """
     logger.debug(
-        "[MONITOR]: Start checking long running
+        "[MONITOR]: Start checking long running workflow release task."
     )
     snapshot_threads = list(threads.keys())
     for t_name in snapshot_threads:

@@ -485,18 +1104,25 @@ def workflow_control(
 ) -> list[str]:
     """Workflow scheduler control.

-    :param schedules: A list of
+    :param schedules: A list of workflow names that want to schedule running.
     :param stop: An datetime value that use to stop running schedule.
     :param externals: An external parameters that pass to Loader.
     :rtype: list[str]
     """
+    try:
+        from schedule import Scheduler
+    except ImportError:
+        raise ImportError(
+            "Should install schedule package before use this module."
+        ) from None
+
     tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
     schedule: Scheduler = Scheduler()
     start_date: datetime = datetime.now(tz=tz)

     # NOTE: Design workflow queue caching.
     # ---
-    # {"
+    # {"workflow-name": [<release-datetime>, <release-datetime>, ...]}
     #
     wf_queue: dict[str, list[datetime]] = {}
     wf_running: dict[str, list[datetime]] = {}

@@ -506,18 +1132,20 @@ def workflow_control(
         second=0, microsecond=0
     )

-    # NOTE: Create pair of
-
+    # NOTE: Create pair of workflow and on from schedule model.
+    workflow_tasks: list[WorkflowTask] = []
     for name in schedules:
         sch: Schedule = Schedule.from_loader(name, externals=externals)
-
-            sch.tasks(
+        workflow_tasks.extend(
+            sch.tasks(
+                start_date_waiting, wf_queue, wf_running, externals=externals
+            ),
         )

     # NOTE: This schedule job will start every minute at :02 seconds.
     schedule.every(1).minutes.at(":02").do(
         workflow_task,
-
+        workflow_tasks=workflow_tasks,
         stop=stop
         or (
             start_date

@@ -545,7 +1173,7 @@ def workflow_control(
         if not schedule.get_jobs("control"):
             schedule.clear("monitor")
             logger.warning(
-                f"[WORKFLOW]:
+                f"[WORKFLOW]: Workflow release thread: {thread_releases}"
             )
             logger.warning("[WORKFLOW]: Does not have any schedule jobs !!!")
             break

@@ -559,33 +1187,33 @@ def workflow_control(
     return schedules


-def
+def workflow_runner(
     stop: datetime | None = None,
     externals: DictData | None = None,
     excluded: list[str] | None = None,
 ) -> list[str]:
     """Workflow application that running multiprocessing schedule with chunk of
-
+    workflows that exists in config path.

     :param stop:
     :param excluded:
     :param externals:
     :rtype: list[str]

-    This function will get all
+    This function will get all workflows that include on value that was
     created in config path and chuck it with WORKFLOW_APP_SCHEDULE_PER_PROCESS
     value to multiprocess executor pool.

     The current workflow logic:
     ---
         PIPELINES ==> process 01 ==> schedule 1 minute --> thread of release
-
+                                                           workflow task 01 01
                                                        --> thread of release
-
+                                                           workflow task 01 02
                   ==> process 02 ==> schedule 1 minute --> thread of release
-
+                                                           workflow task 02 01
                                                        --> thread of release
-
+                                                           workflow task 02 02
                   ==> ...
     """
     excluded: list[str] = excluded or []

@@ -613,8 +1241,3 @@ def workflow(
             raise WorkflowException(str(err)) from err
         results.extend(future.result(timeout=1))
     return results
-
-
-if __name__ == "__main__":
-    workflow_rs: list[str] = workflow()
-    logger.info(f"Application run success: {workflow_rs}")