ddeutil-workflow 0.0.8__py3-none-any.whl → 0.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +3 -14
- ddeutil/workflow/api.py +44 -75
- ddeutil/workflow/cli.py +51 -0
- ddeutil/workflow/cron.py +713 -0
- ddeutil/workflow/loader.py +65 -13
- ddeutil/workflow/log.py +147 -49
- ddeutil/workflow/on.py +18 -15
- ddeutil/workflow/pipeline.py +389 -140
- ddeutil/workflow/repeat.py +9 -5
- ddeutil/workflow/route.py +30 -37
- ddeutil/workflow/scheduler.py +398 -659
- ddeutil/workflow/stage.py +145 -73
- ddeutil/workflow/utils.py +133 -42
- ddeutil_workflow-0.0.9.dist-info/METADATA +273 -0
- ddeutil_workflow-0.0.9.dist-info/RECORD +22 -0
- {ddeutil_workflow-0.0.8.dist-info → ddeutil_workflow-0.0.9.dist-info}/WHEEL +1 -1
- ddeutil_workflow-0.0.9.dist-info/entry_points.txt +2 -0
- ddeutil/workflow/app.py +0 -45
- ddeutil_workflow-0.0.8.dist-info/METADATA +0 -266
- ddeutil_workflow-0.0.8.dist-info/RECORD +0 -20
- {ddeutil_workflow-0.0.8.dist-info → ddeutil_workflow-0.0.9.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.8.dist-info → ddeutil_workflow-0.0.9.dist-info}/top_level.txt +0 -0
ddeutil/workflow/pipeline.py
CHANGED
```diff
@@ -12,23 +12,32 @@ import time
 from concurrent.futures import (
     FIRST_EXCEPTION,
     Future,
-    ProcessPoolExecutor,
     ThreadPoolExecutor,
     as_completed,
     wait,
 )
-from datetime import datetime
-from
+from datetime import datetime, timedelta
+from heapq import heappush
 from pickle import PickleError
 from queue import Queue
+from textwrap import dedent
+from threading import Event
 from typing import Optional
 from zoneinfo import ZoneInfo
 
 from pydantic import BaseModel, Field
-from pydantic.functional_validators import model_validator
+from pydantic.functional_validators import field_validator, model_validator
 from typing_extensions import Self
 
-from .__types import
+from .__types import (
+    DictData,
+    DictStr,
+    Matrix,
+    MatrixExclude,
+    MatrixInclude,
+    TupleStr,
+)
+from .cron import CronRunner
 from .exceptions import (
     JobException,
     PipelineException,
```
```diff
@@ -36,17 +45,26 @@ from .exceptions import (
     UtilException,
 )
 from .loader import Loader
+from .log import FileLog, Log
 from .on import On
-from .scheduler import CronRunner
 from .stage import Stage
 from .utils import (
     Param,
     Result,
     cross_product,
     dash2underscore,
+    delay,
     filter_func,
     gen_id,
     get_diff_sec,
+    has_template,
+    param2template,
+)
+
+__all__: TupleStr = (
+    "Strategy",
+    "Job",
+    "Pipeline",
 )
 
 
```
```diff
@@ -166,9 +184,15 @@ class Job(BaseModel):
     ... }
     """
 
-    id: Optional[str] = Field(default=None)
-    desc: Optional[str] = Field(
-
+    id: Optional[str] = Field(default=None, description="A job ID.")
+    desc: Optional[str] = Field(
+        default=None,
+        description="A job description that can be string of markdown content.",
+    )
+    runs_on: Optional[str] = Field(
+        default=None,
+        description="A target executor node for this job use to execution.",
+    )
     stages: list[Stage] = Field(
         default_factory=list,
         description="A list of Stage of this job.",
```
```diff
@@ -182,7 +206,9 @@ class Job(BaseModel):
         description="A strategy matrix that want to generate.",
     )
     run_id: Optional[str] = Field(
-        default=None,
+        default=None,
+        description="A running job ID.",
+        repr=False,
     )
 
     @model_validator(mode="before")
```
```diff
@@ -193,12 +219,31 @@ class Job(BaseModel):
         dash2underscore("runs-on", values)
         return values
 
+    @field_validator("desc", mode="after")
+    def ___prepare_desc(cls, value: str) -> str:
+        """Prepare description string that was created on a template."""
+        return dedent(value)
+
     @model_validator(mode="after")
     def __prepare_running_id(self):
         if self.run_id is None:
             self.run_id = gen_id(self.id or "", unique=True)
+
+        # VALIDATE: Validate job id should not dynamic with params template.
+        if has_template(self.id):
+            raise ValueError("Job ID should not has any template.")
+
         return self
 
+    def get_running_id(self, run_id: str) -> Self:
+        """Return Job model object that changing job running ID with an
+        input running ID.
+
+        :param run_id: A replace job running ID.
+        :rtype: Self
+        """
+        return self.model_copy(update={"run_id": run_id})
+
     def stage(self, stage_id: str) -> Stage:
         """Return stage model that match with an input stage ID."""
         for stage in self.stages:
```
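The new `get_running_id` helper leans on Pydantic v2's `model_copy(update=...)`, which returns a copy of the model with only the listed fields replaced. A minimal sketch (a standalone model, not the package's full `Job`) of why this matters when one job object is shared across runs: the original keeps its `run_id` while the copy carries the replacement.

```python
# Sketch only: a trimmed-down model showing the ``model_copy`` behaviour
# that ``get_running_id`` relies on.
from typing import Optional

from pydantic import BaseModel


class MiniJob(BaseModel):
    id: Optional[str] = None
    run_id: Optional[str] = None

    def get_running_id(self, run_id: str) -> "MiniJob":
        # NOTE: ``model_copy`` returns a new object; ``self`` is untouched.
        return self.model_copy(update={"run_id": run_id})


job = MiniJob(id="first-job", run_id="original")
new_job = job.get_running_id("replaced")
assert job.run_id == "original" and new_job.run_id == "replaced"
```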
```diff
@@ -209,7 +254,6 @@ class Job(BaseModel):
     def set_outputs(self, output: DictData) -> DictData:
         if len(output) > 1 and self.strategy.is_set():
             return {"strategies": output}
-
         return output[next(iter(output))]
 
     def strategy_execute(
```
```diff
@@ -262,7 +306,7 @@ class Job(BaseModel):
         for stage in self.stages:
 
             # IMPORTANT: Change any stage running IDs to this job running ID.
-            stage
+            stage: Stage = stage.get_running_id(self.run_id)
 
             _st_name: str = stage.id or stage.name
 
```
```diff
@@ -303,7 +347,11 @@ class Job(BaseModel):
                 context={
                     gen_id(strategy): {
                         "matrix": strategy,
-
+                        # NOTE: If job strategy executor use multithreading,
+                        #   it will not filter function object from context.
+                        # ---
+                        # "stages": filter_func(context.pop("stages", {})),
+                        "stages": context.pop("stages", {}),
                         "error": {
                             "message": (
                                 "Process Event stopped before execution"
```
```diff
@@ -314,7 +362,7 @@ class Job(BaseModel):
             )
             try:
                 rs: Result = stage.execute(params=context)
-                stage.set_outputs(rs.context,
+                stage.set_outputs(rs.context, to=context)
             except (StageException, UtilException) as err:
                 logging.error(
                     f"({self.run_id}) [JOB]: {err.__class__.__name__}: {err}"
```
```diff
@@ -323,6 +371,11 @@ class Job(BaseModel):
                     f"Get stage execution error: {err.__class__.__name__}: "
                     f"{err}"
                 ) from None
+
+            # NOTE: Remove new stage object that was created from
+            #   ``get_running_id`` method.
+            del stage
+
         return Result(
             status=0,
             context={
```
```diff
@@ -359,53 +412,74 @@ class Job(BaseModel):
                 context=strategy_context,
             )
 
-        # WARNING: (WF001) I got error that raise when use
-        # ``ProcessPoolExecutor``;
-        # ---
-        # _pickle.PicklingError: Can't pickle
-        # <function ??? at 0x000001F0BE80F160>: attribute lookup ???
-        # on ddeutil.workflow.stage failed
+        # # WARNING: (WF001) I got error that raise when use
+        # # ``ProcessPoolExecutor``;
+        # # ---
+        # # _pickle.PicklingError: Can't pickle
+        # # <function ??? at 0x000001F0BE80F160>: attribute lookup ???
+        # # on ddeutil.workflow.stage failed
+        # #
+        # # from multiprocessing import Event, Manager
+        # with Manager() as manager:
+        #     event: Event = manager.Event()
         #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # # NOTE: Start process pool executor for running strategy executor
+        # #   in parallel mode.
+        # with ProcessPoolExecutor(
+        #     max_workers=self.strategy.max_parallel
+        # ) as executor:
+        #     futures: list[Future] = [
+        #         executor.submit(
+        #             self.strategy_execute,
+        #             strategy,
+        #             params=copy.deepcopy(params),
+        #             event=event,
+        #         )
+        #         for strategy in self.strategy.make()
+        #     ]
+        #     if self.strategy.fail_fast:
+        #         rs = self.__catch_fail_fast(event, futures)
+        #     else:
+        #         rs = self.__catch_all_completed(futures)
+
+        # NOTE: Create event for cancel executor stop running.
+        event: Event = Event()
+
+        with ThreadPoolExecutor(
+            max_workers=self.strategy.max_parallel
+        ) as executor:
+            futures: list[Future] = [
+                executor.submit(
+                    self.strategy_execute,
+                    strategy,
+                    params=copy.deepcopy(params),
+                    event=event,
+                )
+                for strategy in self.strategy.make()
+            ]
+            if self.strategy.fail_fast:
+                rs: Result = self.__catch_fail_fast(event, futures)
+            else:
+                rs: Result = self.__catch_all_completed(futures)
         return Result(
             status=0,
             context=rs.context,
         )
 
-    def __catch_fail_fast(self, event: Event,
-        """Job parallel pool
-        stop all not done
-        running
+    def __catch_fail_fast(self, event: Event, futures: list[Future]) -> Result:
+        """Job parallel pool futures catching with fail-fast mode. That will
+        stop all not done futures if it receive the first exception from all
+        running futures.
 
         :param event:
-        :param
+        :param futures: A list of futures.
         :rtype: Result
         """
        strategy_context: DictData = {}
        # NOTE: Get results from a collection of tasks with a
        #   timeout that has the first exception.
        done, not_done = wait(
-
+            futures, timeout=1800, return_when=FIRST_EXCEPTION
        )
        nd: str = (
            f", the strategies do not run is {not_done}" if not_done else ""
```
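The commented-out block documents why this release settles on `ThreadPoolExecutor`: a process pool must pickle the callable and its arguments to ship them to a worker process, and dynamically created functions (closures, decorated stage callables) fail that attribute lookup, which is exactly the `_pickle.PicklingError` quoted above. A minimal sketch reproducing the same class of failure; `make_task` is a hypothetical stand-in for whatever builds a stage function at runtime.

```python
# Sketch only: closures cannot be pickled, so a ProcessPoolExecutor would
# fail on submit, while a ThreadPoolExecutor (shared memory, no pickling)
# runs the same callable without complaint.
import pickle
from concurrent.futures import ThreadPoolExecutor


def make_task(prefix: str):
    def task(value: int) -> str:  # local function: unpicklable by name
        return f"{prefix}-{value}"

    return task


task = make_task("strategy")
try:
    pickle.dumps(task)
except (pickle.PicklingError, AttributeError) as err:
    print(f"process pool would fail: {err}")

with ThreadPoolExecutor(max_workers=2) as executor:
    print(executor.submit(task, 1).result())  # -> strategy-1
```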
```diff
@@ -416,37 +490,38 @@ class Job(BaseModel):
         event.set()
 
         # NOTE: Cancel any scheduled tasks
-        for future in
+        for future in futures:
             future.cancel()
 
         status: int = 0
-        for
-            if
+        for future in done:
+            if future.exception():
                 status = 1
                 logging.error(
                     f"({self.run_id}) [JOB]: One stage failed with: "
-                    f"{
+                    f"{future.exception()}, shutting down this future."
                 )
-            elif
+            elif future.cancelled():
                 continue
             else:
-                rs: Result =
+                rs: Result = future.result(timeout=60)
                 strategy_context.update(rs.context)
         return Result(
             status=status,
             context=strategy_context,
         )
 
-    def __catch_all_completed(self,
-        """Job parallel pool
+    def __catch_all_completed(self, futures: list[Future]) -> Result:
+        """Job parallel pool futures catching with all-completed mode.
 
-        :param
+        :param futures: A list of futures.
+        :rtype: Result
         """
         strategy_context: DictData = {}
         status: int = 0
-        for
+        for future in as_completed(futures):
             try:
-                rs: Result =
+                rs: Result = future.result(timeout=60)
                 strategy_context.update(rs.context)
             except PickleError as err:
                 # NOTE: (WF001) I do not want to fix this issue because
```
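The rewritten `__catch_fail_fast` follows the standard `concurrent.futures` fail-fast recipe: `wait(..., return_when=FIRST_EXCEPTION)` returns as soon as any future raises, the shared `Event` lets already-running workers stop cooperatively, and `cancel()` drops futures that have not started yet. A hedged sketch of that pattern outside the Job model (the `strategy` function here is illustrative):

```python
# Sketch only: fail-fast over a pool of workers.
import time
from concurrent.futures import FIRST_EXCEPTION, ThreadPoolExecutor, wait
from threading import Event


def strategy(index: int, event: Event) -> int:
    if index == 1:
        raise RuntimeError("boom")
    time.sleep(0.2)
    # NOTE: Cooperative stop for tasks that were already running.
    return -1 if event.is_set() else index


event = Event()
with ThreadPoolExecutor(max_workers=2) as executor:
    futures = [executor.submit(strategy, i, event) for i in range(4)]
    done, not_done = wait(futures, timeout=1800, return_when=FIRST_EXCEPTION)
    if any(future.exception() for future in done):
        event.set()
        for future in not_done:
            future.cancel()  # NOTE: only not-yet-started futures cancel.
```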
```diff
@@ -459,8 +534,8 @@ class Job(BaseModel):
             except TimeoutError:
                 status = 1
                 logging.warning("Task is hanging. Attempting to kill.")
-
-                if not
+                future.cancel()
+                if not future.cancelled():
                     logging.warning("Failed to cancel the task.")
                 else:
                     logging.warning("Task canceled successfully.")
```
```diff
@@ -475,7 +550,7 @@ class Job(BaseModel):
 
 
 class Pipeline(BaseModel):
-    """Pipeline Model this is the main
+    """Pipeline Model this is the main future of this project because it use to
     be workflow data for running everywhere that you want. It use lightweight
     coding line to execute it.
     """
```
```diff
@@ -484,8 +559,7 @@ class Pipeline(BaseModel):
     desc: Optional[str] = Field(
         default=None,
         description=(
-            "A pipeline description that
-            "content."
+            "A pipeline description that can be string of markdown content."
         ),
     )
     params: dict[str, Param] = Field(
```
```diff
@@ -501,20 +575,30 @@ class Pipeline(BaseModel):
         description="A mapping of job ID and job model that already loaded.",
     )
     run_id: Optional[str] = Field(
-        default=None,
+        default=None,
+        description="A running pipeline ID.",
+        repr=False,
     )
 
+    @property
+    def new_run_id(self) -> str:
+        """Running ID of this pipeline that always generate new unique value."""
+        return gen_id(self.name, unique=True)
+
     @classmethod
     def from_loader(
         cls,
         name: str,
         externals: DictData | None = None,
     ) -> Self:
-        """Create Pipeline instance from the Loader object
+        """Create Pipeline instance from the Loader object that only receive
+        an input pipeline name. The loader object will use this pipeline name to
+        searching configuration data of this pipeline model in conf path.
 
         :param name: A pipeline name that want to pass to Loader object.
         :param externals: An external parameters that want to pass to Loader
             object.
+        :rtype: Self
         """
         loader: Loader = Loader(name, externals=(externals or {}))
         loader_data: DictData = copy.deepcopy(loader.data)
```
```diff
@@ -537,6 +621,8 @@ class Pipeline(BaseModel):
             on = [on]
         if any(not isinstance(i, (dict, str)) for i in on):
             raise TypeError("The ``on`` key should be list of str or dict")
+
+        # NOTE: Pass on value to Loader and keep on model object to on field
         data["on"] = [
             (
                 Loader(n, externals=(externals or {})).data
```
```diff
@@ -562,25 +648,48 @@ class Pipeline(BaseModel):
         }
         return values
 
+    @field_validator("desc", mode="after")
+    def ___prepare_desc(cls, value: str) -> str:
+        """Prepare description string that was created on a template."""
+        return dedent(value)
+
     @model_validator(mode="after")
     def __validate_jobs_need_and_prepare_running_id(self):
+        """Validate each need job in any jobs should exists."""
         for job in self.jobs:
             if not_exist := [
                 need for need in self.jobs[job].needs if need not in self.jobs
             ]:
                 raise PipelineException(
                     f"This needed jobs: {not_exist} do not exist in this "
-                    f"pipeline."
+                    f"pipeline, {self.name!r}"
                 )
 
             # NOTE: update a job id with its job id from pipeline template
             self.jobs[job].id = job
 
         if self.run_id is None:
-            self.run_id =
+            self.run_id = self.new_run_id
+
+        # VALIDATE: Validate pipeline name should not dynamic with params
+        #   template.
+        if has_template(self.name):
+            raise ValueError(
+                f"Pipeline name should not has any template, please check, "
+                f"{self.name!r}."
+            )
 
         return self
 
+    def get_running_id(self, run_id: str) -> Self:
+        """Return Pipeline model object that changing pipeline running ID with
+        an input running ID.
+
+        :param run_id: A replace pipeline running ID.
+        :rtype: Self
+        """
+        return self.model_copy(update={"run_id": run_id})
+
     def job(self, name: str) -> Job:
         """Return Job model that exists on this pipeline.
 
```
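The `needs` validation above is a plain existence test over the jobs mapping: every dependency a job declares must itself be a key in `jobs`. A minimal sketch of the same check outside the Pydantic model (job names are hypothetical):

```python
# Sketch only: the dependency-existence check the validator performs.
jobs: dict[str, dict] = {
    "extract": {"needs": []},
    "transform": {"needs": ["extract"]},
    "load": {"needs": ["transform", "cleanup"]},  # "cleanup" is missing
}

for job, conf in jobs.items():
    if not_exist := [need for need in conf["needs"] if need not in jobs]:
        # Raises for "load" because "cleanup" is not a job key.
        raise ValueError(
            f"This needed jobs: {not_exist} do not exist in this pipeline."
        )
```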
|
```diff
@@ -591,7 +700,10 @@ class Pipeline(BaseModel):
         :returns: A job model that exists on this pipeline by input name.
         """
         if name not in self.jobs:
-            raise ValueError(
+            raise ValueError(
+                f"A Job {name!r} does not exists in this pipeline, "
+                f"{self.name!r}"
+            )
         return self.jobs[name]
 
     def parameterize(self, params: DictData) -> DictData:
```
```diff
@@ -629,52 +741,146 @@ class Pipeline(BaseModel):
     def release(
         self,
         on: On,
-        params: DictData
+        params: DictData,
         *,
-        waiting_sec: int =
-        sleep_interval: int =
-
+        waiting_sec: int = 55,
+        sleep_interval: int = 15,
+        log: Log = None,
+        lq: list[datetime] = None,
+    ) -> Result:
         """Start running pipeline with the on schedule in period of 30 minutes.
         That mean it will still running at background 30 minutes until the
         schedule matching with its time.
+
+        This method allow pipeline use log object to save the execution
+        result to log destination like file log to local /logs directory.
+
+        :rtype: Result
         """
-
-
+        delay()
+        log: Log = log or FileLog
+        current_running_time = datetime.now()
+        if not (
+            latest_running_time := log.latest_point(name=self.name, queue=lq)
+        ) or (
+            latest_running_time.replace(tzinfo=ZoneInfo(on.tz))
+            < current_running_time.replace(tzinfo=ZoneInfo(on.tz))
+        ):
+            latest_running_time: datetime = current_running_time.replace(
+                tzinfo=ZoneInfo(on.tz)
+            )
+        else:
+            latest_running_time: datetime = latest_running_time.replace(
+                tzinfo=ZoneInfo(on.tz)
+            )
 
-        gen: CronRunner = on.generate(
+        gen: CronRunner = on.generate(
+            latest_running_time + timedelta(seconds=1)
+        )
         tz: ZoneInfo = gen.tz
+
+        # NOTE: get next schedule time that generate from now.
         next_running_time: datetime = gen.next
 
-
+        # NOTE: get next utils it does not logging.
+        # while log.is_pointed(self.name, next_running_time, queue=lq):
+        #     next_running_time: datetime = gen.next
+        while log.is_pointed(self.name, next_running_time, queue=lq):
+            next_running_time: datetime = gen.next
+
+        heappush(lq, next_running_time)
+
+        # VALIDATE: Check the different time between the next schedule time and
+        #   now that less than waiting period (second unit).
+        if get_diff_sec(next_running_time, tz=tz) <= waiting_sec:
             logging.debug(
-                f"[CORE]: {self.name}
-                f"{next_running_time:%Y-%m-%d %H:%M:%S}"
+                f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+                f"Closely to run >> {next_running_time:%Y-%m-%d %H:%M:%S}"
             )
 
             # NOTE: Release when the time is nearly to schedule time.
-            while (duration := get_diff_sec(next_running_time, tz=tz)) >
-
+            while (duration := get_diff_sec(next_running_time, tz=tz)) > (
+                sleep_interval + 5
+            ):
                 logging.debug(
-                    f"[CORE]: {self.name!r} :
+                    f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+                    f"Sleep until: {duration}"
                 )
+                time.sleep(sleep_interval)
 
-            time.sleep(
-            rs: Result = self.execute(params=params)
-            logging.debug(f"{rs.context}")
+            time.sleep(0.5)
 
-
-
+            # NOTE: Release parameter that use to change if params has
+            #   templating.
+            release_params: DictData = {
+                "release": {
+                    "logical_date": next_running_time,
+                },
+            }
+
+            # WARNING: Re-create pipeline object that use new running pipeline
+            #   ID.
+            pipeline: Self = self.get_running_id(run_id=self.new_run_id)
+            rs: Result = pipeline.execute(
+                params=param2template(params, release_params),
+            )
+            logging.debug(
+                f"({pipeline.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+                f"End release"
+            )
 
-
-
-
+            del pipeline
+
+            rs.set_parent_run_id(self.run_id)
+            rs_log: Log = log.model_validate(
+                {
+                    "name": self.name,
+                    "on": str(on.cronjob),
+                    "release": next_running_time,
+                    "context": rs.context,
+                    "parent_run_id": rs.run_id,
+                    "run_id": rs.run_id,
+                }
+            )
+            rs_log.save()
+        else:
+            logging.debug(
+                f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+                f"Does not closely >> {next_running_time:%Y-%m-%d %H:%M:%S}"
+            )
+            rs = Result(status=0, context={"params": params})
+
+        if lq is None:
+            return rs
+
+        lq.remove(next_running_time)
+        time.sleep(0.25)
+        return rs
+
+    def poke(
+        self,
+        params: DictData | None = None,
+        *,
+        log: Log | None = None,
+    ) -> list[Result]:
+        """Poke pipeline with threading executor pool for executing with all its
+        schedules that was set on the ``on`` value. This method will observe its
+        schedule that nearing to run with the ``self.release()`` method.
+
+        :param params: A parameters that want to pass to the release method.
+        :param log: A log object that want to use on this poking process.
+        :rtype: list[Result]
         """
         params: DictData = params or {}
-        logging.info(
-
-
-
-
+        logging.info(f"({self.run_id}) [CORE]: Start Poking: {self.name!r} ...")
+        results: list[Result] = []
+        log_queue: list[datetime] = []
+
+        # NOTE: If this pipeline does not set schedule, it will return empty
+        #   result.
+        if len(self.on) == 0:
+            return results
+
         with ThreadPoolExecutor(
             max_workers=int(
                 os.getenv("WORKFLOW_CORE_MAX_PIPELINE_POKING", "4")
```
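Because `poke` runs one `release` thread per schedule against a shared `lq` list, the `is_pointed` loop plus `heappush` is what stops two threads from claiming the same schedule tick. A hedged sketch of that claim-then-wait bookkeeping: `next_tick` stands in for `gen.next` of a `CronRunner` on a five-minute cron, and the list-membership test only approximates what `log.is_pointed` checks.

```python
# Sketch only: skip ticks already claimed in the shared heap, then claim
# the first free one before sleeping toward it.
from datetime import datetime, timedelta
from heapq import heappush

lq: list[datetime] = []  # shared log queue (a heap) owned by ``poke``


def next_tick(start: datetime) -> datetime:
    # Hypothetical stand-in for ``gen.next`` on "*/5 * * * *".
    minute = (start.minute // 5 + 1) * 5
    return start.replace(second=0, microsecond=0) + timedelta(
        minutes=minute - start.minute
    )


next_running_time = next_tick(datetime(2024, 1, 1, 0, 1))
while next_running_time in lq:  # approximates ``log.is_pointed``
    next_running_time = next_tick(next_running_time)
heappush(lq, next_running_time)  # claim the tick before waiting on it
print(next_running_time)  # -> 2024-01-01 00:05:00
```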
```diff
@@ -685,13 +891,22 @@ class Pipeline(BaseModel):
                     self.release,
                     on,
                     params=params,
+                    log=log,
+                    lq=log_queue,
                 )
                 for on in self.on
             ]
             for future in as_completed(futures):
-                rs = future.result()
-                logging.info(rs)
+                rs: Result = future.result()
+                logging.info(rs.context.get("params", {}))
                 results.append(rs)
+
+        if len(log_queue) > 0:
+            logging.error(
+                f"({self.run_id}) [CORE]: Log Queue does empty when poke "
+                f"is finishing."
+            )
+
         return results
 
     def job_execute(
```
```diff
@@ -700,6 +915,7 @@ class Pipeline(BaseModel):
         params: DictData,
     ) -> Result:
         """Job Executor that use on pipeline executor.
+
         :param job: A job ID that want to execute.
         :param params: A params that was parameterized from pipeline execution.
         """
```
```diff
@@ -708,14 +924,17 @@ class Pipeline(BaseModel):
             raise PipelineException(
                 f"The job ID: {job} does not exists on {self.name!r} pipeline."
             )
-
         try:
             logging.info(f"({self.run_id}) [PIPELINE]: Start execute: {job!r}")
-
+
+            # IMPORTANT:
+            #   Change any job running IDs to this pipeline running ID.
+            job_obj: Job = self.jobs[job].get_running_id(self.run_id)
             j_rs: Result = job_obj.execute(params=params)
+
         except JobException as err:
             raise PipelineException(
-                f"The job ID: {job} get
+                f"The job ID: {job} get error: {err.__class__.__name__}:"
                 f"\n{err}"
             ) from None
         return Result(
```
```diff
@@ -738,9 +957,8 @@ class Pipeline(BaseModel):
             for limit time of execution and waiting job dependency.
         :rtype: Result
 
-        ---
-
         See Also:
+        ---
 
         The result of execution process for each jobs and stages on this
         pipeline will keeping in dict which able to catch out with all jobs and
```
```diff
@@ -752,10 +970,7 @@ class Pipeline(BaseModel):
         ... ${job-name}.stages.${stage-id}.outputs.${key}
 
         """
-        logging.info(
-            f"[CORE]: Start Execute: {self.name}:"
-            f"{gen_id(self.name, unique=True)}"
-        )
+        logging.info(f"({self.run_id}) [CORE]: Start Execute: {self.name} ...")
         params: DictData = params or {}
 
         # NOTE: It should not do anything if it does not have job.
```
```diff
@@ -777,15 +992,15 @@ class Pipeline(BaseModel):
         rs: Result = Result(context=self.parameterize(params))
         try:
             rs.receive(
-                self.__exec_non_threading(rs,
+                self.__exec_non_threading(rs, ts, timeout=timeout)
                 if (
                     worker := int(
-                        os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "
+                        os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "2")
                     )
                 )
                 == 1
                 else self.__exec_threading(
-                    rs,
+                    rs, ts, worker=worker, timeout=timeout
                 )
             )
             return rs
```
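The executor dispatch hinges on a single environment variable: `WORKFLOW_CORE_MAX_JOB_PARALLEL` equal to 1 selects the sequential executor, any other value the threaded one with that worker count. A minimal sketch of the same branch (the two functions are illustrative stand-ins for the private methods):

```python
# Sketch only: env-var driven choice between the two job executors.
import os


def exec_non_threading() -> str:
    return "sequential job executor"


def exec_threading(worker: int) -> str:
    return f"threaded job executor with {worker} workers"


worker: int = int(os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "2"))
print(exec_non_threading() if worker == 1 else exec_threading(worker))
```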
```diff
@@ -797,14 +1012,30 @@ class Pipeline(BaseModel):
     def __exec_threading(
         self,
         rs: Result,
-        job_queue: Queue,
         ts: float,
         *,
-        worker: int =
+        worker: int = 2,
         timeout: int = 600,
     ) -> Result:
-        """Pipeline threading execution.
+        """Pipeline threading execution.
+
+        :param rs:
+        :param ts:
+        :param timeout: A second value unit that bounding running time.
+        :param worker: A number of threading executor pool size.
+        :rtype: Result
+        """
         not_time_out_flag: bool = True
+        logging.debug(
+            f"({self.run_id}): [CORE]: Run {self.name} with threading job "
+            f"executor"
+        )
+
+        # NOTE: Create a job queue that keep the job that want to running after
+        #   it dependency condition.
+        job_queue: Queue = Queue()
+        for job_id in self.jobs:
+            job_queue.put(job_id)
 
         # IMPORTANT: The job execution can run parallel and waiting by
         #   needed.
```
```diff
@@ -816,10 +1047,6 @@ class Pipeline(BaseModel):
             job_id: str = job_queue.get()
             job: Job = self.jobs[job_id]
 
-            # IMPORTANT:
-            #   Change any job running IDs to this pipeline running ID.
-            job.run_id = self.run_id
-
             if any(need not in rs.context["jobs"] for need in job.needs):
                 job_queue.put(job_id)
                 time.sleep(0.5)
```
```diff
@@ -832,6 +1059,10 @@ class Pipeline(BaseModel):
                     params=copy.deepcopy(rs.context),
                 ),
             )
+            job_queue.task_done()
+
+        # NOTE: Wait for all items to finish processing
+        job_queue.join()
 
         for future in as_completed(futures):
             if err := future.exception():
```
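The `task_done()`/`join()` pair added here follows the `queue.Queue` contract: each `get()` must eventually be matched by one `task_done()`, otherwise `join()` blocks forever on the unfinished-task counter. A hedged sketch of that bookkeeping with the requeue path accounted for explicitly (the hunk above only shows the success-path `task_done()`):

```python
# Sketch only: dependency-aware draining of a job queue where requeued
# items still acknowledge their ``get()``.
from queue import Queue

job_queue: Queue = Queue()
for job_id in ("load", "extract", "transform"):
    job_queue.put(job_id)

done: set[str] = set()
while not job_queue.empty():
    job_id = job_queue.get()
    if job_id == "load" and "transform" not in done:
        job_queue.put(job_id)  # dependency not ready yet: requeue
        job_queue.task_done()  # still acknowledge this ``get()``
        continue
    done.add(job_id)
    job_queue.task_done()

job_queue.join()  # returns immediately: the counter is back to zero
print(sorted(done))  # -> ['extract', 'load', 'transform']
```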
```diff
@@ -841,37 +1072,49 @@ class Pipeline(BaseModel):
             # NOTE: Update job result to pipeline result.
             rs.receive_jobs(future.result(timeout=20))
 
-        if
-
-
-
-
-
-        )
-
-
+        if not_time_out_flag:
+            rs.status = 0
+            return rs
+
+        # NOTE: Raise timeout error.
+        logging.warning(
+            f"({self.run_id}) [PIPELINE]: Execution of pipeline was timeout"
+        )
+        raise PipelineException(
+            f"Execution of pipeline: {self.name} was timeout"
+        )
 
     def __exec_non_threading(
         self,
         rs: Result,
-        job_queue: Queue,
         ts: float,
         *,
         timeout: int = 600,
     ) -> Result:
-        """Pipeline non-threading execution.
+        """Pipeline non-threading execution.
+
+        :param rs:
+        :param ts:
+        :param timeout: A second value unit that bounding running time.
+        :rtype: Result
+        """
         not_time_out_flag: bool = True
-        logging.
+        logging.debug(
+            f"({self.run_id}) [CORE]: Run {self.name} with non-threading job "
+            f"executor"
+        )
+        # NOTE: Create a job queue that keep the job that want to running after
+        #   it dependency condition.
+        job_queue: Queue = Queue()
+        for job_id in self.jobs:
+            job_queue.put(job_id)
+
         while not job_queue.empty() and (
             not_time_out_flag := ((time.monotonic() - ts) < timeout)
         ):
             job_id: str = job_queue.get()
             job: Job = self.jobs[job_id]
 
-            # IMPORTANT:
-            #   Change any job running IDs to this pipeline running ID.
-            job.run_id = self.run_id
-
             # NOTE:
             if any(need not in rs.context["jobs"] for need in job.needs):
                 job_queue.put(job_id)
```
```diff
@@ -881,13 +1124,19 @@ class Pipeline(BaseModel):
             # NOTE: Start job execution.
             job_rs = self.job_execute(job_id, params=copy.deepcopy(rs.context))
             rs.context["jobs"].update(job_rs.context)
+            job_queue.task_done()
 
-
-
-
-
-
-
-
-
+        # NOTE: Wait for all items to finish processing
+        job_queue.join()
+
+        if not_time_out_flag:
+            rs.status = 0
+            return rs
+
+        # NOTE: Raise timeout error.
+        logging.warning(
+            f"({self.run_id}) [PIPELINE]: Execution of pipeline was timeout"
+        )
+        raise PipelineException(
+            f"Execution of pipeline: {self.name} was timeout"
+        )
```