ddeutil-workflow 0.0.20__py3-none-any.whl → 0.0.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__types.py +1 -0
- ddeutil/workflow/conf.py +7 -0
- ddeutil/workflow/job.py +82 -78
- ddeutil/workflow/on.py +3 -0
- ddeutil/workflow/stage.py +19 -11
- ddeutil/workflow/utils.py +18 -2
- ddeutil/workflow/workflow.py +296 -172
- {ddeutil_workflow-0.0.20.dist-info → ddeutil_workflow-0.0.22.dist-info}/METADATA +3 -1
- ddeutil_workflow-0.0.22.dist-info/RECORD +22 -0
- ddeutil_workflow-0.0.20.dist-info/RECORD +0 -22
- {ddeutil_workflow-0.0.20.dist-info → ddeutil_workflow-0.0.22.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.20.dist-info → ddeutil_workflow-0.0.22.dist-info}/WHEEL +0 -0
- {ddeutil_workflow-0.0.20.dist-info → ddeutil_workflow-0.0.22.dist-info}/entry_points.txt +0 -0
- {ddeutil_workflow-0.0.20.dist-info → ddeutil_workflow-0.0.22.dist-info}/top_level.txt +0 -0
ddeutil/workflow/workflow.py
CHANGED
@@ -3,8 +3,7 @@
|
|
3
3
|
# Licensed under the MIT License. See LICENSE in the project root for
|
4
4
|
# license information.
|
5
5
|
# ------------------------------------------------------------------------------
|
6
|
-
"""
|
7
|
-
The main schedule running is ``workflow_runner`` function that trigger the
|
6
|
+
"""The main schedule running is ``workflow_runner`` function that trigger the
|
8
7
|
multiprocess of ``workflow_control`` function for listing schedules on the
|
9
8
|
config by ``Loader.finds(Schedule)``.
|
10
9
|
|
@@ -12,6 +11,7 @@ config by ``Loader.finds(Schedule)``.
|
|
12
11
|
functions; ``workflow_task``, and ``workflow_monitor``.
|
13
12
|
|
14
13
|
``workflow_control`` --- Every minute at :02 --> ``workflow_task``
|
14
|
+
|
15
15
|
--- Every 5 minutes --> ``workflow_monitor``
|
16
16
|
|
17
17
|
The ``workflow_task`` will run ``task.release`` method in threading object
|
@@ -49,6 +49,7 @@ from .on import On
|
|
49
49
|
from .utils import (
|
50
50
|
Param,
|
51
51
|
Result,
|
52
|
+
cut_id,
|
52
53
|
delay,
|
53
54
|
gen_id,
|
54
55
|
get_diff_sec,
|
@@ -70,7 +71,7 @@ __all__: TupleStr = (
|
|
70
71
|
@total_ordering
|
71
72
|
@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
|
72
73
|
class WorkflowRelease:
|
73
|
-
"""Workflow release
|
74
|
+
"""Workflow release Pydantic dataclass object."""
|
74
75
|
|
75
76
|
date: datetime
|
76
77
|
offset: float
|
@@ -86,6 +87,12 @@ class WorkflowRelease:
|
|
86
87
|
|
87
88
|
@classmethod
|
88
89
|
def from_dt(cls, dt: datetime) -> Self:
|
90
|
+
"""Construct WorkflowRelease via datetime object only.
|
91
|
+
|
92
|
+
:param dt: A datetime object.
|
93
|
+
|
94
|
+
:rtype: Self
|
95
|
+
"""
|
89
96
|
return cls(
|
90
97
|
date=dt,
|
91
98
|
offset=0,
|
@@ -95,6 +102,9 @@ class WorkflowRelease:
|
|
95
102
|
)
|
96
103
|
|
97
104
|
def __eq__(self, other: WorkflowRelease | datetime) -> bool:
|
105
|
+
"""Override the equality operator to compare only against the same type or
|
106
|
+
datetime.
|
107
|
+
"""
|
98
108
|
if isinstance(other, self.__class__):
|
99
109
|
return self.date == other.date
|
100
110
|
elif isinstance(other, datetime):
|
@@ -102,6 +112,9 @@ class WorkflowRelease:
|
|
102
112
|
return NotImplemented
|
103
113
|
|
104
114
|
def __lt__(self, other: WorkflowRelease | datetime) -> bool:
|
115
|
+
"""Override the less-than operator to compare only against the same type or
|
116
|
+
datetime.
|
117
|
+
"""
|
105
118
|
if isinstance(other, self.__class__):
|
106
119
|
return self.date < other.date
|
107
120
|
elif isinstance(other, datetime):
|
@@ -111,47 +124,84 @@ class WorkflowRelease:
|
|
111
124
|
|
112
125
|
@dataclass
|
113
126
|
class WorkflowQueue:
|
114
|
-
"""Workflow Queue object."""
|
127
|
+
"""Workflow Queue object that manages WorkflowRelease objects."""
|
115
128
|
|
116
129
|
queue: list[WorkflowRelease] = field(default_factory=list)
|
117
130
|
running: list[WorkflowRelease] = field(default_factory=list)
|
118
131
|
complete: list[WorkflowRelease] = field(default_factory=list)
|
119
132
|
|
133
|
+
@classmethod
|
134
|
+
def from_list(
|
135
|
+
cls, queue: list[datetime] | list[WorkflowRelease] | None
|
136
|
+
) -> Self:
|
137
|
+
"""Construct WorkflowQueue object from an input queue value that passing
|
138
|
+
with list of datetime or list of WorkflowRelease.
|
139
|
+
|
140
|
+
:raise TypeError: If the type of an input queue is not valid.
|
141
|
+
|
142
|
+
:rtype: Self
|
143
|
+
"""
|
144
|
+
if queue is None:
|
145
|
+
return cls()
|
146
|
+
elif isinstance(queue, list):
|
147
|
+
|
148
|
+
if all(isinstance(q, datetime) for q in queue):
|
149
|
+
return cls(queue=[WorkflowRelease.from_dt(q) for q in queue])
|
150
|
+
|
151
|
+
elif all(isinstance(q, WorkflowRelease) for q in queue):
|
152
|
+
return cls(queue=queue)
|
153
|
+
|
154
|
+
raise TypeError(
|
155
|
+
"Type of the queue does not valid with WorkflowQueue "
|
156
|
+
"or list of datetime or list of WorkflowRelease."
|
157
|
+
)
|
158
|
+
|
120
159
|
@property
|
121
160
|
def is_queued(self) -> bool:
|
122
|
-
"""Return True if it has
|
161
|
+
"""Return True if it has workflow release object in the queue.
|
162
|
+
|
163
|
+
:rtype: bool
|
164
|
+
"""
|
123
165
|
return len(self.queue) > 0
|
124
166
|
|
125
|
-
def check_queue(self,
|
167
|
+
def check_queue(self, value: WorkflowRelease) -> bool:
|
126
168
|
"""Check a WorkflowRelease value already exists in list of tracking
|
127
169
|
queues.
|
128
170
|
|
129
|
-
:param
|
171
|
+
:param value: A WorkflowRelease object to check for existence in the
|
172
|
+
queues.
|
173
|
+
|
174
|
+
:rtype: bool
|
130
175
|
"""
|
131
176
|
return (
|
132
|
-
(
|
133
|
-
or (
|
134
|
-
or (
|
177
|
+
(value in self.queue)
|
178
|
+
or (value in self.running)
|
179
|
+
or (value in self.complete)
|
135
180
|
)
|
136
181
|
|
137
|
-
def push_queue(self,
|
138
|
-
|
182
|
+
def push_queue(self, value: WorkflowRelease) -> Self:
|
183
|
+
"""Push data to the queue."""
|
184
|
+
heappush(self.queue, value)
|
139
185
|
return self
|
140
186
|
|
141
|
-
def push_running(self,
|
142
|
-
|
187
|
+
def push_running(self, value: WorkflowRelease) -> Self:
|
188
|
+
"""Push data to the running."""
|
189
|
+
heappush(self.running, value)
|
143
190
|
return self
|
144
191
|
|
145
|
-
def remove_running(self,
|
146
|
-
|
147
|
-
|
192
|
+
def remove_running(self, value: WorkflowRelease) -> Self:
|
193
|
+
"""Remove data on the running if it exists."""
|
194
|
+
if value in self.running:
|
195
|
+
self.running.remove(value)
|
148
196
|
|
149
197
|
|
150
198
|
class Workflow(BaseModel):
|
151
|
-
"""Workflow Pydantic
|
152
|
-
|
153
|
-
|
154
|
-
|
199
|
+
"""Workflow Pydantic model.
|
200
|
+
|
201
|
+
This is the main feature of this project because it is used as the workflow
|
202
|
+
data for running everywhere that you want or for scheduling tasks in the
|
203
|
+
background. It uses lightweight code from the Pydantic model and enhances the
|
204
|
+
execute method on it.
|
155
205
|
"""
|
156
206
|
|
157
207
|
name: str = Field(description="A workflow name.")
|
@@ -184,9 +234,12 @@ class Workflow(BaseModel):
|
|
184
234
|
an input workflow name. The loader object will use this workflow name to
|
185
235
|
searching configuration data of this workflow model in conf path.
|
186
236
|
|
237
|
+
:raise ValueError: If the type does not match with current object.
|
238
|
+
|
187
239
|
:param name: A workflow name that want to pass to Loader object.
|
188
240
|
:param externals: An external parameters that want to pass to Loader
|
189
241
|
object.
|
242
|
+
|
190
243
|
:rtype: Self
|
191
244
|
"""
|
192
245
|
loader: Loader = Loader(name, externals=(externals or {}))
|
@@ -235,7 +288,7 @@ class Workflow(BaseModel):
|
|
235
288
|
|
236
289
|
@model_validator(mode="before")
|
237
290
|
def __prepare_model_before__(cls, values: DictData) -> DictData:
|
238
|
-
"""Prepare the params key."""
|
291
|
+
"""Prepare the params key in the data model before validating."""
|
239
292
|
# NOTE: Prepare params type if it passing with only type value.
|
240
293
|
if params := values.pop("params", {}):
|
241
294
|
values["params"] = {
|
@@ -258,9 +311,17 @@ class Workflow(BaseModel):
|
|
258
311
|
return dedent(value)
|
259
312
|
|
260
313
|
@field_validator("on", mode="after")
|
261
|
-
def
|
314
|
+
def __on_no_dup_and_reach_limit__(cls, value: list[On]) -> list[On]:
|
262
315
|
"""Validate the on fields should not contain duplicate values and if it
|
263
|
-
contain every minute value
|
316
|
+
contain the every minute value more than one value, it will remove to
|
317
|
+
only one value.
|
318
|
+
|
319
|
+
:raise ValueError: If it has some duplicate value.
|
320
|
+
|
321
|
+
:param value: A list of on object.
|
322
|
+
|
323
|
+
:rtype: list[On]
|
324
|
+
"""
|
264
325
|
set_ons: set[str] = {str(on.cronjob) for on in value}
|
265
326
|
if len(set_ons) != len(value):
|
266
327
|
raise ValueError(
|
@@ -273,12 +334,21 @@ class Workflow(BaseModel):
|
|
273
334
|
# "If it has every minute cronjob on value, it should has only "
|
274
335
|
# "one value in the on field."
|
275
336
|
# )
|
337
|
+
|
338
|
+
if len(set_ons) > config.max_on_per_workflow:
|
339
|
+
raise ValueError(
|
340
|
+
f"The number of the on should not more than "
|
341
|
+
f"{config.max_on_per_workflow} crontab."
|
342
|
+
)
|
276
343
|
return value
|
277
344
|
|
278
345
|
@model_validator(mode="after")
|
279
346
|
def __validate_jobs_need__(self) -> Self:
|
280
347
|
"""Validate each need job in any jobs should exists.
|
281
348
|
|
349
|
+
:raise WorkflowException: If it has not exists need value in this
|
350
|
+
workflow job.
|
351
|
+
|
282
352
|
:rtype: Self
|
283
353
|
"""
|
284
354
|
for job in self.jobs:
|
@@ -368,22 +438,21 @@ class Workflow(BaseModel):
|
|
368
438
|
self,
|
369
439
|
release: datetime | WorkflowRelease,
|
370
440
|
params: DictData,
|
371
|
-
run_id: str | None = None,
|
372
441
|
*,
|
442
|
+
run_id: str | None = None,
|
373
443
|
log: type[Log] = None,
|
374
|
-
queue:
|
444
|
+
queue: (
|
445
|
+
WorkflowQueue | list[datetime] | list[WorkflowRelease] | None
|
446
|
+
) = None,
|
375
447
|
) -> Result:
|
376
448
|
"""Release the workflow execution with overriding parameter with the
|
377
449
|
release templating that include logical date (release date), execution
|
378
450
|
date, or running id to the params.
|
379
451
|
|
380
452
|
This method allow workflow use log object to save the execution
|
381
|
-
result to log destination like file log to local `/logs` directory.
|
453
|
+
result to log destination like file log to the local `/logs` directory.
|
382
454
|
|
383
|
-
|
384
|
-
the queue object.
|
385
|
-
|
386
|
-
:param release: A release datetime.
|
455
|
+
:param release: A release datetime or WorkflowRelease object.
|
387
456
|
:param params: A workflow parameter that pass to execute method.
|
388
457
|
:param queue: A list of release time that already queue.
|
389
458
|
:param run_id: A workflow running ID for this release.
|
@@ -394,23 +463,23 @@ class Workflow(BaseModel):
|
|
394
463
|
"""
|
395
464
|
log: type[Log] = log or FileLog
|
396
465
|
run_id: str = run_id or gen_id(self.name, unique=True)
|
466
|
+
rs_release: Result = Result(run_id=run_id)
|
397
467
|
|
398
468
|
# VALIDATE: Change queue value to WorkflowQueue object.
|
399
|
-
if queue is None:
|
400
|
-
queue: WorkflowQueue = WorkflowQueue()
|
401
|
-
elif isinstance(queue, list):
|
402
|
-
queue: WorkflowQueue = WorkflowQueue(queue=queue)
|
469
|
+
if queue is None or isinstance(queue, list):
|
470
|
+
queue: WorkflowQueue = WorkflowQueue.from_list(queue)
|
403
471
|
|
404
472
|
# VALIDATE: Change release value to WorkflowRelease object.
|
405
473
|
if isinstance(release, datetime):
|
406
474
|
release: WorkflowRelease = WorkflowRelease.from_dt(release)
|
407
475
|
|
408
476
|
logger.debug(
|
409
|
-
f"({run_id}) [RELEASE]: {self.name!r} : "
|
410
|
-
f"
|
477
|
+
f"({cut_id(run_id)}) [RELEASE]: {self.name!r} : Start release - "
|
478
|
+
f"{release.date:%Y-%m-%d %H:%M:%S}"
|
411
479
|
)
|
412
480
|
|
413
|
-
# NOTE: Release
|
481
|
+
# NOTE: Release parameters that use to templating on the schedule
|
482
|
+
# config data.
|
414
483
|
release_params: DictData = {
|
415
484
|
"release": {
|
416
485
|
"logical_date": release.date,
|
@@ -420,14 +489,14 @@ class Workflow(BaseModel):
|
|
420
489
|
}
|
421
490
|
}
|
422
491
|
|
423
|
-
#
|
492
|
+
# NOTE: Execute workflow with templating params from release mapping.
|
424
493
|
rs: Result = self.execute(
|
425
494
|
params=param2template(params, release_params),
|
426
495
|
run_id=run_id,
|
427
496
|
)
|
428
497
|
logger.debug(
|
429
|
-
f"({run_id}) [RELEASE]: {self.name!r} : "
|
430
|
-
f"
|
498
|
+
f"({cut_id(run_id)}) [RELEASE]: {self.name!r} : End release - "
|
499
|
+
f"{release.date:%Y-%m-%d %H:%M:%S}"
|
431
500
|
)
|
432
501
|
|
433
502
|
rs.set_parent_run_id(run_id)
|
@@ -449,16 +518,13 @@ class Workflow(BaseModel):
|
|
449
518
|
queue.remove_running(release)
|
450
519
|
heappush(queue.complete, release)
|
451
520
|
|
452
|
-
return
|
521
|
+
return rs_release.catch(
|
453
522
|
status=0,
|
454
523
|
context={
|
455
524
|
"params": params,
|
456
|
-
"release": {
|
457
|
-
|
458
|
-
"logical_date": release.date,
|
459
|
-
},
|
525
|
+
"release": {"status": "success", "logical_date": release.date},
|
526
|
+
"outputs": rs.context,
|
460
527
|
},
|
461
|
-
run_id=run_id,
|
462
528
|
)
|
463
529
|
|
464
530
|
def queue_poking(
|
@@ -467,14 +533,20 @@ class Workflow(BaseModel):
|
|
467
533
|
end_date: datetime,
|
468
534
|
queue: WorkflowQueue,
|
469
535
|
log: type[Log],
|
536
|
+
*,
|
537
|
+
force_run: bool = False,
|
470
538
|
) -> WorkflowQueue:
|
471
539
|
"""Generate queue of datetime from the cron runner that initialize from
|
472
540
|
the on field. with offset value.
|
473
541
|
|
474
|
-
:param offset:
|
475
|
-
:param end_date:
|
476
|
-
:param queue:
|
477
|
-
:param log:
|
542
|
+
:param offset: An offset in seconds for time travel.
|
543
|
+
:param end_date: An end datetime object.
|
544
|
+
:param queue: A workflow queue object.
|
545
|
+
:param log: A log class that is used to make the log object.
|
546
|
+
:param force_run: A flag that allows releasing the workflow even if the log with
|
547
|
+
that release was pointed.
|
548
|
+
|
549
|
+
:rtype: WorkflowQueue
|
478
550
|
"""
|
479
551
|
for on in self.on:
|
480
552
|
|
@@ -482,6 +554,7 @@ class Workflow(BaseModel):
|
|
482
554
|
get_dt_now(tz=config.tz, offset=offset).replace(microsecond=0)
|
483
555
|
)
|
484
556
|
|
557
|
+
# NOTE: Skip this runner date if it is later than the end date.
|
485
558
|
if runner.date > end_date:
|
486
559
|
continue
|
487
560
|
|
@@ -493,8 +566,9 @@ class Workflow(BaseModel):
|
|
493
566
|
type="poking",
|
494
567
|
)
|
495
568
|
|
496
|
-
while queue.check_queue(
|
569
|
+
while queue.check_queue(workflow_release) or (
|
497
570
|
log.is_pointed(name=self.name, release=workflow_release.date)
|
571
|
+
and not force_run
|
498
572
|
):
|
499
573
|
workflow_release = WorkflowRelease(
|
500
574
|
date=runner.next,
|
@@ -507,36 +581,50 @@ class Workflow(BaseModel):
|
|
507
581
|
if runner.date > end_date:
|
508
582
|
continue
|
509
583
|
|
584
|
+
# NOTE: Push the WorkflowRelease object to queue.
|
510
585
|
queue.push_queue(workflow_release)
|
586
|
+
|
511
587
|
return queue
|
512
588
|
|
513
589
|
def poke(
|
514
590
|
self,
|
515
591
|
start_date: datetime | None = None,
|
516
592
|
params: DictData | None = None,
|
593
|
+
*,
|
517
594
|
run_id: str | None = None,
|
518
595
|
periods: int = 1,
|
519
|
-
*,
|
520
596
|
log: Log | None = None,
|
597
|
+
force_run: bool = False,
|
598
|
+
timeout: int = 1800,
|
521
599
|
) -> list[Result]:
|
522
|
-
"""Poke workflow with
|
523
|
-
|
524
|
-
|
600
|
+
"""Poke this workflow with start datetime value that passing to its
|
601
|
+
``on`` field with threading executor pool for executing with all its
|
602
|
+
schedules that was set on the `on` value.
|
603
|
+
|
604
|
+
This method will observe its schedule that nearing to run with the
|
525
605
|
``self.release()`` method.
|
526
606
|
|
527
607
|
:param start_date: A start datetime object.
|
528
608
|
:param params: A parameters that want to pass to the release method.
|
529
609
|
:param run_id: A workflow running ID for this poke.
|
530
|
-
:param periods: A periods
|
610
|
+
:param periods: A number of minutes that is used to run this poking.
|
531
611
|
:param log: A log object that want to use on this poking process.
|
612
|
+
:param force_run: A flag that allows releasing the workflow even if the log with
|
613
|
+
that release was pointed.
|
614
|
+
:param timeout: A timeout in seconds for waiting until all futures
|
615
|
+
run completely.
|
532
616
|
|
533
617
|
:rtype: list[Result]
|
618
|
+
:return: A list of all results that return from ``self.release`` method.
|
534
619
|
"""
|
620
|
+
log: type[Log] = log or FileLog
|
621
|
+
run_id: str = run_id or gen_id(self.name, unique=True)
|
622
|
+
|
535
623
|
# NOTE: If this workflow does not set the on schedule, it will return
|
536
624
|
# empty result.
|
537
625
|
if len(self.on) == 0:
|
538
626
|
logger.info(
|
539
|
-
f"({run_id}) [POKING]: {self.name!r} does not have any "
|
627
|
+
f"({cut_id(run_id)}) [POKING]: {self.name!r} does not have any "
|
540
628
|
f"schedule to run."
|
541
629
|
)
|
542
630
|
return []
|
@@ -556,82 +644,93 @@ class Workflow(BaseModel):
|
|
556
644
|
start_date: datetime = current_date
|
557
645
|
offset: float = 0
|
558
646
|
|
647
|
+
# NOTE: End date is used to stop generating the queue with an input periods
|
648
|
+
# value.
|
559
649
|
end_date: datetime = start_date + timedelta(minutes=periods)
|
560
650
|
|
561
|
-
log: type[Log] = log or FileLog
|
562
|
-
run_id: str = run_id or gen_id(self.name, unique=True)
|
563
651
|
logger.info(
|
564
|
-
f"({run_id}) [POKING]: Start Poking: {self.name!r} from "
|
652
|
+
f"({cut_id(run_id)}) [POKING]: Start Poking: {self.name!r} from "
|
565
653
|
f"{start_date:%Y-%m-%d %H:%M:%S} to {end_date:%Y-%m-%d %H:%M:%S}"
|
566
654
|
)
|
567
655
|
|
568
|
-
params: DictData = params
|
569
|
-
|
656
|
+
params: DictData = {} if params is None else params
|
657
|
+
wf_queue: WorkflowQueue = WorkflowQueue()
|
570
658
|
results: list[Result] = []
|
571
659
|
futures: list[Future] = []
|
572
660
|
|
661
|
+
# NOTE: Make queue to the workflow queue object.
|
573
662
|
self.queue_poking(
|
574
|
-
offset,
|
663
|
+
offset,
|
664
|
+
end_date=end_date,
|
665
|
+
queue=wf_queue,
|
666
|
+
log=log,
|
667
|
+
force_run=force_run,
|
575
668
|
)
|
576
|
-
|
577
|
-
if len(workflow_queue.queue) == 0:
|
669
|
+
if not wf_queue.is_queued:
|
578
670
|
logger.info(
|
579
|
-
f"({run_id}) [POKING]: {self.name!r} does not have
|
580
|
-
f"queue
|
671
|
+
f"({cut_id(run_id)}) [POKING]: {self.name!r} does not have "
|
672
|
+
f"any queue."
|
581
673
|
)
|
582
674
|
return []
|
583
675
|
|
676
|
+
# NOTE: Start create the thread pool executor for running this poke
|
677
|
+
# process.
|
584
678
|
with ThreadPoolExecutor(
|
585
679
|
max_workers=config.max_poking_pool_worker,
|
586
|
-
thread_name_prefix="
|
680
|
+
thread_name_prefix="wf_poking_",
|
587
681
|
) as executor:
|
588
682
|
|
589
|
-
while
|
683
|
+
while wf_queue.is_queued:
|
684
|
+
|
685
|
+
# NOTE: Pop the earliest WorkflowRelease object from the queue.
|
686
|
+
release: WorkflowRelease = heappop(wf_queue.queue)
|
590
687
|
|
591
|
-
wf_release: WorkflowRelease = heappop(workflow_queue.queue)
|
592
688
|
if (
|
593
|
-
|
689
|
+
release.date - get_dt_now(tz=config.tz, offset=offset)
|
594
690
|
).total_seconds() > 60:
|
595
691
|
logger.debug(
|
596
|
-
f"({run_id}) [POKING]:
|
597
|
-
f"release has diff time more than 60 seconds "
|
692
|
+
f"({cut_id(run_id)}) [POKING]: Wait because the latest "
|
693
|
+
f"release has diff time more than 60 seconds ..."
|
598
694
|
)
|
599
|
-
heappush(
|
695
|
+
heappush(wf_queue.queue, release)
|
600
696
|
delay(60)
|
697
|
+
|
698
|
+
# WARNING: Call queue poking again here because of an issue
|
699
|
+
# about the every minute crontab.
|
601
700
|
self.queue_poking(
|
602
|
-
offset,
|
701
|
+
offset,
|
702
|
+
end_date,
|
703
|
+
queue=wf_queue,
|
704
|
+
log=log,
|
705
|
+
force_run=force_run,
|
603
706
|
)
|
604
707
|
continue
|
605
708
|
|
606
|
-
# NOTE: Push the
|
607
|
-
|
709
|
+
# NOTE: Push the latest WorkflowRelease to the running queue.
|
710
|
+
wf_queue.push_running(release)
|
608
711
|
|
609
712
|
futures.append(
|
610
713
|
executor.submit(
|
611
714
|
self.release,
|
612
|
-
release=
|
715
|
+
release=release,
|
613
716
|
params=params,
|
614
717
|
log=log,
|
615
|
-
queue=
|
718
|
+
queue=wf_queue,
|
616
719
|
)
|
617
720
|
)
|
618
721
|
|
619
722
|
self.queue_poking(
|
620
|
-
offset,
|
723
|
+
offset,
|
724
|
+
end_date,
|
725
|
+
queue=wf_queue,
|
726
|
+
log=log,
|
727
|
+
force_run=force_run,
|
621
728
|
)
|
622
729
|
|
623
730
|
# WARNING: This poking method does not allow to use fail-fast
|
624
731
|
# logic to catching parallel execution result.
|
625
|
-
for future in as_completed(futures):
|
626
|
-
|
627
|
-
results.append(rs.set_parent_run_id(run_id))
|
628
|
-
|
629
|
-
while len(workflow_queue.running) > 0: # pragma: no cov
|
630
|
-
logger.warning(
|
631
|
-
f"({run_id}) [POKING]: Running does empty when poking "
|
632
|
-
f"process was finishing."
|
633
|
-
)
|
634
|
-
delay(10)
|
732
|
+
for future in as_completed(futures, timeout=timeout):
|
733
|
+
results.append(future.result().set_parent_run_id(run_id))
|
635
734
|
|
636
735
|
return results
|
637
736
|
|
@@ -639,17 +738,21 @@ class Workflow(BaseModel):
|
|
639
738
|
self,
|
640
739
|
job_id: str,
|
641
740
|
params: DictData,
|
642
|
-
run_id: str | None = None,
|
643
741
|
*,
|
742
|
+
run_id: str | None = None,
|
644
743
|
raise_error: bool = True,
|
645
744
|
) -> Result:
|
646
|
-
"""
|
647
|
-
|
745
|
+
"""Job execution with passing dynamic parameters from the main workflow
|
746
|
+
execution to the target job object via job's ID.
|
648
747
|
|
649
748
|
This execution is the minimum level of execution of this workflow
|
650
749
|
model. It different with ``self.execute`` because this method run only
|
651
750
|
one job and return with context of this job data.
|
652
751
|
|
752
|
+
:raise WorkflowException: If execute with not exist job's ID.
|
753
|
+
:raise WorkflowException: If the job execution raise JobException.
|
754
|
+
:raise NotImplementedError: If set raise_error argument to False.
|
755
|
+
|
653
756
|
:param job_id: A job ID that want to execute.
|
654
757
|
:param params: A params that was parameterized from workflow execution.
|
655
758
|
:param run_id: A workflow running ID for this job execution.
|
@@ -657,20 +760,27 @@ class Workflow(BaseModel):
|
|
657
760
|
if it get exception from job execution.
|
658
761
|
|
659
762
|
:rtype: Result
|
763
|
+
:return: Return the result object that receive the job execution result
|
764
|
+
context.
|
660
765
|
"""
|
661
766
|
run_id: str = run_id or gen_id(self.name, unique=True)
|
767
|
+
rs: Result = Result(run_id=run_id)
|
662
768
|
|
663
769
|
# VALIDATE: check a job ID that exists in this workflow or not.
|
664
770
|
if job_id not in self.jobs:
|
665
771
|
raise WorkflowException(
|
666
|
-
f"The job
|
772
|
+
f"The job: {job_id!r} does not exists in {self.name!r} "
|
667
773
|
f"workflow."
|
668
774
|
)
|
669
775
|
|
670
|
-
logger.info(
|
776
|
+
logger.info(
|
777
|
+
f"({cut_id(run_id)}) [WORKFLOW]: Start execute job: {job_id!r}"
|
778
|
+
)
|
671
779
|
|
672
780
|
# IMPORTANT:
|
673
|
-
#
|
781
|
+
# This execution change all job running IDs to the current workflow
|
782
|
+
# execution running ID (with passing run_id to the job execution
|
783
|
+
# argument).
|
674
784
|
#
|
675
785
|
try:
|
676
786
|
job: Job = self.jobs[job_id]
|
@@ -680,23 +790,25 @@ class Workflow(BaseModel):
|
|
680
790
|
)
|
681
791
|
except JobException as err:
|
682
792
|
logger.error(
|
683
|
-
f"({run_id}) [WORKFLOW]: {err.__class__.__name__}:
|
793
|
+
f"({cut_id(run_id)}) [WORKFLOW]: {err.__class__.__name__}: "
|
794
|
+
f"{err}"
|
684
795
|
)
|
685
796
|
if raise_error:
|
686
797
|
raise WorkflowException(
|
687
798
|
f"Get job execution error {job_id}: JobException: {err}"
|
688
799
|
) from None
|
689
|
-
|
690
|
-
|
800
|
+
raise NotImplementedError(
|
801
|
+
"Handle error from the job execution does not support yet."
|
802
|
+
) from None
|
691
803
|
|
692
|
-
return
|
804
|
+
return rs.catch(status=0, context=params)
|
693
805
|
|
694
806
|
def execute(
|
695
807
|
self,
|
696
808
|
params: DictData,
|
697
|
-
run_id: str | None = None,
|
698
809
|
*,
|
699
|
-
|
810
|
+
run_id: str | None = None,
|
811
|
+
timeout: int = 0,
|
700
812
|
) -> Result:
|
701
813
|
"""Execute workflow with passing a dynamic parameters to all jobs that
|
702
814
|
included in this workflow model with ``jobs`` field.
|
@@ -712,18 +824,20 @@ class Workflow(BaseModel):
|
|
712
824
|
|
713
825
|
:param params: An input parameters that use on workflow execution that
|
714
826
|
will parameterize before using it. Default is None.
|
715
|
-
:type params: DictData
|
827
|
+
:type params: DictData
|
828
|
+
|
716
829
|
:param run_id: A workflow running ID for this job execution.
|
717
|
-
:type run_id: str | None
|
830
|
+
:type run_id: str | None (default: None)
|
718
831
|
:param timeout: A workflow execution time out in second unit that use
|
719
|
-
for limit time of execution and waiting job dependency.
|
720
|
-
|
721
|
-
:type timeout: int
|
832
|
+
for limit time of execution and waiting job dependency.
|
833
|
+
:type timeout: int (default: 0)
|
722
834
|
|
723
835
|
:rtype: Result
|
724
836
|
"""
|
725
837
|
run_id: str = run_id or gen_id(self.name, unique=True)
|
726
|
-
logger.info(
|
838
|
+
logger.info(
|
839
|
+
f"({cut_id(run_id)}) [WORKFLOW]: Start Execute: {self.name!r} ..."
|
840
|
+
)
|
727
841
|
|
728
842
|
# NOTE: I use this condition because this method allow passing empty
|
729
843
|
# params and I do not want to create new dict object.
|
@@ -733,7 +847,7 @@ class Workflow(BaseModel):
|
|
733
847
|
# NOTE: It should not do anything if it does not have job.
|
734
848
|
if not self.jobs:
|
735
849
|
logger.warning(
|
736
|
-
f"({run_id}) [WORKFLOW]: This workflow: {self.name!r} "
|
850
|
+
f"({cut_id(run_id)}) [WORKFLOW]: This workflow: {self.name!r} "
|
737
851
|
f"does not have any jobs"
|
738
852
|
)
|
739
853
|
return rs.catch(status=0, context=params)
|
@@ -769,17 +883,16 @@ class Workflow(BaseModel):
|
|
769
883
|
context=context,
|
770
884
|
ts=ts,
|
771
885
|
job_queue=jq,
|
772
|
-
worker=config.max_job_parallel,
|
773
886
|
timeout=timeout,
|
774
887
|
)
|
775
888
|
except WorkflowException as err:
|
889
|
+
status: int = 1
|
776
890
|
context.update(
|
777
891
|
{
|
778
892
|
"error": err,
|
779
893
|
"error_message": f"{err.__class__.__name__}: {err}",
|
780
894
|
},
|
781
895
|
)
|
782
|
-
status = 1
|
783
896
|
return rs.catch(status=status, context=context)
|
784
897
|
|
785
898
|
def __exec_threading(
|
@@ -789,10 +902,10 @@ class Workflow(BaseModel):
|
|
789
902
|
ts: float,
|
790
903
|
job_queue: Queue,
|
791
904
|
*,
|
792
|
-
|
793
|
-
|
905
|
+
timeout: int = 0,
|
906
|
+
thread_timeout: int = 1800,
|
794
907
|
) -> DictData:
|
795
|
-
"""Workflow execution by threading strategy.
|
908
|
+
"""Workflow execution by threading strategy that use multithreading.
|
796
909
|
|
797
910
|
If a job need dependency, it will check dependency job ID from
|
798
911
|
context data before allow it run.
|
@@ -802,27 +915,31 @@ class Workflow(BaseModel):
|
|
802
915
|
timeout.
|
803
916
|
:param job_queue: A job queue object.
|
804
917
|
:param timeout: A second value unit that bounding running time.
|
805
|
-
:param
|
918
|
+
:param thread_timeout: A timeout in seconds for waiting for all futures to complete.
|
919
|
+
|
806
920
|
:rtype: DictData
|
807
921
|
"""
|
808
|
-
|
922
|
+
not_timeout_flag: bool = True
|
923
|
+
timeout: int = timeout or config.max_job_exec_timeout
|
809
924
|
logger.debug(
|
810
|
-
f"({run_id})
|
811
|
-
f"executor"
|
925
|
+
f"({cut_id(run_id)}) [WORKFLOW]: Run {self.name!r} with threading."
|
812
926
|
)
|
813
927
|
|
814
928
|
# IMPORTANT: The job execution can run parallel and waiting by
|
815
929
|
# needed.
|
816
|
-
with ThreadPoolExecutor(
|
930
|
+
with ThreadPoolExecutor(
|
931
|
+
max_workers=config.max_job_parallel,
|
932
|
+
thread_name_prefix="wf_exec_threading_",
|
933
|
+
) as executor:
|
817
934
|
futures: list[Future] = []
|
818
935
|
|
819
936
|
while not job_queue.empty() and (
|
820
|
-
|
937
|
+
not_timeout_flag := ((time.monotonic() - ts) < timeout)
|
821
938
|
):
|
822
939
|
job_id: str = job_queue.get()
|
823
940
|
job: Job = self.jobs[job_id]
|
824
941
|
|
825
|
-
if
|
942
|
+
if not job.check_needs(context["jobs"]):
|
826
943
|
job_queue.task_done()
|
827
944
|
job_queue.put(job_id)
|
828
945
|
time.sleep(0.25)
|
@@ -831,10 +948,13 @@ class Workflow(BaseModel):
|
|
831
948
|
# NOTE: Start workflow job execution with deep copy context data
|
832
949
|
# before release.
|
833
950
|
#
|
951
|
+
# Context:
|
952
|
+
# ---
|
834
953
|
# {
|
835
954
|
# 'params': <input-params>,
|
836
|
-
# 'jobs': {},
|
955
|
+
# 'jobs': { <job's-id>: ... },
|
837
956
|
# }
|
957
|
+
#
|
838
958
|
futures.append(
|
839
959
|
executor.submit(
|
840
960
|
self.execute_job,
|
@@ -846,31 +966,31 @@ class Workflow(BaseModel):
|
|
846
966
|
# NOTE: Mark this job queue done.
|
847
967
|
job_queue.task_done()
|
848
968
|
|
849
|
-
|
850
|
-
job_queue.join()
|
969
|
+
if not_timeout_flag:
|
851
970
|
|
852
|
-
|
853
|
-
|
854
|
-
|
855
|
-
|
856
|
-
|
857
|
-
future.
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
-
|
862
|
-
|
863
|
-
|
864
|
-
|
971
|
+
# NOTE: Wait for all items to finish processing by `task_done()`
|
972
|
+
# method.
|
973
|
+
job_queue.join()
|
974
|
+
|
975
|
+
for future in as_completed(futures, timeout=thread_timeout):
|
976
|
+
if err := future.exception():
|
977
|
+
logger.error(f"({cut_id(run_id)}) [WORKFLOW]: {err}")
|
978
|
+
raise WorkflowException(str(err))
|
979
|
+
|
980
|
+
# NOTE: This getting result does not do anything.
|
981
|
+
future.result()
|
982
|
+
|
983
|
+
return context
|
984
|
+
|
985
|
+
for future in futures:
|
986
|
+
future.cancel()
|
865
987
|
|
866
988
|
# NOTE: Raise timeout error.
|
867
|
-
logger.warning(
|
868
|
-
f"({run_id}) [WORKFLOW]: Execution
|
869
|
-
f"
|
870
|
-
)
|
871
|
-
raise WorkflowException( # pragma: no cov
|
872
|
-
f"Execution of workflow: {self.name} was timeout"
|
989
|
+
logger.warning(
|
990
|
+
f"({cut_id(run_id)}) [WORKFLOW]: Execution: {self.name!r} "
|
991
|
+
f"was timeout."
|
873
992
|
)
|
993
|
+
raise WorkflowException(f"Execution: {self.name!r} was timeout.")
|
874
994
|
|
875
995
|
def __exec_non_threading(
|
876
996
|
self,
|
@@ -879,7 +999,7 @@ class Workflow(BaseModel):
|
|
879
999
|
ts: float,
|
880
1000
|
job_queue: Queue,
|
881
1001
|
*,
|
882
|
-
timeout: int =
|
1002
|
+
timeout: int = 0,
|
883
1003
|
) -> DictData:
|
884
1004
|
"""Workflow execution with non-threading strategy that use sequential
|
885
1005
|
job running and waiting previous job was run successful.
|
@@ -891,25 +1011,27 @@ class Workflow(BaseModel):
|
|
891
1011
|
:param ts: A start timestamp that use for checking execute time should
|
892
1012
|
timeout.
|
893
1013
|
:param timeout: A second value unit that bounding running time.
|
1014
|
+
|
894
1015
|
:rtype: DictData
|
895
1016
|
"""
|
896
|
-
|
1017
|
+
not_timeout_flag: bool = True
|
1018
|
+
timeout: int = timeout or config.max_job_exec_timeout
|
897
1019
|
logger.debug(
|
898
|
-
f"({run_id}) [WORKFLOW]: Run {self.name} with
|
899
|
-
f"
|
1020
|
+
f"({cut_id(run_id)}) [WORKFLOW]: Run {self.name} with "
|
1021
|
+
f"non-threading."
|
900
1022
|
)
|
901
1023
|
|
902
1024
|
while not job_queue.empty() and (
|
903
|
-
|
1025
|
+
not_timeout_flag := ((time.monotonic() - ts) < timeout)
|
904
1026
|
):
|
905
1027
|
job_id: str = job_queue.get()
|
906
1028
|
job: Job = self.jobs[job_id]
|
907
1029
|
|
908
1030
|
# NOTE: Waiting dependency job run successful before release.
|
909
|
-
if
|
1031
|
+
if not job.check_needs(context["jobs"]):
|
910
1032
|
job_queue.task_done()
|
911
1033
|
job_queue.put(job_id)
|
912
|
-
time.sleep(0.
|
1034
|
+
time.sleep(0.075)
|
913
1035
|
continue
|
914
1036
|
|
915
1037
|
# NOTE: Start workflow job execution with deep copy context data
|
@@ -925,27 +1047,28 @@ class Workflow(BaseModel):
|
|
925
1047
|
# NOTE: Mark this job queue done.
|
926
1048
|
job_queue.task_done()
|
927
1049
|
|
928
|
-
|
929
|
-
|
1050
|
+
if not_timeout_flag:
|
1051
|
+
|
1052
|
+
# NOTE: Wait for all items to finish processing by `task_done()`
|
1053
|
+
# method.
|
1054
|
+
job_queue.join()
|
930
1055
|
|
931
|
-
if not_time_out_flag:
|
932
1056
|
return context
|
933
1057
|
|
934
1058
|
# NOTE: Raise timeout error.
|
935
|
-
logger.warning(
|
936
|
-
f"({run_id}) [WORKFLOW]: Execution
|
937
|
-
|
938
|
-
raise WorkflowException( # pragma: no cov
|
939
|
-
f"Execution of workflow: {self.name} was timeout"
|
1059
|
+
logger.warning(
|
1060
|
+
f"({cut_id(run_id)}) [WORKFLOW]: Execution: {self.name!r} "
|
1061
|
+
f"was timeout."
|
940
1062
|
)
|
1063
|
+
raise WorkflowException(f"Execution: {self.name!r} was timeout.")
|
941
1064
|
|
942
1065
|
|
943
1066
|
@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
|
944
1067
|
class WorkflowTaskData:
|
945
|
-
"""Workflow task dataclass that use to keep mapping data and
|
946
|
-
passing
|
1068
|
+
"""Workflow task Pydantic dataclass object that use to keep mapping data and
|
1069
|
+
workflow model for passing to the multithreading task.
|
947
1070
|
|
948
|
-
This dataclass
|
1071
|
+
This dataclass object is mapping 1-to-1 with workflow and cron runner
|
949
1072
|
objects.
|
950
1073
|
"""
|
951
1074
|
|
@@ -988,15 +1111,15 @@ class WorkflowTaskData:
|
|
988
1111
|
next_time: datetime = runner.next
|
989
1112
|
|
990
1113
|
logger.debug(
|
991
|
-
f"({run_id}) [CORE]: {self.workflow.name!r} :
|
992
|
-
f"{next_time:%Y-%m-%d %H:%M:%S}"
|
1114
|
+
f"({cut_id(run_id)}) [CORE]: {self.workflow.name!r} : "
|
1115
|
+
f"{runner.cron} : {next_time:%Y-%m-%d %H:%M:%S}"
|
993
1116
|
)
|
994
1117
|
heappush(queue[self.alias], next_time)
|
995
1118
|
start_sec: float = time.monotonic()
|
996
1119
|
|
997
1120
|
if get_diff_sec(next_time, tz=runner.tz) > waiting_sec:
|
998
1121
|
logger.debug(
|
999
|
-
f"({run_id}) [WORKFLOW]: {self.workflow.name!r} : "
|
1122
|
+
f"({cut_id(run_id)}) [WORKFLOW]: {self.workflow.name!r} : "
|
1000
1123
|
f"{runner.cron} "
|
1001
1124
|
f": Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
|
1002
1125
|
)
|
@@ -1009,8 +1132,8 @@ class WorkflowTaskData:
|
|
1009
1132
|
return
|
1010
1133
|
|
1011
1134
|
logger.debug(
|
1012
|
-
f"({run_id}) [CORE]: {self.workflow.name!r} :
|
1013
|
-
f"Closely to run >> {next_time:%Y-%m-%d %H:%M:%S}"
|
1135
|
+
f"({cut_id(run_id)}) [CORE]: {self.workflow.name!r} : "
|
1136
|
+
f"{runner.cron} : Closely to run >> {next_time:%Y-%m-%d %H:%M:%S}"
|
1014
1137
|
)
|
1015
1138
|
|
1016
1139
|
# NOTE: Release when the time is nearly to schedule time.
|
@@ -1018,8 +1141,8 @@ class WorkflowTaskData:
|
|
1018
1141
|
sleep_interval + 5
|
1019
1142
|
):
|
1020
1143
|
logger.debug(
|
1021
|
-
f"({run_id}) [CORE]: {self.workflow.name!r} :
|
1022
|
-
f": Sleep until: {duration}"
|
1144
|
+
f"({cut_id(run_id)}) [CORE]: {self.workflow.name!r} : "
|
1145
|
+
f"{runner.cron} : Sleep until: {duration}"
|
1023
1146
|
)
|
1024
1147
|
time.sleep(15)
|
1025
1148
|
|
@@ -1038,8 +1161,8 @@ class WorkflowTaskData:
|
|
1038
1161
|
params=param2template(self.params, release_params),
|
1039
1162
|
)
|
1040
1163
|
logger.debug(
|
1041
|
-
f"({run_id}) [CORE]: {self.workflow.name!r} :
|
1042
|
-
f"End release - {next_time:%Y-%m-%d %H:%M:%S}"
|
1164
|
+
f"({cut_id(run_id)}) [CORE]: {self.workflow.name!r} : "
|
1165
|
+
f"{runner.cron} : End release - {next_time:%Y-%m-%d %H:%M:%S}"
|
1043
1166
|
)
|
1044
1167
|
|
1045
1168
|
# NOTE: Set parent ID on this result.
|
@@ -1075,7 +1198,8 @@ class WorkflowTaskData:
|
|
1075
1198
|
# NOTE: Queue next release date.
|
1076
1199
|
logger.debug(f"[CORE]: {'-' * 100}")
|
1077
1200
|
|
1078
|
-
def __eq__(self, other) -> bool:
|
1201
|
+
def __eq__(self, other: WorkflowTaskData) -> bool:
|
1202
|
+
"""Override equal property that will compare only the same type."""
|
1079
1203
|
if isinstance(other, WorkflowTaskData):
|
1080
1204
|
return (
|
1081
1205
|
self.workflow.name == other.workflow.name
|