ddeutil-workflow 0.0.10__py3-none-any.whl → 0.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +3 -2
- ddeutil/workflow/api.py +84 -16
- ddeutil/workflow/cli.py +14 -14
- ddeutil/workflow/exceptions.py +6 -6
- ddeutil/workflow/job.py +572 -0
- ddeutil/workflow/log.py +10 -10
- ddeutil/workflow/repeat.py +4 -2
- ddeutil/workflow/route.py +165 -36
- ddeutil/workflow/scheduler.py +733 -110
- ddeutil/workflow/stage.py +12 -12
- ddeutil/workflow/utils.py +4 -4
- {ddeutil_workflow-0.0.10.dist-info → ddeutil_workflow-0.0.11.dist-info}/METADATA +66 -70
- ddeutil_workflow-0.0.11.dist-info/RECORD +21 -0
- {ddeutil_workflow-0.0.10.dist-info → ddeutil_workflow-0.0.11.dist-info}/WHEEL +1 -1
- ddeutil/workflow/pipeline.py +0 -1186
- ddeutil_workflow-0.0.10.dist-info/RECORD +0 -21
- {ddeutil_workflow-0.0.10.dist-info → ddeutil_workflow-0.0.11.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.10.dist-info → ddeutil_workflow-0.0.11.dist-info}/entry_points.txt +0 -0
- {ddeutil_workflow-0.0.10.dist-info → ddeutil_workflow-0.0.11.dist-info}/top_level.txt +0 -0
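Judging only from the summary above (pipeline.py removed, job.py added, scheduler.py heavily expanded), the models defined in the deleted pipeline.py appear to have been relocated rather than dropped. A hedged compatibility sketch for downstream imports follows; the 0.0.11 module path is an assumption inferred from this summary, not verified against the 0.0.11 sources:

# Hypothetical import shim: the 0.0.11 location below is inferred from the
# file summary in this diff, not confirmed against the released sources.
try:
    from ddeutil.workflow.job import Job, Strategy  # assumed new home in 0.0.11
except ImportError:
    from ddeutil.workflow.pipeline import Job, Strategy  # 0.0.10 layout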
ddeutil/workflow/pipeline.py
DELETED
@@ -1,1186 +0,0 @@
# ------------------------------------------------------------------------------
# Copyright (c) 2022 Korawich Anuttra. All rights reserved.
# Licensed under the MIT License. See LICENSE in the project root for
# license information.
# ------------------------------------------------------------------------------
from __future__ import annotations

import copy
import os
import time
from concurrent.futures import (
    FIRST_EXCEPTION,
    Future,
    ThreadPoolExecutor,
    as_completed,
    wait,
)
from datetime import datetime, timedelta
from heapq import heappush
from pickle import PickleError
from queue import Queue
from textwrap import dedent
from threading import Event
from typing import Optional
from zoneinfo import ZoneInfo

from pydantic import BaseModel, Field
from pydantic.functional_validators import field_validator, model_validator
from typing_extensions import Self

from .__types import (
    DictData,
    DictStr,
    Matrix,
    MatrixExclude,
    MatrixInclude,
    TupleStr,
)
from .cron import CronRunner
from .exceptions import (
    JobException,
    PipelineException,
    StageException,
    UtilException,
)
from .log import FileLog, Log, get_logger
from .on import On
from .stage import Stage
from .utils import (
    Loader,
    Param,
    Result,
    cross_product,
    dash2underscore,
    delay,
    filter_func,
    gen_id,
    get_diff_sec,
    has_template,
    param2template,
)

logger = get_logger("ddeutil.workflow")


__all__: TupleStr = (
    "Strategy",
    "Job",
    "Pipeline",
)

class Strategy(BaseModel):
    """Strategy Model that will combine a matrix together for running the
    special job.

    Data Validate:
        >>> strategy = {
        ...     'max-parallel': 1,
        ...     'fail-fast': False,
        ...     'matrix': {
        ...         'first': [1, 2, 3],
        ...         'second': ['foo', 'bar'],
        ...     },
        ...     'include': [{'first': 4, 'second': 'foo'}],
        ...     'exclude': [{'first': 1, 'second': 'bar'}],
        ... }
    """

    fail_fast: bool = Field(
        default=False,
        serialization_alias="fail-fast",
    )
    max_parallel: int = Field(
        default=1,
        gt=0,
        description=(
            "The maximum number of executor thread pool that want to run "
            "parallel"
        ),
        serialization_alias="max-parallel",
    )
    matrix: Matrix = Field(
        default_factory=dict,
        description=(
            "A matrix values that want to cross product to possible strategies."
        ),
    )
    include: MatrixInclude = Field(
        default_factory=list,
        description="A list of additional matrix that want to adds-in.",
    )
    exclude: MatrixExclude = Field(
        default_factory=list,
        description="A list of exclude matrix that want to filter-out.",
    )

    @model_validator(mode="before")
    def __prepare_keys(cls, values: DictData) -> DictData:
        """Rename key that use dash to underscore because Python does not
        support this character exist in any variable name.
        """
        dash2underscore("max-parallel", values)
        dash2underscore("fail-fast", values)
        return values

    def is_set(self) -> bool:
        """Return True if this strategy was set from yaml template."""
        return len(self.matrix) > 0

    def make(self) -> list[DictStr]:
        """Return List of product of matrix values that already filter with
        exclude and add include.

        :rtype: list[DictStr]
        """
        # NOTE: If it does not set matrix, it will return list of an empty dict.
        if not (mt := self.matrix):
            return [{}]

        final: list[DictStr] = []
        for r in cross_product(matrix=mt):
            if any(
                all(r[k] == v for k, v in exclude.items())
                for exclude in self.exclude
            ):
                continue
            final.append(r)

        # NOTE: If it is empty matrix and include, it will return list of an
        #   empty dict.
        if not final and not self.include:
            return [{}]

        # NOTE: Add include to generated matrix with exclude list.
        add: list[DictStr] = []
        for include in self.include:
            # VALIDATE:
            #   Validate any key in include list should be a subset of some one
            #   in matrix.
            if all(not (set(include.keys()) <= set(m.keys())) for m in final):
                raise ValueError("Include should have the keys equal to matrix")

            # VALIDATE:
            #   Validate value of include does not duplicate with generated
            #   matrix.
            if any(
                all(include.get(k) == v for k, v in m.items())
                for m in [*final, *add]
            ):
                continue
            add.append(include)
        final.extend(add)
        return final

class Job(BaseModel):
    """Job Model (group of stages).

    This job model allow you to use for-loop that call matrix strategy. If
    you pass matrix mapping and it able to generate, you will see it running
    with loop of matrix values.

    Data Validate:
        >>> job = {
        ...     "runs-on": None,
        ...     "strategy": {
        ...         "max-parallel": 1,
        ...         "matrix": {
        ...             "first": [1, 2, 3],
        ...             "second": ['foo', 'bar'],
        ...         },
        ...     },
        ...     "needs": [],
        ...     "stages": [
        ...         {
        ...             "name": "Some stage",
        ...             "run": "print('Hello World')",
        ...         },
        ...         ...
        ...     ],
        ... }
    """

    id: Optional[str] = Field(
        default=None,
        description=(
            "A job ID, this value will add from pipeline after validation "
            "process."
        ),
    )
    desc: Optional[str] = Field(
        default=None,
        description="A job description that can be string of markdown content.",
    )
    runs_on: Optional[str] = Field(
        default=None,
        description="A target executor node for this job use to execution.",
        serialization_alias="runs-on",
    )
    stages: list[Stage] = Field(
        default_factory=list,
        description="A list of Stage of this job.",
    )
    needs: list[str] = Field(
        default_factory=list,
        description="A list of the job ID that want to run before this job.",
    )
    strategy: Strategy = Field(
        default_factory=Strategy,
        description="A strategy matrix that want to generate.",
    )
    run_id: Optional[str] = Field(
        default=None,
        description="A running job ID.",
        repr=False,
        exclude=True,
    )

    @model_validator(mode="before")
    def __prepare_keys(cls, values: DictData) -> DictData:
        """Rename key that use dash to underscore because Python does not
        support this character exist in any variable name.
        """
        dash2underscore("runs-on", values)
        return values

    @field_validator("desc", mode="after")
    def ___prepare_desc(cls, value: str) -> str:
        """Prepare description string that was created on a template."""
        return dedent(value)

    @model_validator(mode="after")
    def __prepare_running_id(self):
        if self.run_id is None:
            self.run_id = gen_id(self.id or "", unique=True)

        # VALIDATE: Validate job id should not dynamic with params template.
        if has_template(self.id):
            raise ValueError("Job ID should not has any template.")

        return self

    def get_running_id(self, run_id: str) -> Self:
        """Return Job model object that changing job running ID with an
        input running ID.

        :param run_id: A replace job running ID.
        :rtype: Self
        """
        return self.model_copy(update={"run_id": run_id})

    def stage(self, stage_id: str) -> Stage:
        """Return stage model that match with an input stage ID."""
        for stage in self.stages:
            if stage_id == (stage.id or ""):
                return stage
        raise ValueError(f"Stage ID {stage_id} does not exists")

    def set_outputs(self, output: DictData) -> DictData:
        """Setting output of job execution"""
        if len(output) > 1 and self.strategy.is_set():
            return {"strategies": output}
        return output[next(iter(output))]

    def execute_strategy(
        self,
        strategy: DictData,
        params: DictData,
        *,
        event: Event | None = None,
    ) -> Result:
        """Job Strategy execution with passing dynamic parameters from the
        pipeline execution to strategy matrix.

        This execution is the minimum level execution of job model.

        :param strategy: A metrix strategy value.
        :param params: A dynamic parameters.
        :param event: An manger event that pass to the PoolThreadExecutor.
        :rtype: Result

        :raise JobException: If it has any error from StageException or
            UtilException.
        """
        # NOTE: Force stop this execution if event was set from main execution.
        if event and event.is_set():
            return Result(
                status=1,
                context={
                    gen_id(strategy): {
                        "matrix": strategy,
                        "stages": {},
                        "error_message": {
                            "message": "Process Event stopped before execution"
                        },
                    },
                },
            )

        # NOTE: Create strategy execution context and update a matrix and copied
        #   of params. So, the context value will have structure like;
        #   ---
        #   {
        #       "params": { ... },      <== Current input params
        #       "jobs": { ... },        <== Current input params
        #       "matrix": { ... }       <== Current strategy value
        #   }
        #
        context: DictData = params
        context.update({"matrix": strategy})

        # IMPORTANT: The stage execution only run sequentially one-by-one.
        for stage in self.stages:

            # IMPORTANT: Change any stage running IDs to this job running ID.
            stage: Stage = stage.get_running_id(self.run_id)

            _st_name: str = stage.id or stage.name

            if stage.is_skipped(params=context):
                logger.info(
                    f"({self.run_id}) [JOB]: Skip the stage: {_st_name!r}"
                )
                continue

            logger.info(
                f"({self.run_id}) [JOB]: Start execute the stage: {_st_name!r}"
            )

            # NOTE: Logging a matrix that pass on this stage execution.
            if strategy:
                logger.info(f"({self.run_id}) [JOB]: Matrix: {strategy}")

            # NOTE:
            #   I do not use below syntax because `params` dict be the
            #   reference memory pointer and it was changed when I action
            #   anything like update or re-construct this.
            #
            #   ... params |= stage.execute(params=params)
            #
            #   This step will add the stage result to ``stages`` key in
            #   that stage id. It will have structure like;
            #   ---
            #   {
            #       "params": { ... },
            #       "jobs": { ... },
            #       "matrix": { ... },
            #       "stages": { { "stage-id-1": ... }, ... }
            #   }
            #
            if event and event.is_set():
                return Result(
                    status=1,
                    context={
                        gen_id(strategy): {
                            "matrix": strategy,
                            # NOTE: If job strategy executor use multithreading,
                            #   it will not filter function object from context.
                            #   ---
                            #   "stages": filter_func(context.pop("stages", {})),
                            "stages": context.pop("stages", {}),
                            "error_message": {
                                "message": (
                                    "Process Event stopped before execution"
                                ),
                            },
                        },
                    },
                )
            try:
                rs: Result = stage.execute(params=context)
                stage.set_outputs(rs.context, to=context)
            except (StageException, UtilException) as err:
                logger.error(
                    f"({self.run_id}) [JOB]: {err.__class__.__name__}: {err}"
                )
                raise JobException(
                    f"Get stage execution error: {err.__class__.__name__}: "
                    f"{err}"
                ) from None

            # NOTE: Remove new stage object that was created from
            #   ``get_running_id`` method.
            del stage

        return Result(
            status=0,
            context={
                gen_id(strategy): {
                    "matrix": strategy,
                    # NOTE: (WF001) filter own created function from stages
                    #   value, because it does not dump with pickle when you
                    #   execute with multiprocess.
                    #
                    "stages": filter_func(context.pop("stages", {})),
                },
            },
        )

    def execute(self, params: DictData | None = None) -> Result:
        """Job execution with passing dynamic parameters from the pipeline
        execution. It will generate matrix values at the first step and for-loop
        any metrix to all stages dependency.

        :param params: An input parameters that use on job execution.
        :rtype: Result
        """
        context: DictData = {}

        # NOTE: Normal Job execution.
        if (not self.strategy.is_set()) or self.strategy.max_parallel == 1:
            for strategy in self.strategy.make():
                rs: Result = self.execute_strategy(
                    strategy, params=copy.deepcopy(params)
                )
                context.update(rs.context)
            return Result(
                status=0,
                context=context,
            )

        # # WARNING: (WF001) I got error that raise when use
        # #   ``ProcessPoolExecutor``;
        # #   ---
        # #   _pickle.PicklingError: Can't pickle
        # #       <function ??? at 0x000001F0BE80F160>: attribute lookup ???
        # #       on ddeutil.workflow.stage failed
        # #
        # # from multiprocessing import Event, Manager
        # with Manager() as manager:
        #     event: Event = manager.Event()
        #
        #     # NOTE: Start process pool executor for running strategy executor
        #     #   in parallel mode.
        #     with ProcessPoolExecutor(
        #         max_workers=self.strategy.max_parallel
        #     ) as executor:
        #         futures: list[Future] = [
        #             executor.submit(
        #                 self.execute_strategy,
        #                 strategy,
        #                 params=copy.deepcopy(params),
        #                 event=event,
        #             )
        #             for strategy in self.strategy.make()
        #         ]
        #         if self.strategy.fail_fast:
        #             rs = self.__catch_fail_fast(event, futures)
        #         else:
        #             rs = self.__catch_all_completed(futures)

        # NOTE: Create event for cancel executor stop running.
        event: Event = Event()

        with ThreadPoolExecutor(
            max_workers=self.strategy.max_parallel
        ) as executor:
            futures: list[Future] = [
                executor.submit(
                    self.execute_strategy,
                    strategy,
                    params=copy.deepcopy(params),
                    event=event,
                )
                for strategy in self.strategy.make()
            ]

            # NOTE: Dynamic catching futures object with fail-fast flag.
            if self.strategy.fail_fast:
                rs: Result = self.__catch_fail_fast(event, futures)
            else:
                rs: Result = self.__catch_all_completed(futures)
        return Result(
            status=0,
            context=rs.context,
        )

    def __catch_fail_fast(self, event: Event, futures: list[Future]) -> Result:
        """Job parallel pool futures catching with fail-fast mode. That will
        stop all not done futures if it receive the first exception from all
        running futures.

        :param event:
        :param futures: A list of futures.
        :rtype: Result
        """
        context: DictData = {}
        # NOTE: Get results from a collection of tasks with a
        #   timeout that has the first exception.
        done, not_done = wait(
            futures, timeout=1800, return_when=FIRST_EXCEPTION
        )
        nd: str = (
            f", the strategies do not run is {not_done}" if not_done else ""
        )
        logger.debug(f"({self.run_id}) [JOB]: Strategy is set Fail Fast{nd}")

        if len(done) != len(futures):

            # NOTE: Stop all running tasks
            event.set()

            # NOTE: Cancel any scheduled tasks
            for future in futures:
                future.cancel()

        status: int = 0
        for future in done:
            if future.exception():
                status = 1
                logger.error(
                    f"({self.run_id}) [JOB]: One stage failed with: "
                    f"{future.exception()}, shutting down this future."
                )
            elif future.cancelled():
                continue
            else:
                rs: Result = future.result(timeout=60)
                context.update(rs.context)
        return Result(status=status, context=context)

    def __catch_all_completed(self, futures: list[Future]) -> Result:
        """Job parallel pool futures catching with all-completed mode.

        :param futures: A list of futures.
        :rtype: Result
        """
        context: DictData = {}
        status: int = 0
        for future in as_completed(futures):
            try:
                rs: Result = future.result(timeout=60)
                context.update(rs.context)
            except PickleError as err:
                # NOTE: (WF001) I do not want to fix this issue because
                #   it does not make sense and over-engineering with
                #   this bug fix process.
                raise JobException(
                    f"PyStage that create object on locals does use "
                    f"parallel in strategy execution;\n\t{err}"
                ) from None
            except TimeoutError:
                status = 1
                logger.warning(
                    f"({self.run_id}) [JOB]: Task is hanging. Attempting to "
                    f"kill."
                )
                future.cancel()
                time.sleep(0.1)
                if not future.cancelled():
                    logger.warning(
                        f"({self.run_id}) [JOB]: Failed to cancel the task."
                    )
                else:
                    logger.warning(
                        f"({self.run_id}) [JOB]: Task canceled successfully."
                    )
            except JobException as err:
                status = 1
                logger.error(
                    f"({self.run_id}) [JOB]: Get stage exception with "
                    f"fail-fast does not set;\n{err.__class__.__name__}:\n\t"
                    f"{err}"
                )
        return Result(status=status, context=context)

class Pipeline(BaseModel):
    """Pipeline Model this is the main future of this project because it use to
    be workflow data for running everywhere that you want or using it to
    scheduler task in background. It use lightweight coding line from Pydantic
    Model and enhance execute method on it.
    """

    name: str = Field(description="A pipeline name.")
    desc: Optional[str] = Field(
        default=None,
        description=(
            "A pipeline description that can be string of markdown content."
        ),
    )
    params: dict[str, Param] = Field(
        default_factory=dict,
        description="A parameters that want to use on this pipeline.",
    )
    on: list[On] = Field(
        default_factory=list,
        description="A list of On instance for this pipeline schedule.",
    )
    jobs: dict[str, Job] = Field(
        default_factory=dict,
        description="A mapping of job ID and job model that already loaded.",
    )
    run_id: Optional[str] = Field(
        default=None,
        description="A running pipeline ID.",
        repr=False,
        exclude=True,
    )

    @property
    def new_run_id(self) -> str:
        """Running ID of this pipeline that always generate new unique value."""
        return gen_id(self.name, unique=True)

    @classmethod
    def from_loader(
        cls,
        name: str,
        externals: DictData | None = None,
    ) -> Self:
        """Create Pipeline instance from the Loader object that only receive
        an input pipeline name. The loader object will use this pipeline name to
        searching configuration data of this pipeline model in conf path.

        :param name: A pipeline name that want to pass to Loader object.
        :param externals: An external parameters that want to pass to Loader
            object.
        :rtype: Self
        """
        loader: Loader = Loader(name, externals=(externals or {}))

        # NOTE: Validate the config type match with current connection model
        if loader.type != cls:
            raise ValueError(f"Type {loader.type} does not match with {cls}")

        loader_data: DictData = copy.deepcopy(loader.data)

        # NOTE: Add name to loader data
        loader_data["name"] = name.replace(" ", "_")

        # NOTE: Prepare `on` data
        cls.__bypass_on(loader_data)
        return cls.model_validate(obj=loader_data)

    @classmethod
    def __bypass_on(cls, data: DictData, externals: DictData | None = None):
        """Bypass the on data to loaded config data."""
        if on := data.pop("on", []):
            if isinstance(on, str):
                on = [on]
            if any(not isinstance(i, (dict, str)) for i in on):
                raise TypeError("The ``on`` key should be list of str or dict")

            # NOTE: Pass on value to Loader and keep on model object to on field
            data["on"] = [
                (
                    Loader(n, externals=(externals or {})).data
                    if isinstance(n, str)
                    else n
                )
                for n in on
            ]
        return data

    @model_validator(mode="before")
    def __prepare_params(cls, values: DictData) -> DictData:
        """Prepare the params key."""
        # NOTE: Prepare params type if it passing with only type value.
        if params := values.pop("params", {}):
            values["params"] = {
                p: (
                    {"type": params[p]}
                    if isinstance(params[p], str)
                    else params[p]
                )
                for p in params
            }
        return values

    @field_validator("desc", mode="after")
    def ___prepare_desc(cls, value: str) -> str:
        """Prepare description string that was created on a template."""
        return dedent(value)

    @model_validator(mode="after")
    def __validate_jobs_need_and_prepare_running_id(self):
        """Validate each need job in any jobs should exists."""
        for job in self.jobs:
            if not_exist := [
                need for need in self.jobs[job].needs if need not in self.jobs
            ]:
                raise PipelineException(
                    f"This needed jobs: {not_exist} do not exist in this "
                    f"pipeline, {self.name!r}"
                )

            # NOTE: update a job id with its job id from pipeline template
            self.jobs[job].id = job

        if self.run_id is None:
            self.run_id = self.new_run_id

        # VALIDATE: Validate pipeline name should not dynamic with params
        #   template.
        if has_template(self.name):
            raise ValueError(
                f"Pipeline name should not has any template, please check, "
                f"{self.name!r}."
            )

        return self

    def get_running_id(self, run_id: str) -> Self:
        """Return Pipeline model object that changing pipeline running ID with
        an input running ID.

        :param run_id: A replace pipeline running ID.
        :rtype: Self
        """
        return self.model_copy(update={"run_id": run_id})

    def job(self, name: str) -> Job:
        """Return Job model that exists on this pipeline.

        :param name: A job name that want to get from a mapping of job models.
        :type name: str

        :rtype: Job
        :returns: A job model that exists on this pipeline by input name.
        """
        if name not in self.jobs:
            raise ValueError(
                f"A Job {name!r} does not exists in this pipeline, "
                f"{self.name!r}"
            )
        return self.jobs[name]

    def parameterize(self, params: DictData) -> DictData:
        """Prepare parameters before passing to execution process. This method
        will create jobs key to params mapping that will keep any result from
        job execution.

        :param params: A parameter mapping that receive from pipeline execution.
        :rtype: DictData
        """
        # VALIDATE: Incoming params should have keys that set on this pipeline.
        if check_key := tuple(
            f"{k!r}"
            for k in self.params
            if (k not in params and self.params[k].required)
        ):
            raise PipelineException(
                f"Required Param on this pipeline setting does not set: "
                f"{', '.join(check_key)}."
            )

        # NOTE: mapping type of param before adding it to params variable.
        return {
            "params": (
                params
                | {
                    k: self.params[k].receive(params[k])
                    for k in params
                    if k in self.params
                }
            ),
            "jobs": {},
        }

    def release(
        self,
        on: On,
        params: DictData,
        queue: list[datetime],
        *,
        waiting_sec: int = 60,
        sleep_interval: int = 15,
        log: Log = None,
    ) -> Result:
        """Start running pipeline with the on schedule in period of 30 minutes.
        That mean it will still running at background 30 minutes until the
        schedule matching with its time.

        This method allow pipeline use log object to save the execution
        result to log destination like file log to local `/logs` directory.

        :param on: An on schedule value.
        :param params: A pipeline parameter that pass to execute method.
        :param queue: A list of release time that already running.
        :param waiting_sec: A second period value that allow pipeline execute.
        :param sleep_interval: A second value that want to waiting until time
            to execute.
        :param log: A log object that want to save execution result.
        :rtype: Result
        """
        log: Log = log or FileLog
        tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
        gen: CronRunner = on.generate(
            datetime.now(tz=tz).replace(second=0, microsecond=0)
            + timedelta(seconds=1)
        )
        cron_tz: ZoneInfo = gen.tz

        # NOTE: get next schedule time that generate from now.
        next_time: datetime = gen.next

        # NOTE: get next utils it does not logger.
        while log.is_pointed(self.name, next_time, queue=queue):
            next_time: datetime = gen.next

        # NOTE: push this next running time to log queue
        heappush(queue, next_time)

        # VALIDATE: Check the different time between the next schedule time and
        #   now that less than waiting period (second unit).
        if get_diff_sec(next_time, tz=cron_tz) > waiting_sec:
            logger.debug(
                f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
                f"Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
            )

            # NOTE: Remove next datetime from queue.
            queue.remove(next_time)

            time.sleep(0.15)
            return Result(
                status=0,
                context={
                    "params": params,
                    "poking": {"skipped": [str(on.cronjob)], "run": []},
                },
            )

        logger.debug(
            f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
            f"Closely to run >> {next_time:%Y-%m-%d %H:%M:%S}"
        )

        # NOTE: Release when the time is nearly to schedule time.
        while (duration := get_diff_sec(next_time, tz=cron_tz)) > (
            sleep_interval + 5
        ):
            logger.debug(
                f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
                f"Sleep until: {duration}"
            )
            time.sleep(sleep_interval)

        time.sleep(0.5)

        # NOTE: Release parameter that use to change if params has
        #   templating.
        release_params: DictData = {
            "release": {
                "logical_date": next_time,
            },
        }

        # WARNING: Re-create pipeline object that use new running pipeline
        #   ID.
        runner: Self = self.get_running_id(run_id=self.new_run_id)
        rs: Result = runner.execute(
            params=param2template(params, release_params),
        )
        logger.debug(
            f"({runner.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
            f"End release {next_time:%Y-%m-%d %H:%M:%S}"
        )

        # NOTE: Delete a copied pipeline instance for saving memory.
        del runner

        rs.set_parent_run_id(self.run_id)
        rs_log: Log = log.model_validate(
            {
                "name": self.name,
                "on": str(on.cronjob),
                "release": next_time,
                "context": rs.context,
                "parent_run_id": rs.run_id,
                "run_id": rs.run_id,
            }
        )
        # NOTE: Saving execution result to destination of the input log object.
        rs_log.save(excluded=None)

        queue.remove(next_time)
        time.sleep(0.05)
        return Result(
            status=0,
            context={
                "params": params,
                "poking": {"skipped": [], "run": [str(on.cronjob)]},
            },
        )

    def poke(
        self,
        params: DictData | None = None,
        *,
        log: Log | None = None,
    ) -> list[Result]:
        """Poke pipeline with threading executor pool for executing with all its
        schedules that was set on the `on` value. This method will observe its
        schedule that nearing to run with the ``self.release()`` method.

        :param params: A parameters that want to pass to the release method.
        :param log: A log object that want to use on this poking process.
        :rtype: list[Result]
        """
        logger.info(
            f"({self.run_id}) [POKING]: Start Poking: {self.name!r} ..."
        )

        # NOTE: If this pipeline does not set the on schedule, it will return
        #   empty result.
        if len(self.on) == 0:
            return []

        params: DictData = params or {}
        queue: list[datetime] = []
        results: list[Result] = []

        wk: int = int(os.getenv("WORKFLOW_CORE_MAX_PIPELINE_POKING") or "4")
        with ThreadPoolExecutor(max_workers=wk) as executor:
            # TODO: If I want to run infinite loop.
            futures: list[Future] = []
            for on in self.on:
                futures.append(
                    executor.submit(
                        self.release,
                        on,
                        params=params,
                        log=log,
                        queue=queue,
                    )
                )
                delay()

            # WARNING: This poking method does not allow to use fail-fast logic
            #   to catching parallel execution result.
            for future in as_completed(futures):
                results.append(future.result(timeout=60))

        if len(queue) > 0:
            logger.error(
                f"({self.run_id}) [POKING]: Log Queue does empty when poking "
                f"process was finishing."
            )

        return results

    def execute_job(
        self,
        job: str,
        params: DictData,
    ) -> Result:
        """Job Executor that use on pipeline executor.

        :param job: A job ID that want to execute.
        :param params: A params that was parameterized from pipeline execution.
        :rtype: Result
        """
        # VALIDATE: check a job ID that exists in this pipeline or not.
        if job not in self.jobs:
            raise PipelineException(
                f"The job ID: {job} does not exists on {self.name!r} pipeline."
            )
        try:
            logger.info(f"({self.run_id}) [PIPELINE]: Start execute: {job!r}")

            # IMPORTANT:
            #   Change any job running IDs to this pipeline running ID.
            job_obj: Job = self.jobs[job].get_running_id(self.run_id)
            j_rs: Result = job_obj.execute(params=params)

        except JobException as err:
            raise PipelineException(f"{job}: JobException: {err}") from None

        return Result(
            status=j_rs.status,
            context={job: job_obj.set_outputs(j_rs.context)},
        )

    def execute(
        self,
        params: DictData | None = None,
        *,
        timeout: int = 60,
    ) -> Result:
        """Execute pipeline with passing dynamic parameters to any jobs that
        included in the pipeline.

        :param params: An input parameters that use on pipeline execution that
            will parameterize before using it.
        :param timeout: A pipeline execution time out in second unit that use
            for limit time of execution and waiting job dependency.
        :rtype: Result

        See Also:
        ---

            The result of execution process for each jobs and stages on this
        pipeline will keeping in dict which able to catch out with all jobs and
        stages by dot annotation.

            For example, when I want to use the output from previous stage, I
        can access it with syntax:

            ... ${job-name}.stages.${stage-id}.outputs.${key}

        """
        logger.info(f"({self.run_id}) [CORE]: Start Execute: {self.name!r} ...")
        params: DictData = params or {}
        ts: float = time.monotonic()

        # NOTE: It should not do anything if it does not have job.
        if not self.jobs:
            logger.warning(
                f"({self.run_id}) [PIPELINE]: This pipeline: {self.name!r} "
                f"does not have any jobs"
            )
            return Result(status=0, context=params)

        # NOTE: Create a job queue that keep the job that want to running after
        #   it dependency condition.
        jq: Queue = Queue()
        for job_id in self.jobs:
            jq.put(job_id)

        # NOTE: Create result context that will pass this context to any
        #   execution dependency.
        context: DictData = self.parameterize(params)
        try:
            worker: int = int(os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "2"))
            (
                self.__exec_non_threading(context, ts, jq, timeout=timeout)
                if worker == 1
                else self.__exec_threading(
                    context, ts, jq, worker=worker, timeout=timeout
                )
            )
            return Result(status=0, context=context)
        except PipelineException as err:
            context.update(
                {"error_message": f"{err.__class__.__name__}: {err}"}
            )
            return Result(status=1, context=context)

    def __exec_threading(
        self,
        context: DictData,
        ts: float,
        job_queue: Queue,
        *,
        worker: int = 2,
        timeout: int = 600,
    ) -> DictData:
        """Pipeline threading execution.

        :param context: A context pipeline data that want to downstream passing.
        :param ts: A start timestamp that use for checking execute time should
            timeout.
        :param timeout: A second value unit that bounding running time.
        :param worker: A number of threading executor pool size.
        :rtype: DictData
        """
        not_time_out_flag: bool = True
        logger.debug(
            f"({self.run_id}): [CORE]: Run {self.name} with threading job "
            f"executor"
        )

        # IMPORTANT: The job execution can run parallel and waiting by
        #   needed.
        with ThreadPoolExecutor(max_workers=worker) as executor:
            futures: list[Future] = []

            while not job_queue.empty() and (
                not_time_out_flag := ((time.monotonic() - ts) < timeout)
            ):
                job_id: str = job_queue.get()
                job: Job = self.jobs[job_id]

                if any(need not in context["jobs"] for need in job.needs):
                    job_queue.put(job_id)
                    time.sleep(0.25)
                    continue

                futures.append(
                    executor.submit(
                        self.execute_job,
                        job_id,
                        params=copy.deepcopy(context),
                    ),
                )
                job_queue.task_done()

            # NOTE: Wait for all items to finish processing
            job_queue.join()

            for future in as_completed(futures):
                if err := future.exception():
                    logger.error(f"{err}")
                    raise PipelineException(f"{err}")

                # NOTE: Update job result to pipeline result.
                context["jobs"].update(future.result(timeout=20).conext)

        if not_time_out_flag:
            return context

        # NOTE: Raise timeout error.
        logger.warning(
            f"({self.run_id}) [PIPELINE]: Execution of pipeline, {self.name!r} "
            f", was timeout"
        )
        raise PipelineException(
            f"Execution of pipeline: {self.name} was timeout"
        )

    def __exec_non_threading(
        self,
        context: DictData,
        ts: float,
        job_queue: Queue,
        *,
        timeout: int = 600,
    ) -> DictData:
        """Pipeline non-threading execution that use sequential job running
        and waiting previous run successful.

        :param context: A context pipeline data that want to downstream passing.
        :param ts: A start timestamp that use for checking execute time should
            timeout.
        :param timeout: A second value unit that bounding running time.
        :rtype: DictData
        """
        not_time_out_flag: bool = True
        logger.debug(
            f"({self.run_id}) [CORE]: Run {self.name} with non-threading job "
            f"executor"
        )

        while not job_queue.empty() and (
            not_time_out_flag := ((time.monotonic() - ts) < timeout)
        ):
            job_id: str = job_queue.get()
            job: Job = self.jobs[job_id]

            # NOTE:
            if any(need not in context["jobs"] for need in job.needs):
                job_queue.put(job_id)
                time.sleep(0.25)
                continue

            # NOTE: Start job execution.
            job_rs = self.execute_job(job_id, params=copy.deepcopy(context))
            context["jobs"].update(job_rs.context)
            job_queue.task_done()

        # NOTE: Wait for all items to finish processing
        job_queue.join()

        if not_time_out_flag:
            return context

        # NOTE: Raise timeout error.
        logger.warning(
            f"({self.run_id}) [PIPELINE]: Execution of pipeline was timeout"
        )
        raise PipelineException(
            f"Execution of pipeline: {self.name} was timeout"
        )
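For reference, a minimal usage sketch of the Strategy model removed above, built from the sample data in its own docstring. It is illustrative only and runs against ddeutil-workflow 0.0.10, the last version that still ships ddeutil.workflow.pipeline:

# Illustrative only: exercises the deleted Strategy.make() matrix expansion
# against ddeutil-workflow 0.0.10 (the version shown in this diff).
from ddeutil.workflow.pipeline import Strategy

strategy = Strategy.model_validate(
    {
        "max-parallel": 1,
        "fail-fast": False,
        "matrix": {"first": [1, 2, 3], "second": ["foo", "bar"]},
        "include": [{"first": 4, "second": "foo"}],
        "exclude": [{"first": 1, "second": "bar"}],
    }
)

# make() cross-products the matrix, drops the excluded combination
# {'first': 1, 'second': 'bar'}, then appends the include entry, so the
# result holds six strategy mappings in total.
print(strategy.make())

The same expansion drives Job.execute() above, which runs each generated mapping either sequentially or through a ThreadPoolExecutor bounded by max_parallel.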