ddeutil-workflow 0.0.5__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +9 -0
- ddeutil/workflow/__types.py +43 -1
- ddeutil/workflow/exceptions.py +13 -1
- ddeutil/workflow/loader.py +13 -115
- ddeutil/workflow/on.py +78 -26
- ddeutil/workflow/pipeline.py +341 -392
- ddeutil/workflow/{__scheduler.py → scheduler.py} +73 -45
- ddeutil/workflow/stage.py +402 -0
- ddeutil/workflow/utils.py +205 -35
- {ddeutil_workflow-0.0.5.dist-info → ddeutil_workflow-0.0.6.dist-info}/METADATA +95 -66
- ddeutil_workflow-0.0.6.dist-info/RECORD +15 -0
- ddeutil/workflow/__regex.py +0 -44
- ddeutil/workflow/tasks/__init__.py +0 -6
- ddeutil/workflow/tasks/dummy.py +0 -52
- ddeutil_workflow-0.0.5.dist-info/RECORD +0 -17
- {ddeutil_workflow-0.0.5.dist-info → ddeutil_workflow-0.0.6.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.5.dist-info → ddeutil_workflow-0.0.6.dist-info}/WHEEL +0 -0
- {ddeutil_workflow-0.0.5.dist-info → ddeutil_workflow-0.0.6.dist-info}/top_level.txt +0 -0
ddeutil/workflow/pipeline.py
CHANGED
@@ -5,302 +5,29 @@
|
|
5
5
|
# ------------------------------------------------------------------------------
|
6
6
|
from __future__ import annotations
|
7
7
|
|
8
|
-
import contextlib
|
9
|
-
import inspect
|
10
|
-
import itertools
|
8
|
+
import copy
|
11
9
|
import logging
|
12
|
-
import subprocess
|
13
|
-
import sys
|
14
10
|
import time
|
15
|
-
import uuid
|
16
|
-
from abc import ABC, abstractmethod
|
17
|
-
from inspect import Parameter
|
18
|
-
from pathlib import Path
|
19
11
|
from queue import Queue
|
20
|
-
from subprocess import CompletedProcess
|
21
|
-
from typing import Any, Callable, Optional, Union
|
12
|
+
from typing import Optional
|
22
13
|
|
23
14
|
from pydantic import BaseModel, Field
|
24
15
|
from pydantic.functional_validators import model_validator
|
25
16
|
from typing_extensions import Self
|
26
17
|
|
27
|
-
from .
|
28
|
-
from .
|
29
|
-
from .
|
30
|
-
from .
|
31
|
-
from .
|
32
|
-
|
33
|
-
|
34
|
-
class BaseStage(BaseModel, ABC):
|
35
|
-
"""Base Stage Model that keep only id and name fields for the stage
|
36
|
-
metadata. If you want to implement any custom stage, you can use this class
|
37
|
-
to parent and implement ``self.execute()`` method only.
|
38
|
-
"""
|
39
|
-
|
40
|
-
id: Optional[str] = Field(
|
41
|
-
default=None,
|
42
|
-
description=(
|
43
|
-
"The stage ID that use to keep execution output or getting by job "
|
44
|
-
"owner."
|
45
|
-
),
|
46
|
-
)
|
47
|
-
name: str = Field(
|
48
|
-
description="The stage name that want to logging when start execution."
|
49
|
-
)
|
50
|
-
|
51
|
-
@abstractmethod
|
52
|
-
def execute(self, params: DictData) -> DictData:
|
53
|
-
"""Execute abstraction method that action something by sub-model class.
|
54
|
-
This is important method that make this class is able to be the stage.
|
55
|
-
|
56
|
-
:param params: A parameter data that want to use in this execution.
|
57
|
-
:rtype: DictData
|
58
|
-
"""
|
59
|
-
raise NotImplementedError("Stage should implement ``execute`` method.")
|
60
|
-
|
61
|
-
def set_outputs(self, rs: DictData, params: DictData) -> DictData:
|
62
|
-
"""Set an outputs from execution process to an input params.
|
63
|
-
|
64
|
-
:param rs: A result data that want to extract to an output key.
|
65
|
-
:param params: A context data that want to add output result.
|
66
|
-
:rtype: DictData
|
67
|
-
"""
|
68
|
-
if self.id is None:
|
69
|
-
return params
|
70
|
-
|
71
|
-
if "stages" not in params:
|
72
|
-
params["stages"] = {}
|
73
|
-
|
74
|
-
params["stages"][self.id] = {"outputs": rs}
|
75
|
-
return params
|
76
|
-
|
77
|
-
|
78
|
-
class EmptyStage(BaseStage):
|
79
|
-
"""Empty stage that do nothing (context equal empty stage) and logging the
|
80
|
-
name of stage only to stdout.
|
81
|
-
"""
|
82
|
-
|
83
|
-
def execute(self, params: DictData) -> DictData:
|
84
|
-
"""Execution method for the Empty stage that do only logging out to
|
85
|
-
stdout.
|
86
|
-
|
87
|
-
:param params: A context data that want to add output result. But this
|
88
|
-
stage does not pass any output.
|
89
|
-
"""
|
90
|
-
logging.info(f"[STAGE]: Empty-Execute: {self.name!r}")
|
91
|
-
return params
|
92
|
-
|
93
|
-
|
94
|
-
class ShellStage(BaseStage):
|
95
|
-
"""Shell stage that execute bash script on the current OS. That mean if your
|
96
|
-
current OS is Windows, it will running bash in the WSL.
|
97
|
-
"""
|
98
|
-
|
99
|
-
shell: str = Field(description="A shell statement that want to execute.")
|
100
|
-
env: DictStr = Field(
|
101
|
-
default_factory=dict,
|
102
|
-
description=(
|
103
|
-
"An environment variable mapping that want to set before execute "
|
104
|
-
"this shell statement."
|
105
|
-
),
|
106
|
-
)
|
107
|
-
|
108
|
-
@contextlib.contextmanager
|
109
|
-
def __prepare_shell(self):
|
110
|
-
"""Return context of prepared shell statement that want to execute. This
|
111
|
-
step will write the `.sh` file before giving this file name to context.
|
112
|
-
After that, it will auto delete this file automatic.
|
113
|
-
"""
|
114
|
-
f_name: str = f"{uuid.uuid4()}.sh"
|
115
|
-
f_shebang: str = "bash" if sys.platform.startswith("win") else "sh"
|
116
|
-
with open(f"./{f_name}", mode="w", newline="\n") as f:
|
117
|
-
f.write(f"#!/bin/{f_shebang}\n")
|
118
|
-
|
119
|
-
for k in self.env:
|
120
|
-
f.write(f"{k}='{self.env[k]}';\n")
|
121
|
-
|
122
|
-
# NOTE: make sure that shell script file does not have `\r` char.
|
123
|
-
f.write(self.shell.replace("\r\n", "\n"))
|
124
|
-
|
125
|
-
make_exec(f"./{f_name}")
|
126
|
-
|
127
|
-
yield [f_shebang, f_name]
|
128
|
-
|
129
|
-
Path(f_name).unlink()
|
130
|
-
|
131
|
-
def set_outputs(self, rs: CompletedProcess, params: DictData) -> DictData:
|
132
|
-
"""Set outputs to params"""
|
133
|
-
# NOTE: skipping set outputs of stage execution when id does not set.
|
134
|
-
if self.id is None:
|
135
|
-
return params
|
136
|
-
|
137
|
-
if "stages" not in params:
|
138
|
-
params["stages"] = {}
|
139
|
-
|
140
|
-
params["stages"][self.id] = {
|
141
|
-
# NOTE: The output will fileter unnecessary keys from ``_locals``.
|
142
|
-
"outputs": {
|
143
|
-
"return_code": rs.returncode,
|
144
|
-
"stdout": rs.stdout.rstrip("\n"),
|
145
|
-
},
|
146
|
-
}
|
147
|
-
return params
|
148
|
-
|
149
|
-
def execute(self, params: DictData) -> DictData:
|
150
|
-
"""Execute the Shell & Powershell statement with the Python build-in
|
151
|
-
``subprocess`` package.
|
152
|
-
"""
|
153
|
-
with self.__prepare_shell() as sh:
|
154
|
-
logging.info(f"[STAGE]: Shell-Execute: {sh}")
|
155
|
-
rs: CompletedProcess = subprocess.run(
|
156
|
-
sh,
|
157
|
-
shell=False,
|
158
|
-
capture_output=True,
|
159
|
-
text=True,
|
160
|
-
)
|
161
|
-
if rs.returncode > 0:
|
162
|
-
err: str = (
|
163
|
-
rs.stderr.encode("utf-8").decode("utf-16")
|
164
|
-
if "\\x00" in rs.stderr
|
165
|
-
else rs.stderr
|
166
|
-
)
|
167
|
-
logging.error(f"{err}\nRunning Statement:\n---\n{self.shell}")
|
168
|
-
raise TaskException(f"{err}\nRunning Statement:\n---\n{self.shell}")
|
169
|
-
self.set_outputs(rs, params)
|
170
|
-
return params
|
171
|
-
|
172
|
-
|
173
|
-
class PyStage(BaseStage):
|
174
|
-
"""Python executor stage that running the Python statement that receive
|
175
|
-
globals nad additional variables.
|
176
|
-
"""
|
177
|
-
|
178
|
-
run: str
|
179
|
-
vars: DictData = Field(default_factory=dict)
|
180
|
-
|
181
|
-
def get_vars(self, params: DictData) -> DictData:
|
182
|
-
"""Return variables"""
|
183
|
-
rs = self.vars.copy()
|
184
|
-
for p, v in self.vars.items():
|
185
|
-
rs[p] = map_params(v, params)
|
186
|
-
return rs
|
187
|
-
|
188
|
-
def set_outputs(self, rs: DictData, params: DictData) -> DictData:
|
189
|
-
"""Set an outputs from execution process to an input params.
|
190
|
-
|
191
|
-
:param rs: A result data that want to extract to an output key.
|
192
|
-
:param params: A context data that want to add output result.
|
193
|
-
:rtype: DictData
|
194
|
-
"""
|
195
|
-
# NOTE: skipping set outputs of stage execution when id does not set.
|
196
|
-
if self.id is None:
|
197
|
-
return params
|
198
|
-
|
199
|
-
if "stages" not in params:
|
200
|
-
params["stages"] = {}
|
201
|
-
|
202
|
-
params["stages"][self.id] = {
|
203
|
-
# NOTE: The output will fileter unnecessary keys from ``_locals``.
|
204
|
-
"outputs": {k: rs[k] for k in rs if k != "__annotations__"},
|
205
|
-
}
|
206
|
-
return params
|
207
|
-
|
208
|
-
def execute(self, params: DictData) -> DictData:
|
209
|
-
"""Execute the Python statement that pass all globals and input params
|
210
|
-
to globals argument on ``exec`` build-in function.
|
211
|
-
|
212
|
-
:param params: A parameter that want to pass before run any statement.
|
213
|
-
:type params: DictData
|
214
|
-
|
215
|
-
:rtype: DictData
|
216
|
-
:returns: A parameters from an input that was mapped output if the stage
|
217
|
-
ID was set.
|
218
|
-
"""
|
219
|
-
_globals: DictData = globals() | params | self.get_vars(params)
|
220
|
-
_locals: DictData = {}
|
221
|
-
try:
|
222
|
-
exec(map_params(self.run, params), _globals, _locals)
|
223
|
-
except Exception as err:
|
224
|
-
raise TaskException(
|
225
|
-
f"{err.__class__.__name__}: {err}\nRunning Statement:\n---\n"
|
226
|
-
f"{self.run}"
|
227
|
-
) from None
|
228
|
-
|
229
|
-
# NOTE: set outputs from ``_locals`` value from ``exec``.
|
230
|
-
self.set_outputs(_locals, params)
|
231
|
-
return params | {k: _globals[k] for k in params if k in _globals}
|
232
|
-
|
233
|
-
|
234
|
-
class TaskStage(BaseStage):
|
235
|
-
"""Task executor stage that running the Python function."""
|
236
|
-
|
237
|
-
task: str
|
238
|
-
args: DictData
|
239
|
-
|
240
|
-
@staticmethod
|
241
|
-
def extract_task(task: str) -> Callable[[], Callable[[Any], Any]]:
|
242
|
-
"""Extract Task string value to task function."""
|
243
|
-
if not (found := RegexConf.RE_TASK_FMT.search(task)):
|
244
|
-
raise ValueError("Task does not match with task format regex.")
|
245
|
-
tasks: TaskSearch = TaskSearch(**found.groupdict())
|
246
|
-
|
247
|
-
# NOTE: Registry object should implement on this package only.
|
248
|
-
# TODO: This prefix value to search registry should dynamic with
|
249
|
-
# config file.
|
250
|
-
rgt = make_registry(f"ddeutil.workflow.{tasks.path}")
|
251
|
-
if tasks.func not in rgt:
|
252
|
-
raise NotImplementedError(
|
253
|
-
f"ddeutil.workflow.{tasks.path}.registries does not "
|
254
|
-
f"implement registry: {tasks.func}."
|
255
|
-
)
|
256
|
-
|
257
|
-
if tasks.tag not in rgt[tasks.func]:
|
258
|
-
raise NotImplementedError(
|
259
|
-
f"tag: {tasks.tag} does not found on registry func: "
|
260
|
-
f"ddeutil.workflow.{tasks.path}.registries."
|
261
|
-
f"{tasks.func}"
|
262
|
-
)
|
263
|
-
return rgt[tasks.func][tasks.tag]
|
264
|
-
|
265
|
-
def execute(self, params: DictData) -> DictData:
|
266
|
-
"""Execute the Task function."""
|
267
|
-
task_caller = self.extract_task(self.task)()
|
268
|
-
if not callable(task_caller):
|
269
|
-
raise ImportError("Task caller function does not callable.")
|
270
|
-
|
271
|
-
# NOTE: check task caller parameters
|
272
|
-
ips = inspect.signature(task_caller)
|
273
|
-
if any(
|
274
|
-
k not in self.args
|
275
|
-
for k in ips.parameters
|
276
|
-
if ips.parameters[k].default == Parameter.empty
|
277
|
-
):
|
278
|
-
raise ValueError(
|
279
|
-
f"necessary parameters, ({', '.join(ips.parameters.keys())}), "
|
280
|
-
f"does not set to args"
|
281
|
-
)
|
282
|
-
try:
|
283
|
-
rs = task_caller(**map_params(self.args, params))
|
284
|
-
except Exception as err:
|
285
|
-
raise TaskException(f"{err.__class__.__name__}: {err}") from err
|
286
|
-
self.set_outputs(rs, params)
|
287
|
-
return params
|
288
|
-
|
289
|
-
|
290
|
-
# NOTE: Order of parsing stage data
|
291
|
-
Stage = Union[
|
292
|
-
PyStage,
|
293
|
-
ShellStage,
|
294
|
-
TaskStage,
|
295
|
-
EmptyStage,
|
296
|
-
]
|
18
|
+
from .__types import DictData, DictStr, Matrix, MatrixExclude, MatrixInclude
|
19
|
+
from .exceptions import JobException, PipelineException
|
20
|
+
from .loader import Loader
|
21
|
+
from .on import On
|
22
|
+
from .stage import Stage
|
23
|
+
from .utils import Param, Result, cross_product, dash2underscore, gen_id
|
297
24
|
|
298
25
|
|
299
26
|
class Strategy(BaseModel):
|
300
27
|
"""Strategy Model that will combine a matrix together for running the
|
301
28
|
special job.
|
302
29
|
|
303
|
-
|
30
|
+
Data Validate:
|
304
31
|
>>> strategy = {
|
305
32
|
... 'matrix': {
|
306
33
|
... 'first': [1, 2, 3],
|
@@ -313,34 +40,114 @@ class Strategy(BaseModel):
|
|
313
40
|
|
314
41
|
fail_fast: bool = Field(default=False)
|
315
42
|
max_parallel: int = Field(default=-1)
|
316
|
-
matrix:
|
317
|
-
include:
|
318
|
-
|
43
|
+
matrix: Matrix = Field(default_factory=dict)
|
44
|
+
include: MatrixInclude = Field(
|
45
|
+
default_factory=list,
|
46
|
+
description="A list of additional matrix that want to adds-in.",
|
47
|
+
)
|
48
|
+
exclude: MatrixExclude = Field(
|
49
|
+
default_factory=list,
|
50
|
+
description="A list of exclude matrix that want to filter-out.",
|
51
|
+
)
|
319
52
|
|
320
53
|
@model_validator(mode="before")
|
321
54
|
def __prepare_keys(cls, values: DictData) -> DictData:
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
55
|
+
"""Rename key that use dash to underscore because Python does not
|
56
|
+
support this character exist in any variable name.
|
57
|
+
"""
|
58
|
+
dash2underscore("max-parallel", values)
|
59
|
+
dash2underscore("fail-fast", values)
|
326
60
|
return values
|
327
61
|
|
62
|
+
def make(self) -> list[DictStr]:
|
63
|
+
"""Return List of product of matrix values that already filter with
|
64
|
+
exclude and add include.
|
65
|
+
|
66
|
+
:rtype: list[DictStr]
|
67
|
+
"""
|
68
|
+
# NOTE: If it does not set matrix, it will return list of an empty dict.
|
69
|
+
if not (mt := self.matrix):
|
70
|
+
return [{}]
|
71
|
+
|
72
|
+
final: list[DictStr] = []
|
73
|
+
for r in cross_product(matrix=mt):
|
74
|
+
if any(
|
75
|
+
all(r[k] == v for k, v in exclude.items())
|
76
|
+
for exclude in self.exclude
|
77
|
+
):
|
78
|
+
continue
|
79
|
+
final.append(r)
|
80
|
+
|
81
|
+
# NOTE: If it is empty matrix and include, it will return list of an
|
82
|
+
# empty dict.
|
83
|
+
if not final and not self.include:
|
84
|
+
return [{}]
|
85
|
+
|
86
|
+
# NOTE: Add include to generated matrix with exclude list.
|
87
|
+
add: list[DictStr] = []
|
88
|
+
for include in self.include:
|
89
|
+
# VALIDATE:
|
90
|
+
# Validate any key in include list should be a subset of some one
|
91
|
+
# in matrix.
|
92
|
+
if all(not (set(include.keys()) <= set(m.keys())) for m in final):
|
93
|
+
raise ValueError("Include should have the keys equal to matrix")
|
94
|
+
|
95
|
+
# VALIDATE:
|
96
|
+
# Validate value of include does not duplicate with generated
|
97
|
+
# matrix.
|
98
|
+
if any(
|
99
|
+
all(include.get(k) == v for k, v in m.items())
|
100
|
+
for m in [*final, *add]
|
101
|
+
):
|
102
|
+
continue
|
103
|
+
add.append(include)
|
104
|
+
final.extend(add)
|
105
|
+
return final
|
106
|
+
|
328
107
|
|
329
108
|
class Job(BaseModel):
|
330
|
-
"""Job Model
|
109
|
+
"""Job Model (group of stages).
|
110
|
+
|
111
|
+
This job model allow you to use for-loop that call matrix strategy. If
|
112
|
+
you pass matrix mapping and it able to generate, you will see it running
|
113
|
+
with loop of matrix values.
|
114
|
+
|
115
|
+
Data Validate:
|
116
|
+
>>> job = {
|
117
|
+
... "runs-on": None,
|
118
|
+
... "strategy": {},
|
119
|
+
... "needs": [],
|
120
|
+
... "stages": [
|
121
|
+
... {
|
122
|
+
... "name": "Some stage",
|
123
|
+
... "run": "print('Hello World')",
|
124
|
+
... },
|
125
|
+
... ],
|
126
|
+
... }
|
127
|
+
"""
|
331
128
|
|
129
|
+
name: Optional[str] = Field(default=None)
|
130
|
+
desc: Optional[str] = Field(default=None)
|
332
131
|
runs_on: Optional[str] = Field(default=None)
|
333
|
-
stages: list[Stage] = Field(
|
132
|
+
stages: list[Stage] = Field(
|
133
|
+
default_factory=list,
|
134
|
+
description="A list of Stage of this job.",
|
135
|
+
)
|
334
136
|
needs: list[str] = Field(
|
335
137
|
default_factory=list,
|
336
138
|
description="A list of the job ID that want to run before this job.",
|
337
139
|
)
|
338
|
-
strategy: Strategy = Field(
|
140
|
+
strategy: Strategy = Field(
|
141
|
+
default_factory=Strategy,
|
142
|
+
description="A strategy matrix that want to generate.",
|
143
|
+
)
|
339
144
|
|
340
145
|
@model_validator(mode="before")
|
341
146
|
def __prepare_keys(cls, values: DictData) -> DictData:
|
342
|
-
|
343
|
-
|
147
|
+
"""Rename key that use dash to underscore because Python does not
|
148
|
+
support this character exist in any variable name.
|
149
|
+
"""
|
150
|
+
dash2underscore("runs-on", values)
|
344
151
|
return values
|
345
152
|
|
346
153
|
def stage(self, stage_id: str) -> Stage:
|
@@ -350,57 +157,118 @@ class Job(BaseModel):
|
|
350
157
|
return stage
|
351
158
|
raise ValueError(f"Stage ID {stage_id} does not exists")
|
352
159
|
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
if not (mt := self.strategy.matrix):
|
358
|
-
return [{}]
|
359
|
-
final: list[DictStr] = []
|
360
|
-
for r in [
|
361
|
-
{_k: _v for e in mapped for _k, _v in e.items()}
|
362
|
-
for mapped in itertools.product(
|
363
|
-
*[[{k: v} for v in vs] for k, vs in mt.items()]
|
364
|
-
)
|
365
|
-
]:
|
366
|
-
if any(
|
367
|
-
all(r[k] == v for k, v in exclude.items())
|
368
|
-
for exclude in self.strategy.exclude
|
369
|
-
):
|
370
|
-
continue
|
371
|
-
final.append(r)
|
160
|
+
@staticmethod
|
161
|
+
def set_outputs(output: DictData) -> DictData:
|
162
|
+
if len(output) > 1:
|
163
|
+
return {"strategies": output}
|
372
164
|
|
373
|
-
|
374
|
-
return [{}]
|
165
|
+
return output[next(iter(output))]
|
375
166
|
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
167
|
+
def strategy_execute(self, strategy: DictData, params: DictData) -> Result:
|
168
|
+
context: DictData = {}
|
169
|
+
context.update(params)
|
170
|
+
context.update({"matrix": strategy})
|
171
|
+
|
172
|
+
for stage in self.stages:
|
173
|
+
_st_name: str = stage.id or stage.name
|
174
|
+
|
175
|
+
if stage.is_skip(params=context):
|
176
|
+
logging.info(f"[JOB]: Skip the stage: {_st_name!r}")
|
380
177
|
continue
|
381
|
-
|
382
|
-
|
178
|
+
logging.info(f"[JOB]: Start execute the stage: {_st_name!r}")
|
179
|
+
|
180
|
+
rs: Result = stage.execute(params=context)
|
181
|
+
if rs.status == 0:
|
182
|
+
stage.set_outputs(rs.context, params=context)
|
183
|
+
else:
|
184
|
+
raise JobException(
|
185
|
+
f"Getting status does not equal zero on stage: "
|
186
|
+
f"{stage.name}."
|
187
|
+
)
|
188
|
+
return Result(
|
189
|
+
status=0,
|
190
|
+
context={
|
191
|
+
gen_id(strategy): {
|
192
|
+
"matrix": strategy,
|
193
|
+
"stages": context.pop("stages", {}),
|
194
|
+
},
|
195
|
+
},
|
196
|
+
)
|
197
|
+
|
198
|
+
def execute(self, params: DictData | None = None) -> Result:
|
199
|
+
"""Job execution with passing dynamic parameters from the pipeline
|
200
|
+
execution. It will generate matrix values at the first step and for-loop
|
201
|
+
any metrix to all stages dependency.
|
202
|
+
|
203
|
+
:param params: An input parameters that use on job execution.
|
204
|
+
:rtype: Result
|
205
|
+
"""
|
206
|
+
strategy_context: DictData = {}
|
207
|
+
for strategy in self.strategy.make():
|
383
208
|
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
209
|
+
# NOTE: Create strategy context and update matrix and params to this
|
210
|
+
# context. So, the context will have structure like;
|
211
|
+
# ---
|
212
|
+
# {
|
213
|
+
# "params": { ... }, <== Current input params
|
214
|
+
# "jobs": { ... },
|
215
|
+
# "matrix": { ... } <== Current strategy value
|
216
|
+
# }
|
217
|
+
#
|
218
|
+
context: DictData = {}
|
219
|
+
context.update(params)
|
220
|
+
context.update({"matrix": strategy})
|
388
221
|
|
222
|
+
# TODO: we should add option for ``wait_as_complete`` for release
|
223
|
+
# a stage execution to run on background (multi-thread).
|
224
|
+
# ---
|
225
|
+
# >>> from concurrency
|
226
|
+
#
|
389
227
|
# IMPORTANT: The stage execution only run sequentially one-by-one.
|
390
228
|
for stage in self.stages:
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
229
|
+
_st_name: str = stage.id or stage.name
|
230
|
+
|
231
|
+
if stage.is_skip(params=context):
|
232
|
+
logging.info(f"[JOB]: Skip the stage: {_st_name!r}")
|
233
|
+
continue
|
234
|
+
logging.info(f"[JOB]: Start execute the stage: {_st_name!r}")
|
235
|
+
|
236
|
+
# NOTE: Logging a matrix that pass on this stage execution.
|
237
|
+
if strategy:
|
238
|
+
logging.info(f"[...]: Matrix: {strategy}")
|
395
239
|
|
396
240
|
# NOTE:
|
397
241
|
# I do not use below syntax because `params` dict be the
|
398
242
|
# reference memory pointer and it was changed when I action
|
399
243
|
# anything like update or re-construct this.
|
244
|
+
#
|
400
245
|
# ... params |= stage.execute(params=params)
|
401
|
-
|
402
|
-
|
403
|
-
|
246
|
+
#
|
247
|
+
# This step will add the stage result to ``stages`` key in
|
248
|
+
# that stage id. It will have structure like;
|
249
|
+
# ---
|
250
|
+
# {
|
251
|
+
# "params": { ... },
|
252
|
+
# "jobs": { ... },
|
253
|
+
# "matrix": { ... },
|
254
|
+
# "stages": { { "stage-id-1": ... }, ... }
|
255
|
+
# }
|
256
|
+
#
|
257
|
+
rs: Result = stage.execute(params=context)
|
258
|
+
if rs.status == 0:
|
259
|
+
stage.set_outputs(rs.context, params=context)
|
260
|
+
else:
|
261
|
+
raise JobException(
|
262
|
+
f"Getting status does not equal zero on stage: "
|
263
|
+
f"{stage.name}."
|
264
|
+
)
|
265
|
+
|
266
|
+
strategy_context[gen_id(strategy)] = {
|
267
|
+
"matrix": strategy,
|
268
|
+
"stages": context.pop("stages", {}),
|
269
|
+
}
|
270
|
+
|
271
|
+
return Result(status=0, context=strategy_context)
|
404
272
|
|
405
273
|
|
406
274
|
class Pipeline(BaseModel):
|
@@ -409,13 +277,73 @@ class Pipeline(BaseModel):
|
|
409
277
|
coding line to execute it.
|
410
278
|
"""
|
411
279
|
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
280
|
+
name: str = Field(description="A pipeline name.")
|
281
|
+
desc: Optional[str] = Field(
|
282
|
+
default=None,
|
283
|
+
description=(
|
284
|
+
"A pipeline description that is able to be string of markdown "
|
285
|
+
"content."
|
286
|
+
),
|
287
|
+
)
|
288
|
+
params: dict[str, Param] = Field(
|
289
|
+
default_factory=dict,
|
290
|
+
description="A parameters that want to use on this pipeline.",
|
291
|
+
)
|
292
|
+
on: list[On] = Field(
|
293
|
+
default_factory=list,
|
294
|
+
description="A list of On instance for this pipeline schedule.",
|
295
|
+
)
|
296
|
+
jobs: dict[str, Job] = Field(
|
297
|
+
default_factory=dict,
|
298
|
+
description="A mapping of job ID and job model that already loaded.",
|
299
|
+
)
|
300
|
+
|
301
|
+
@classmethod
|
302
|
+
def from_loader(
|
303
|
+
cls,
|
304
|
+
name: str,
|
305
|
+
externals: DictData | None = None,
|
306
|
+
) -> Self:
|
307
|
+
"""Create Pipeline instance from the Loader object.
|
308
|
+
|
309
|
+
:param name: A pipeline name that want to pass to Loader object.
|
310
|
+
:param externals: An external parameters that want to pass to Loader
|
311
|
+
object.
|
312
|
+
"""
|
313
|
+
loader: Loader = Loader(name, externals=(externals or {}))
|
314
|
+
loader_data: DictData = copy.deepcopy(loader.data)
|
315
|
+
|
316
|
+
# NOTE: Add name to loader data
|
317
|
+
loader_data["name"] = name.replace(" ", "_")
|
318
|
+
|
319
|
+
if "jobs" not in loader_data:
|
320
|
+
raise ValueError("Config does not set ``jobs`` value")
|
321
|
+
|
322
|
+
# NOTE: Prepare `on` data
|
323
|
+
cls.__bypass_on(loader_data)
|
324
|
+
return cls.model_validate(loader_data)
|
325
|
+
|
326
|
+
@classmethod
|
327
|
+
def __bypass_on(cls, data: DictData, externals: DictData | None = None):
|
328
|
+
"""Bypass the on data to loaded config data."""
|
329
|
+
if on := data.pop("on", []):
|
330
|
+
if isinstance(on, str):
|
331
|
+
on = [on]
|
332
|
+
if any(not isinstance(i, (dict, str)) for i in on):
|
333
|
+
raise TypeError("The ``on`` key should be list of str or dict")
|
334
|
+
data["on"] = [
|
335
|
+
(
|
336
|
+
Loader(n, externals=(externals or {})).data
|
337
|
+
if isinstance(n, str)
|
338
|
+
else n
|
339
|
+
)
|
340
|
+
for n in on
|
341
|
+
]
|
342
|
+
return data
|
416
343
|
|
417
344
|
@model_validator(mode="before")
|
418
345
|
def __prepare_params(cls, values: DictData) -> DictData:
|
346
|
+
"""Prepare the params key."""
|
419
347
|
# NOTE: Prepare params type if it passing with only type value.
|
420
348
|
if params := values.pop("params", {}):
|
421
349
|
values["params"] = {
|
@@ -428,25 +356,6 @@ class Pipeline(BaseModel):
|
|
428
356
|
}
|
429
357
|
return values
|
430
358
|
|
431
|
-
@classmethod
|
432
|
-
def from_loader(
|
433
|
-
cls,
|
434
|
-
name: str,
|
435
|
-
externals: DictData | None = None,
|
436
|
-
) -> Self:
|
437
|
-
"""Create Pipeline instance from the Loader object."""
|
438
|
-
loader: Loader = Loader(name, externals=(externals or {}))
|
439
|
-
if "jobs" not in loader.data:
|
440
|
-
raise ValueError("Config does not set ``jobs`` value")
|
441
|
-
return cls(
|
442
|
-
jobs=loader.data["jobs"],
|
443
|
-
params=loader.data["params"],
|
444
|
-
)
|
445
|
-
|
446
|
-
@model_validator(mode="after")
|
447
|
-
def job_checking_needs(self):
|
448
|
-
return self
|
449
|
-
|
450
359
|
def job(self, name: str) -> Job:
|
451
360
|
"""Return Job model that exists on this pipeline.
|
452
361
|
|
@@ -460,17 +369,51 @@ class Pipeline(BaseModel):
|
|
460
369
|
raise ValueError(f"Job {name!r} does not exists")
|
461
370
|
return self.jobs[name]
|
462
371
|
|
372
|
+
def parameterize(self, params: DictData) -> DictData:
|
373
|
+
"""Prepare parameters before passing to execution process. This method
|
374
|
+
will create jobs key to params mapping that will keep any result from
|
375
|
+
job execution.
|
376
|
+
|
377
|
+
:param params: A parameter mapping that receive from pipeline execution.
|
378
|
+
"""
|
379
|
+
# VALIDATE: Incoming params should have keys that set on this pipeline.
|
380
|
+
if check_key := tuple(
|
381
|
+
f"{k!r}"
|
382
|
+
for k in self.params
|
383
|
+
if (k not in params and self.params[k].required)
|
384
|
+
):
|
385
|
+
raise ValueError(
|
386
|
+
f"Required Param on this pipeline setting does not set: "
|
387
|
+
f"{', '.join(check_key)}."
|
388
|
+
)
|
389
|
+
|
390
|
+
# NOTE: mapping type of param before adding it to params variable.
|
391
|
+
return {
|
392
|
+
"params": (
|
393
|
+
params
|
394
|
+
| {
|
395
|
+
k: self.params[k].receive(params[k])
|
396
|
+
for k in params
|
397
|
+
if k in self.params
|
398
|
+
}
|
399
|
+
),
|
400
|
+
"jobs": {},
|
401
|
+
}
|
402
|
+
|
463
403
|
def execute(
|
464
404
|
self,
|
465
405
|
params: DictData | None = None,
|
466
|
-
|
467
|
-
|
406
|
+
*,
|
407
|
+
timeout: int = 60,
|
408
|
+
) -> Result:
|
468
409
|
"""Execute pipeline with passing dynamic parameters to any jobs that
|
469
410
|
included in the pipeline.
|
470
411
|
|
471
|
-
:param params: An input parameters that use on pipeline execution
|
472
|
-
|
473
|
-
|
412
|
+
:param params: An input parameters that use on pipeline execution that
|
413
|
+
will parameterize before using it.
|
414
|
+
:param timeout: A pipeline execution time out in second unit that use
|
415
|
+
for limit time of execution and waiting job dependency.
|
416
|
+
:rtype: Result
|
474
417
|
|
475
418
|
---
|
476
419
|
|
@@ -483,44 +426,36 @@ class Pipeline(BaseModel):
|
|
483
426
|
For example, when I want to use the output from previous stage, I
|
484
427
|
can access it with syntax:
|
485
428
|
|
486
|
-
...
|
429
|
+
... ${job-name}.stages.${stage-id}.outputs.${key}
|
487
430
|
|
488
431
|
"""
|
432
|
+
logging.info(
|
433
|
+
f"[CORE]: Start Pipeline {self.name}:"
|
434
|
+
f"{gen_id(self.name, unique=True)}"
|
435
|
+
)
|
489
436
|
params: DictData = params or {}
|
490
|
-
if check_key := tuple(f"{k!r}" for k in self.params if k not in params):
|
491
|
-
raise ValueError(
|
492
|
-
f"Parameters that needed on pipeline does not pass: "
|
493
|
-
f"{', '.join(check_key)}."
|
494
|
-
)
|
495
437
|
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
params: DictData = {
|
501
|
-
"params": (
|
502
|
-
params
|
503
|
-
| {
|
504
|
-
k: self.params[k].receive(params[k])
|
505
|
-
for k in params
|
506
|
-
if k in self.params
|
507
|
-
}
|
508
|
-
),
|
509
|
-
"jobs": {},
|
510
|
-
}
|
438
|
+
# NOTE: It should not do anything if it does not have job.
|
439
|
+
if not self.jobs:
|
440
|
+
logging.warning("[PIPELINE]: This pipeline does not have any jobs")
|
441
|
+
return Result(status=0, context=params)
|
511
442
|
|
512
443
|
# NOTE: create a job queue that keep the job that want to running after
|
513
444
|
# it dependency condition.
|
514
|
-
jq = Queue()
|
445
|
+
jq: Queue = Queue()
|
515
446
|
for job_id in self.jobs:
|
516
447
|
jq.put(job_id)
|
517
448
|
|
518
449
|
ts: float = time.monotonic()
|
519
|
-
not_time_out_flag = True
|
450
|
+
not_time_out_flag: bool = True
|
451
|
+
|
452
|
+
# NOTE: Create result context that will pass this context to any
|
453
|
+
# execution dependency.
|
454
|
+
rs: Result = Result(context=self.parameterize(params))
|
520
455
|
|
521
456
|
# IMPORTANT: The job execution can run parallel and waiting by needed.
|
522
457
|
while not jq.empty() and (
|
523
|
-
not_time_out_flag := ((time.monotonic() - ts) <
|
458
|
+
not_time_out_flag := ((time.monotonic() - ts) < timeout)
|
524
459
|
):
|
525
460
|
job_id: str = jq.get()
|
526
461
|
logging.info(f"[PIPELINE]: Start execute the job: {job_id!r}")
|
@@ -534,15 +469,29 @@ class Pipeline(BaseModel):
|
|
534
469
|
# >>> import multiprocessing
|
535
470
|
# >>> with multiprocessing.Pool(processes=3) as pool:
|
536
471
|
# ... results = pool.starmap(merge_names, ('', '', ...))
|
472
|
+
# ---
|
473
|
+
# This case we use multi-process because I want to split usage of
|
474
|
+
# data in this level, that mean the data that push to parallel job
|
475
|
+
# should not use across another job.
|
537
476
|
#
|
538
|
-
if any(
|
477
|
+
if any(rs.context["jobs"].get(need) for need in job.needs):
|
539
478
|
jq.put(job_id)
|
540
479
|
|
541
|
-
job
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
480
|
+
# NOTE: copy current the result context for reference other job
|
481
|
+
# context.
|
482
|
+
job_context: DictData = copy.deepcopy(rs.context)
|
483
|
+
job_rs: Result = job.execute(params=job_context)
|
484
|
+
if job_rs.status == 0:
|
485
|
+
# NOTE: Receive output of job execution.
|
486
|
+
rs.context["jobs"][job_id] = job.set_outputs(job_rs.context)
|
487
|
+
else:
|
488
|
+
raise PipelineException(
|
489
|
+
f"Getting status does not equal zero on job: {job_id}."
|
490
|
+
)
|
491
|
+
|
546
492
|
if not not_time_out_flag:
|
547
|
-
|
548
|
-
|
493
|
+
logging.warning("Execution of pipeline was time out")
|
494
|
+
rs.status = 1
|
495
|
+
return rs
|
496
|
+
rs.status = 0
|
497
|
+
return rs
|