ddeutil-workflow 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +9 -0
- ddeutil/workflow/__types.py +43 -1
- ddeutil/workflow/exceptions.py +13 -1
- ddeutil/workflow/loader.py +16 -110
- ddeutil/workflow/on.py +195 -0
- ddeutil/workflow/pipeline.py +351 -371
- ddeutil/workflow/{vendors/__schedule.py → scheduler.py} +222 -176
- ddeutil/workflow/stage.py +402 -0
- ddeutil/workflow/utils.py +219 -28
- {ddeutil_workflow-0.0.4.dist-info → ddeutil_workflow-0.0.6.dist-info}/METADATA +118 -90
- ddeutil_workflow-0.0.6.dist-info/RECORD +15 -0
- {ddeutil_workflow-0.0.4.dist-info → ddeutil_workflow-0.0.6.dist-info}/WHEEL +1 -1
- ddeutil/workflow/__regex.py +0 -44
- ddeutil/workflow/conn.py +0 -240
- ddeutil/workflow/schedule.py +0 -82
- ddeutil/workflow/tasks/__init__.py +0 -6
- ddeutil/workflow/tasks/_pandas.py +0 -54
- ddeutil/workflow/tasks/_polars.py +0 -92
- ddeutil/workflow/vendors/__dataset.py +0 -127
- ddeutil/workflow/vendors/__dict.py +0 -333
- ddeutil/workflow/vendors/__init__.py +0 -0
- ddeutil/workflow/vendors/aws.py +0 -185
- ddeutil/workflow/vendors/az.py +0 -0
- ddeutil/workflow/vendors/minio.py +0 -11
- ddeutil/workflow/vendors/pd.py +0 -13
- ddeutil/workflow/vendors/pg.py +0 -11
- ddeutil/workflow/vendors/pl.py +0 -172
- ddeutil/workflow/vendors/sftp.py +0 -209
- ddeutil_workflow-0.0.4.dist-info/RECORD +0 -29
- {ddeutil_workflow-0.0.4.dist-info → ddeutil_workflow-0.0.6.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.4.dist-info → ddeutil_workflow-0.0.6.dist-info}/top_level.txt +0 -0
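The headline change in this release: every stage class (`BaseStage`, `EmptyStage`, `ShellStage`, `PyStage`, `TaskStage`) moves out of `pipeline.py` into the new `stage.py`, and stage, job, and pipeline execution now return a `Result` object instead of mutating and returning the params dict. A minimal sketch of that contract, assuming only the two fields the diff below actually uses (`status` and `context`); the real model is imported from `ddeutil.workflow.utils` and its implementation is not shown in this diff:

```python
from dataclasses import dataclass, field
from typing import Any


@dataclass
class Result:
    # Stand-in for ddeutil.workflow.utils.Result; only the two fields that
    # the 0.0.6 diff below relies on are modeled here.
    status: int = 0
    context: dict[str, Any] = field(default_factory=dict)


# Context shape that Job.execute() builds per strategy, following the
# structure comments inside the diff: input params flow in, stage outputs
# accumulate under "stages", and Pipeline.execute() fills "jobs".
context: dict[str, Any] = {
    "params": {},   # current input params
    "jobs": {},     # filled by Pipeline.execute() from each job's Result
    "matrix": {},   # current strategy value
    "stages": {},   # {stage-id: {"outputs": ...}} per executed stage
}
rs = Result(status=0, context=context)
assert rs.status == 0  # callers raise JobException/PipelineException otherwise
```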
ddeutil/workflow/pipeline.py
CHANGED
@@ -5,284 +5,29 @@
 # ------------------------------------------------------------------------------
 from __future__ import annotations
 
-import contextlib
-import inspect
-import itertools
+import copy
 import logging
-import subprocess
-import sys
 import time
-import uuid
-from abc import ABC, abstractmethod
-from inspect import Parameter
-from pathlib import Path
 from queue import Queue
-from subprocess import CompletedProcess
-from typing import Any, Callable, Optional, Union
+from typing import Optional
 
-import msgspec as spec
 from pydantic import BaseModel, Field
 from pydantic.functional_validators import model_validator
 from typing_extensions import Self
 
-from .
-from .
-from .
-from .
-from .
-
-
-class BaseStage(BaseModel, ABC):
-    """Base Stage Model that keep only id and name fields."""
-
-    id: Optional[str] = Field(
-        default=None,
-        description=(
-            "The stage ID that use to keep execution output or getting by job "
-            "owner."
-        ),
-    )
-    name: str = Field(
-        description="The stage name that want to logging when start execution."
-    )
-
-    @abstractmethod
-    def execute(self, params: DictData) -> DictData:
-        """Execute abstraction method that action something by sub-model class.
-
-        :param params: A parameter data that want to use in this execution.
-        """
-        raise NotImplementedError("Stage should implement ``execute`` method.")
-
-    def set_outputs(self, rs: DictData, params: DictData) -> DictData:
-        """Set an outputs from execution process to an input params."""
-        if self.id is None:
-            return params
-
-        if "stages" not in params:
-            params["stages"] = {}
-
-        params["stages"][self.id] = {"outputs": rs}
-        return params
-
-
-class EmptyStage(BaseStage):
-    """Empty stage that is doing nothing and logging the name of stage only."""
-
-    def execute(self, params: DictData) -> DictData:
-        """Execute for the Empty stage that do only logging out."""
-        logging.info(f"Execute: {self.name!r}")
-        return params
-
-
-class ShellStage(BaseStage):
-    """Shell statement stage."""
-
-    shell: str
-    env: DictStr = Field(default_factory=dict)
-
-    @staticmethod
-    @contextlib.contextmanager
-    def __prepare_shell(shell: str):
-        """Return context of prepared shell statement that want to execute. This
-        step will write the `.sh` file before giving this file name to context.
-        After that, it will auto delete this file automatic.
-
-        :param shell: A shell statement that want to prepare.
-        """
-        f_name: str = f"{uuid.uuid4()}.sh"
-        f_shebang: str = "bash" if sys.platform.startswith("win") else "sh"
-        with open(f"./{f_name}", mode="w", newline="\n") as f:
-            f.write(f"#!/bin/{f_shebang}\n")
-
-            # NOTE: make sure that shell script file does not have `\r` char.
-            f.write(shell.replace("\r\n", "\n"))
-
-        make_exec(f"./{f_name}")
-
-        yield [f_shebang, f_name]
-
-        Path(f_name).unlink()
-
-    def set_outputs(self, rs: CompletedProcess, params: DictData) -> DictData:
-        """Set outputs to params"""
-        # NOTE: skipping set outputs of stage execution when id does not set.
-        if self.id is None:
-            return params
-
-        if "stages" not in params:
-            params["stages"] = {}
-
-        params["stages"][self.id] = {
-            # NOTE: The output will fileter unnecessary keys from ``_locals``.
-            "outputs": {
-                "return_code": rs.returncode,
-                "stdout": rs.stdout.rstrip("\n"),
-            },
-        }
-        return params
-
-    def execute(self, params: DictData) -> DictData:
-        """Execute the Shell & Powershell statement with the Python build-in
-        ``subprocess`` package.
-        """
-        with self.__prepare_shell(self.shell) as sh:
-            with open(sh[-1]) as f:
-                logging.debug(f.read())
-            logging.info(f"Shell-Execute: {sh}")
-            rs: CompletedProcess = subprocess.run(
-                sh,
-                shell=False,
-                capture_output=True,
-                text=True,
-            )
-        if rs.returncode > 0:
-            logging.error(f"{rs.stderr}\nRunning Statement:\n---\n{self.shell}")
-            raise TaskException(
-                f"{rs.stderr}\nRunning Statement:\n---\n{self.shell}"
-            )
-        self.set_outputs(rs, params)
-        return params
-
-
-class PyStage(BaseStage):
-    """Python executor stage that running the Python statement that receive
-    globals nad additional variables.
-    """
-
-    run: str
-    vars: DictData = Field(default_factory=dict)
-
-    def get_vars(self, params: DictData) -> DictData:
-        """Return variables"""
-        rs = self.vars.copy()
-        for p, v in self.vars.items():
-            rs[p] = map_params(v, params)
-        return rs
-
-    def set_outputs(self, rs: DictData, params: DictData) -> DictData:
-        """Set outputs to params"""
-        # NOTE: skipping set outputs of stage execution when id does not set.
-        if self.id is None:
-            return params
-
-        if "stages" not in params:
-            params["stages"] = {}
-
-        params["stages"][self.id] = {
-            # NOTE: The output will fileter unnecessary keys from ``_locals``.
-            "outputs": {k: rs[k] for k in rs if k != "__annotations__"},
-        }
-        return params
-
-    def execute(self, params: DictData) -> DictData:
-        """Execute the Python statement that pass all globals and input params
-        to globals argument on ``exec`` build-in function.
-
-        :param params: A parameter that want to pass before run any statement.
-        :type params: DictData
-
-        :rtype: DictData
-        :returns: A parameters from an input that was mapped output if the stage
-            ID was set.
-        """
-        _globals: DictData = globals() | params | self.get_vars(params)
-        _locals: DictData = {}
-        try:
-            exec(map_params(self.run, params), _globals, _locals)
-        except Exception as err:
-            raise TaskException(
-                f"{err.__class__.__name__}: {err}\nRunning Statement:\n---\n"
-                f"{self.run}"
-            ) from None
-
-        # NOTE: set outputs from ``_locals`` value from ``exec``.
-        self.set_outputs(_locals, params)
-        return params | {k: _globals[k] for k in params if k in _globals}
-
-
-class TaskSearch(spec.Struct, kw_only=True, tag="task"):
-    """Task Search Struct that use the `msgspec` for the best performance."""
-
-    path: str
-    func: str
-    tag: str
-
-    def to_dict(self) -> DictData:
-        """Return dict data from struct fields."""
-        return {f: getattr(self, f) for f in self.__struct_fields__}
-
-
-class TaskStage(BaseStage):
-    """Task executor stage that running the Python function."""
-
-    task: str
-    args: DictData
-
-    @staticmethod
-    def extract_task(task: str) -> Callable[[], Callable[[Any], Any]]:
-        """Extract Task string value to task function."""
-        if not (found := RegexConf.RE_TASK_FMT.search(task)):
-            raise ValueError("Task does not match with task format regex.")
-        tasks: TaskSearch = TaskSearch(**found.groupdict())
-
-        # NOTE: Registry object should implement on this package only.
-        # TODO: This prefix value to search registry should dynamic with
-        #   config file.
-        rgt = make_registry(f"ddeutil.workflow.{tasks.path}")
-        if tasks.func not in rgt:
-            raise NotImplementedError(
-                f"ddeutil.workflow.{tasks.path}.registries does not "
-                f"implement registry: {tasks.func}."
-            )
-
-        if tasks.tag not in rgt[tasks.func]:
-            raise NotImplementedError(
-                f"tag: {tasks.tag} does not found on registry func: "
-                f"ddeutil.workflow.{tasks.path}.registries."
-                f"{tasks.func}"
-            )
-        return rgt[tasks.func][tasks.tag]
-
-    def execute(self, params: DictData) -> DictData:
-        """Execute the Task function."""
-        task_caller = self.extract_task(self.task)()
-        if not callable(task_caller):
-            raise ImportError("Task caller function does not callable.")
-
-        # NOTE: check task caller parameters
-        ips = inspect.signature(task_caller)
-        if any(
-            k not in self.args
-            for k in ips.parameters
-            if ips.parameters[k].default == Parameter.empty
-        ):
-            raise ValueError(
-                f"necessary parameters, ({', '.join(ips.parameters.keys())}), "
-                f"does not set to args"
-            )
-        try:
-            rs = task_caller(**map_params(self.args, params))
-        except Exception as err:
-            raise TaskException(f"{err.__class__.__name__}: {err}") from err
-        self.set_outputs(rs, params)
-        return params
-
-
-# NOTE: Order of parsing stage data
-Stage = Union[
-    PyStage,
-    ShellStage,
-    TaskStage,
-    EmptyStage,
-]
+from .__types import DictData, DictStr, Matrix, MatrixExclude, MatrixInclude
+from .exceptions import JobException, PipelineException
+from .loader import Loader
+from .on import On
+from .stage import Stage
+from .utils import Param, Result, cross_product, dash2underscore, gen_id
 
 
 class Strategy(BaseModel):
     """Strategy Model that will combine a matrix together for running the
     special job.
 
-
+    Data Validate:
     >>> strategy = {
     ...     'matrix': {
     ...         'first': [1, 2, 3],
@@ -295,31 +40,114 @@ class Strategy(BaseModel):
 
     fail_fast: bool = Field(default=False)
     max_parallel: int = Field(default=-1)
-    matrix:
-    include:
-
+    matrix: Matrix = Field(default_factory=dict)
+    include: MatrixInclude = Field(
+        default_factory=list,
+        description="A list of additional matrix that want to adds-in.",
+    )
+    exclude: MatrixExclude = Field(
+        default_factory=list,
+        description="A list of exclude matrix that want to filter-out.",
+    )
 
     @model_validator(mode="before")
     def __prepare_keys(cls, values: DictData) -> DictData:
-
-
-
-
+        """Rename key that use dash to underscore because Python does not
+        support this character exist in any variable name.
+        """
+        dash2underscore("max-parallel", values)
+        dash2underscore("fail-fast", values)
         return values
 
+    def make(self) -> list[DictStr]:
+        """Return List of product of matrix values that already filter with
+        exclude and add include.
+
+        :rtype: list[DictStr]
+        """
+        # NOTE: If it does not set matrix, it will return list of an empty dict.
+        if not (mt := self.matrix):
+            return [{}]
+
+        final: list[DictStr] = []
+        for r in cross_product(matrix=mt):
+            if any(
+                all(r[k] == v for k, v in exclude.items())
+                for exclude in self.exclude
+            ):
+                continue
+            final.append(r)
+
+        # NOTE: If it is empty matrix and include, it will return list of an
+        #   empty dict.
+        if not final and not self.include:
+            return [{}]
+
+        # NOTE: Add include to generated matrix with exclude list.
+        add: list[DictStr] = []
+        for include in self.include:
+            # VALIDATE:
+            #   Validate any key in include list should be a subset of some one
+            #   in matrix.
+            if all(not (set(include.keys()) <= set(m.keys())) for m in final):
+                raise ValueError("Include should have the keys equal to matrix")
+
+            # VALIDATE:
+            #   Validate value of include does not duplicate with generated
+            #   matrix.
+            if any(
+                all(include.get(k) == v for k, v in m.items())
+                for m in [*final, *add]
+            ):
+                continue
+            add.append(include)
+        final.extend(add)
+        return final
+
 
 class Job(BaseModel):
-    """Job Model
+    """Job Model (group of stages).
+
+    This job model allow you to use for-loop that call matrix strategy. If
+    you pass matrix mapping and it able to generate, you will see it running
+    with loop of matrix values.
+
+    Data Validate:
+    >>> job = {
+    ...     "runs-on": None,
+    ...     "strategy": {},
+    ...     "needs": [],
+    ...     "stages": [
+    ...         {
+    ...             "name": "Some stage",
+    ...             "run": "print('Hello World')",
+    ...         },
+    ...     ],
+    ... }
+    """
 
+    name: Optional[str] = Field(default=None)
+    desc: Optional[str] = Field(default=None)
     runs_on: Optional[str] = Field(default=None)
-    stages: list[Stage] = Field(
-
-
+    stages: list[Stage] = Field(
+        default_factory=list,
+        description="A list of Stage of this job.",
+    )
+    needs: list[str] = Field(
+        default_factory=list,
+        description="A list of the job ID that want to run before this job.",
+    )
+    strategy: Strategy = Field(
+        default_factory=Strategy,
+        description="A strategy matrix that want to generate.",
+    )
 
     @model_validator(mode="before")
     def __prepare_keys(cls, values: DictData) -> DictData:
-
-
+        """Rename key that use dash to underscore because Python does not
+        support this character exist in any variable name.
+        """
+        dash2underscore("runs-on", values)
         return values
 
     def stage(self, stage_id: str) -> Stage:
@@ -329,57 +157,118 @@ class Job(BaseModel):
                 return stage
         raise ValueError(f"Stage ID {stage_id} does not exists")
 
-
-
-
-
-        if not (mt := self.strategy.matrix):
-            return [{}]
-        final: list[DictStr] = []
-        for r in [
-            {_k: _v for e in mapped for _k, _v in e.items()}
-            for mapped in itertools.product(
-                *[[{k: v} for v in vs] for k, vs in mt.items()]
-            )
-        ]:
-            if any(
-                all(r[k] == v for k, v in exclude.items())
-                for exclude in self.strategy.exclude
-            ):
-                continue
-            final.append(r)
+    @staticmethod
+    def set_outputs(output: DictData) -> DictData:
+        if len(output) > 1:
+            return {"strategies": output}
 
-
-        return [{}]
+        return output[next(iter(output))]
 
-
-
-
-
+    def strategy_execute(self, strategy: DictData, params: DictData) -> Result:
+        context: DictData = {}
+        context.update(params)
+        context.update({"matrix": strategy})
+
+        for stage in self.stages:
+            _st_name: str = stage.id or stage.name
+
+            if stage.is_skip(params=context):
+                logging.info(f"[JOB]: Skip the stage: {_st_name!r}")
                 continue
-
-
+            logging.info(f"[JOB]: Start execute the stage: {_st_name!r}")
+
+            rs: Result = stage.execute(params=context)
+            if rs.status == 0:
+                stage.set_outputs(rs.context, params=context)
+            else:
+                raise JobException(
+                    f"Getting status does not equal zero on stage: "
+                    f"{stage.name}."
+                )
+        return Result(
+            status=0,
+            context={
+                gen_id(strategy): {
+                    "matrix": strategy,
+                    "stages": context.pop("stages", {}),
+                },
+            },
+        )
 
-    def execute(self, params: DictData | None = None) ->
-        """
-
-
+    def execute(self, params: DictData | None = None) -> Result:
+        """Job execution with passing dynamic parameters from the pipeline
+        execution. It will generate matrix values at the first step and for-loop
+        any metrix to all stages dependency.
+
+        :param params: An input parameters that use on job execution.
+        :rtype: Result
+        """
+        strategy_context: DictData = {}
+        for strategy in self.strategy.make():
+
+            # NOTE: Create strategy context and update matrix and params to this
+            #   context. So, the context will have structure like;
+            # ---
+            # {
+            #   "params": { ... },   <== Current input params
+            #   "jobs": { ... },
+            #   "matrix": { ... }   <== Current strategy value
+            # }
+            #
+            context: DictData = {}
+            context.update(params)
+            context.update({"matrix": strategy})
 
+            # TODO: we should add option for ``wait_as_complete`` for release
+            #   a stage execution to run on background (multi-thread).
+            # ---
+            # >>> from concurrency
+            #
             # IMPORTANT: The stage execution only run sequentially one-by-one.
             for stage in self.stages:
-
-
-
-
+                _st_name: str = stage.id or stage.name
+
+                if stage.is_skip(params=context):
+                    logging.info(f"[JOB]: Skip the stage: {_st_name!r}")
+                    continue
+                logging.info(f"[JOB]: Start execute the stage: {_st_name!r}")
+
+                # NOTE: Logging a matrix that pass on this stage execution.
+                if strategy:
+                    logging.info(f"[...]: Matrix: {strategy}")
 
                 # NOTE:
                 #   I do not use below syntax because `params` dict be the
                 #   reference memory pointer and it was changed when I action
                 #   anything like update or re-construct this.
+                #
                 #   ... params |= stage.execute(params=params)
-
-
-
+                #
+                # This step will add the stage result to ``stages`` key in
+                #   that stage id. It will have structure like;
+                # ---
+                # {
+                #   "params": { ... },
+                #   "jobs": { ... },
+                #   "matrix": { ... },
+                #   "stages": { { "stage-id-1": ... }, ... }
+                # }
+                #
+                rs: Result = stage.execute(params=context)
+                if rs.status == 0:
+                    stage.set_outputs(rs.context, params=context)
+                else:
+                    raise JobException(
+                        f"Getting status does not equal zero on stage: "
+                        f"{stage.name}."
+                    )
+
+            strategy_context[gen_id(strategy)] = {
+                "matrix": strategy,
+                "stages": context.pop("stages", {}),
+            }
+
+        return Result(status=0, context=strategy_context)
 
 
 class Pipeline(BaseModel):
@@ -388,11 +277,74 @@ class Pipeline(BaseModel):
     coding line to execute it.
     """
 
-
-
+    name: str = Field(description="A pipeline name.")
+    desc: Optional[str] = Field(
+        default=None,
+        description=(
+            "A pipeline description that is able to be string of markdown "
+            "content."
+        ),
+    )
+    params: dict[str, Param] = Field(
+        default_factory=dict,
+        description="A parameters that want to use on this pipeline.",
+    )
+    on: list[On] = Field(
+        default_factory=list,
+        description="A list of On instance for this pipeline schedule.",
+    )
+    jobs: dict[str, Job] = Field(
+        default_factory=dict,
+        description="A mapping of job ID and job model that already loaded.",
+    )
+
+    @classmethod
+    def from_loader(
+        cls,
+        name: str,
+        externals: DictData | None = None,
+    ) -> Self:
+        """Create Pipeline instance from the Loader object.
+
+        :param name: A pipeline name that want to pass to Loader object.
+        :param externals: An external parameters that want to pass to Loader
+            object.
+        """
+        loader: Loader = Loader(name, externals=(externals or {}))
+        loader_data: DictData = copy.deepcopy(loader.data)
+
+        # NOTE: Add name to loader data
+        loader_data["name"] = name.replace(" ", "_")
+
+        if "jobs" not in loader_data:
+            raise ValueError("Config does not set ``jobs`` value")
+
+        # NOTE: Prepare `on` data
+        cls.__bypass_on(loader_data)
+        return cls.model_validate(loader_data)
+
+    @classmethod
+    def __bypass_on(cls, data: DictData, externals: DictData | None = None):
+        """Bypass the on data to loaded config data."""
+        if on := data.pop("on", []):
+            if isinstance(on, str):
+                on = [on]
+            if any(not isinstance(i, (dict, str)) for i in on):
+                raise TypeError("The ``on`` key should be list of str or dict")
+            data["on"] = [
+                (
+                    Loader(n, externals=(externals or {})).data
+                    if isinstance(n, str)
+                    else n
+                )
+                for n in on
+            ]
+        return data
 
     @model_validator(mode="before")
     def __prepare_params(cls, values: DictData) -> DictData:
+        """Prepare the params key."""
+        # NOTE: Prepare params type if it passing with only type value.
         if params := values.pop("params", {}):
             values["params"] = {
                 p: (
@@ -404,24 +356,6 @@ class Pipeline(BaseModel):
             }
         return values
 
-    @classmethod
-    def from_loader(
-        cls,
-        name: str,
-        externals: Optional[DictData] = None,
-    ) -> Self:
-        loader: Loader = Loader(name, externals=(externals or {}))
-        if "jobs" not in loader.data:
-            raise ValueError("Config does not set ``jobs`` value")
-        return cls(
-            jobs=loader.data["jobs"],
-            params=loader.data["params"],
-        )
-
-    @model_validator(mode="after")
-    def job_checking_needs(self):
-        return self
-
     def job(self, name: str) -> Job:
         """Return Job model that exists on this pipeline.
 
@@ -435,17 +369,53 @@ class Pipeline(BaseModel):
         raise ValueError(f"Job {name!r} does not exists")
         return self.jobs[name]
 
+    def parameterize(self, params: DictData) -> DictData:
+        """Prepare parameters before passing to execution process. This method
+        will create jobs key to params mapping that will keep any result from
+        job execution.
+
+        :param params: A parameter mapping that receive from pipeline execution.
+        """
+        # VALIDATE: Incoming params should have keys that set on this pipeline.
+        if check_key := tuple(
+            f"{k!r}"
+            for k in self.params
+            if (k not in params and self.params[k].required)
+        ):
+            raise ValueError(
+                f"Required Param on this pipeline setting does not set: "
+                f"{', '.join(check_key)}."
+            )
+
+        # NOTE: mapping type of param before adding it to params variable.
+        return {
+            "params": (
+                params
+                | {
+                    k: self.params[k].receive(params[k])
+                    for k in params
+                    if k in self.params
+                }
+            ),
+            "jobs": {},
+        }
+
     def execute(
         self,
         params: DictData | None = None,
-
-
+        *,
+        timeout: int = 60,
+    ) -> Result:
         """Execute pipeline with passing dynamic parameters to any jobs that
         included in the pipeline.
 
-        :param params: An input parameters that use on pipeline execution
-
-
+        :param params: An input parameters that use on pipeline execution that
+            will parameterize before using it.
+        :param timeout: A pipeline execution time out in second unit that use
+            for limit time of execution and waiting job dependency.
+        :rtype: Result
+
+        ---
 
         See Also:
 
@@ -456,41 +426,36 @@ class Pipeline(BaseModel):
         For example, when I want to use the output from previous stage, I
         can access it with syntax:
 
-            ...
+            ... ${job-name}.stages.${stage-id}.outputs.${key}
 
         """
+        logging.info(
+            f"[CORE]: Start Pipeline {self.name}:"
+            f"{gen_id(self.name, unique=True)}"
+        )
         params: DictData = params or {}
-        if check_key := tuple(f"{k!r}" for k in self.params if k not in params):
-            raise ValueError(
-                f"Parameters that needed on pipeline does not pass: "
-                f"{', '.join(check_key)}."
-            )
 
-
-
+        # NOTE: It should not do anything if it does not have job.
+        if not self.jobs:
+            logging.warning("[PIPELINE]: This pipeline does not have any jobs")
+            return Result(status=0, context=params)
 
-
-
-
-                | {
-                    k: self.params[k].receive(params[k])
-                    for k in params
-                    if k in self.params
-                }
-            ),
-            "jobs": {},
-        }
-
-        jq = Queue()
+        # NOTE: create a job queue that keep the job that want to running after
+        #   it dependency condition.
+        jq: Queue = Queue()
         for job_id in self.jobs:
             jq.put(job_id)
 
         ts: float = time.monotonic()
-        not_time_out_flag = True
+        not_time_out_flag: bool = True
+
+        # NOTE: Create result context that will pass this context to any
+        #   execution dependency.
+        rs: Result = Result(context=self.parameterize(params))
 
         # IMPORTANT: The job execution can run parallel and waiting by needed.
         while not jq.empty() and (
-            not_time_out_flag := ((time.monotonic() - ts) <
+            not_time_out_flag := ((time.monotonic() - ts) < timeout)
         ):
             job_id: str = jq.get()
             logging.info(f"[PIPELINE]: Start execute the job: {job_id!r}")
@@ -504,14 +469,29 @@ class Pipeline(BaseModel):
             # >>> import multiprocessing
             # >>> with multiprocessing.Pool(processes=3) as pool:
             # ...     results = pool.starmap(merge_names, ('', '', ...))
+            # ---
+            # This case we use multi-process because I want to split usage of
+            #   data in this level, that mean the data that push to parallel job
+            #   should not use across another job.
             #
-            if any(
+            if any(rs.context["jobs"].get(need) for need in job.needs):
                 jq.put(job_id)
-
-
-
-
-
+
+            # NOTE: copy current the result context for reference other job
+            #   context.
+            job_context: DictData = copy.deepcopy(rs.context)
+            job_rs: Result = job.execute(params=job_context)
+            if job_rs.status == 0:
+                # NOTE: Receive output of job execution.
+                rs.context["jobs"][job_id] = job.set_outputs(job_rs.context)
+            else:
+                raise PipelineException(
+                    f"Getting status does not equal zero on job: {job_id}."
+                )
+
         if not not_time_out_flag:
-
-
+            logging.warning("Execution of pipeline was time out")
+            rs.status = 1
+            return rs
+        rs.status = 0
+        return rs
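For reference, the matrix expansion behind the new `Strategy.make()` can be exercised standalone. The sketch below re-implements the same semantics outside the package; `cross_product` is a local stand-in reconstructed from the inline `itertools.product` code this diff removes from `Job`, and the example values are illustrative only:

```python
import itertools
from typing import Any


def cross_product(matrix: dict[str, list[Any]]) -> list[dict[str, Any]]:
    # Mirrors the inline expansion removed from Job: one dict per combination
    # of every key's value list.
    return [
        {k: v for item in combo for k, v in item.items()}
        for combo in itertools.product(
            *[[{k: v} for v in vs] for k, vs in matrix.items()]
        )
    ]


def make(
    matrix: dict[str, list[Any]],
    include: list[dict[str, Any]],
    exclude: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    # No matrix configured -> a single empty strategy, as in the diff.
    if not matrix:
        return [{}]

    # Drop a combination when it matches every key/value of an exclude entry.
    final = [
        r
        for r in cross_product(matrix)
        if not any(all(r[k] == v for k, v in ex.items()) for ex in exclude)
    ]
    if not final and not include:
        return [{}]

    add: list[dict[str, Any]] = []
    for inc in include:
        # Include keys must be a subset of the keys of some generated combo.
        if all(not (set(inc) <= set(m)) for m in final):
            raise ValueError("Include should have the keys equal to matrix")
        # Skip an include that duplicates an already-generated combination.
        if any(all(inc.get(k) == v for k, v in m.items()) for m in [*final, *add]):
            continue
        add.append(inc)
    return final + add


if __name__ == "__main__":
    print(make(
        matrix={"first": [1, 2], "second": ["a", "b"]},
        exclude=[{"first": 1, "second": "b"}],
        include=[{"first": 9, "second": "z"}],
    ))
    # [{'first': 1, 'second': 'a'}, {'first': 2, 'second': 'a'},
    #  {'first': 2, 'second': 'b'}, {'first': 9, 'second': 'z'}]
```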