ddeutil-workflow 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -0
- ddeutil/workflow/__init__.py +0 -0
- ddeutil/workflow/__regex.py +44 -0
- ddeutil/workflow/__types.py +11 -0
- ddeutil/workflow/conn.py +235 -0
- ddeutil/workflow/dataset.py +306 -0
- ddeutil/workflow/exceptions.py +82 -0
- ddeutil/workflow/hooks/__init__.py +9 -0
- ddeutil/workflow/hooks/_postgres.py +2 -0
- ddeutil/workflow/loader.py +310 -0
- ddeutil/workflow/pipeline.py +338 -0
- ddeutil/workflow/schedule.py +87 -0
- ddeutil/workflow/tasks/__init__.py +10 -0
- ddeutil/workflow/tasks/_polars.py +41 -0
- ddeutil/workflow/utils/__init__.py +0 -0
- ddeutil/workflow/utils/receive.py +33 -0
- ddeutil/workflow/utils/selection.py +2 -0
- ddeutil/workflow/vendors/__dict.py +333 -0
- ddeutil/workflow/vendors/__init__.py +0 -0
- ddeutil/workflow/vendors/__schedule.py +667 -0
- ddeutil/workflow/vendors/aws_warpped.py +185 -0
- ddeutil/workflow/vendors/minio_warpped.py +11 -0
- ddeutil/workflow/vendors/sftp_wrapped.py +209 -0
- ddeutil_workflow-0.0.1.dist-info/LICENSE +21 -0
- ddeutil_workflow-0.0.1.dist-info/METADATA +251 -0
- ddeutil_workflow-0.0.1.dist-info/RECORD +28 -0
- ddeutil_workflow-0.0.1.dist-info/WHEEL +5 -0
- ddeutil_workflow-0.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,310 @@
|
|
1
|
+
# ------------------------------------------------------------------------------
|
2
|
+
# Copyright (c) 2022 Korawich Anuttra. All rights reserved.
|
3
|
+
# Licensed under the MIT License. See LICENSE in the project root for
|
4
|
+
# license information.
|
5
|
+
# ------------------------------------------------------------------------------
|
6
|
+
from __future__ import annotations
|
7
|
+
|
8
|
+
import copy
|
9
|
+
import logging
|
10
|
+
import urllib.parse
|
11
|
+
from functools import cached_property
|
12
|
+
from typing import Any, Callable, TypeVar
|
13
|
+
|
14
|
+
from ddeutil.core import (
|
15
|
+
clear_cache,
|
16
|
+
getdot,
|
17
|
+
hasdot,
|
18
|
+
import_string,
|
19
|
+
setdot,
|
20
|
+
)
|
21
|
+
from ddeutil.io import (
|
22
|
+
ConfigNotFound,
|
23
|
+
Params,
|
24
|
+
PathSearch,
|
25
|
+
Register,
|
26
|
+
YamlEnvFl,
|
27
|
+
map_func,
|
28
|
+
)
|
29
|
+
from ddeutil.io.__conf import UPDATE_KEY, VERSION_KEY
|
30
|
+
from fmtutil import Datetime
|
31
|
+
from pydantic import BaseModel
|
32
|
+
from typing_extensions import Self
|
33
|
+
|
34
|
+
from .__regex import RegexConf
|
35
|
+
from .__types import DictData, TupleStr
|
36
|
+
from .exceptions import ConfigArgumentError
|
37
|
+
|
38
|
+
AnyModel = TypeVar("AnyModel", bound=BaseModel)
|
39
|
+
|
40
|
+
|
41
|
+
class YamlEnvQuote(YamlEnvFl):
    """YAML env-file loader that URL-encodes every prepared value.

    Used by ``BaseLoad.from_register`` so that values substituted into
    connection-style strings are safe to embed in URLs.
    """

    @staticmethod
    def prepare(x: str) -> str:
        """Return *x* encoded with ``urllib.parse.quote_plus``.

        The value is coerced to ``str`` first, so non-string YAML values
        (ints, bools) are accepted as well.
        """
        return urllib.parse.quote_plus(str(x))
|
46
|
+
|
47
|
+
|
48
|
+
class BaseLoad:
    """Base configuration data loading object for load config data from
    `cls.load_stage` stage. The base loading object contain necessary
    properties and method for type object.

    :param data: dict : A configuration data content with fix keys, `name`,
        `fullname`, and `data`.
    :param params: Optional[dict] : A parameters mapping for some
        subclass of loading use.
    """

    # NOTE: Set loading config for inherit
    load_prefixes: TupleStr = ("conn",)
    load_datetime_name: str = "audit_date"
    load_datetime_fmt: str = "%Y-%m-%d %H:%M:%S"

    # NOTE: Set preparing config for inherit
    data_excluded: TupleStr = (UPDATE_KEY, VERSION_KEY)
    option_key: TupleStr = ("parameters",)
    datetime_key: TupleStr = ("endpoint",)

    @classmethod
    def from_register(
        cls,
        name: str,
        params: Params,
        externals: DictData | None = None,
    ) -> Self:
        """Loading config data from register object.

        :param name: A name of config data catalog that can register.
        :type name: str
        :param params: A params object.
        :type params: Params
        :param externals: A external parameters
        :type externals: DictData | None(=None)
        """
        try:
            rs: Register = Register(
                name=name,
                stage=params.stage_final,
                params=params,
                loader=YamlEnvQuote,
            )
        except ConfigNotFound:
            # NOTE: The config does not exist on the final stage yet, so
            #   deploy it up to that stage before registering.
            rs: Register = Register(
                name=name,
                params=params,
                loader=YamlEnvQuote,
            ).deploy(stop=params.stage_final)
        return cls(
            name=rs.name,
            data=rs.data().copy(),
            params=params,
            externals=externals,
        )

    def __init__(
        self,
        name: str,
        data: DictData,
        params: Params,
        externals: DictData | None = None,
    ) -> None:
        """Main initialize base config object which get a name of configuration
        and load data by the register object.
        """
        self.name: str = name
        self.__data: DictData = data
        self.params: Params = params
        self.externals: DictData = externals or {}

        # NOTE: Validate step of base loading object.
        if not any(
            self.name.startswith(prefix) for prefix in self.load_prefixes
        ):
            raise ConfigArgumentError(
                "prefix",
                (
                    f"{self.name!r} does not starts with the "
                    f"{self.__class__.__name__} prefixes: "
                    f"{self.load_prefixes!r}."
                ),
            )

    @property
    def updt(self):
        """Return the update-tracking value of this config data.

        FIX: read from the raw ``self.__data`` instead of the ``data``
        property — ``data`` returns ``_map_data``, which drops every key in
        ``data_excluded`` (including ``UPDATE_KEY``), so the previous lookup
        ``self.data.get(UPDATE_KEY)`` always returned ``None``.
        """
        return self.__data.get(UPDATE_KEY)

    @cached_property
    def _map_data(self) -> DictData:
        """Return configuration data without key in the excluded key set."""
        data: DictData = self.__data.copy()
        rs: DictData = {k: data[k] for k in data if k not in self.data_excluded}

        # Mapping datetime format to string value.
        for key in self.datetime_key:
            if hasdot(key, rs):
                # Fill format datetime object to any type value.
                # NOTE(review): this assumes ``self.externals`` carries
                #   ``load_datetime_name`` ("audit_date"); a missing key
                #   raises KeyError — confirm callers always supply it.
                rs: DictData = setdot(
                    key,
                    rs,
                    map_func(
                        getdot(key, rs),
                        Datetime.parse(
                            value=self.externals[self.load_datetime_name],
                            fmt=self.load_datetime_fmt,
                        ).format,
                    ),
                )
        return rs

    @property
    def data(self) -> DictData:
        """Return deep copy of the input data.

        :rtype: DictData
        """
        return copy.deepcopy(self._map_data)

    @clear_cache(attrs=("type", "_map_data"))
    def refresh(self) -> Self:
        """Refresh configuration data. This process will use `deploy` method
        of the register object.

        :rtype: Self
        """
        return self.from_register(
            name=self.name,
            params=self.params,
            externals=self.externals,
        )

    @cached_property
    def type(self) -> Any:
        """Return object type which implement in `config_object` key."""
        if not (_typ := self.data.get("type")):
            raise ValueError(
                f"the 'type' value: {_typ} does not exists in config data."
            )
        # NOTE(review): this prefix differs from ``SimLoad.type`` which
        #   imports from ``ddeutil.workflow`` — confirm ``ddeutil.pipe``
        #   is intended and not a stale package name.
        return import_string(f"ddeutil.pipe.{_typ}")
|
189
|
+
|
190
|
+
|
191
|
+
class SimLoad:
    """Simple Load Object that will search config data by name.

    :param name: A name of config data that will read by Yaml Loader object.
    :param params: A Params model object.
    :param externals: An external parameters

    Note:
        The config data should have ``type`` key for engine can know what is
        config should to do next.
    """

    # Module prefix prepended to values imported by ``params()``.
    import_prefix: str = "ddeutil.workflow"

    def __init__(
        self,
        name: str,
        params: Params,
        externals: DictData,
    ) -> None:
        self.data: DictData = {}
        # Scan every file under the configured conf path and keep the data
        # found under the top-level key ``name`` in any YAML file.
        # NOTE(review): the loop does not break on the first hit, so if the
        #   same name appears in several files the LAST match wins — confirm
        #   this precedence is intended.
        for file in PathSearch(params.engine.paths.conf).files:
            if any(file.suffix.endswith(s) for s in ("yml", "yaml")) and (
                data := YamlEnvFl(file).read().get(name, {})
            ):
                self.data = data
        if not self.data:
            raise ConfigNotFound(f"Config {name!r} does not found on conf path")
        self.__conf_params: Params = params
        self.externals: DictData = externals

    @property
    def conf_params(self) -> Params:
        """Return the ``Params`` object this loader was constructed with."""
        return self.__conf_params

    @cached_property
    def type(self) -> AnyModel:
        """Return object type which implement in `config_object` key."""
        if not (_typ := self.data.get("type")):
            raise ValueError(
                f"the 'type' value: {_typ} does not exists in config data."
            )
        try:
            # NOTE: Auto adding module prefix if it does not set
            return import_string(f"ddeutil.workflow.{_typ}")
        except ModuleNotFoundError:
            # Fall back to treating the value as a fully-qualified path.
            return import_string(f"{_typ}")

    def params(self) -> dict[str, Callable[[Any], Any]]:
        """Return a mapping of key from params and imported value on params."""
        if not (p := self.data.get("params", {})):
            return p

        try:
            # Each param value is an import path (relative to
            # ``import_prefix``) resolving to a casting callable.
            return {i: import_string(f"{self.import_prefix}.{p[i]}") for i in p}
        except ModuleNotFoundError as err:
            logging.error(err)
            raise err

    def validate_params(self, param: dict[str, Any]) -> dict[str, Any]:
        """Return parameter that want to catch before workflow running."""
        try:
            # Apply each configured caster to its matching input value.
            return {i: caller(param[i]) for i, caller in self.params().items()}
        except KeyError as err:
            logging.error(f"Parameter: {err} does not exists from passing")
            raise err
        except ValueError as err:
            logging.error("Value that passing to params does not valid")
            raise err
|
260
|
+
|
261
|
+
|
262
|
+
class Loader(SimLoad):
    """Main Loader Object.

    Loads the core ``Params`` configuration from a YAML file (default
    ``./workflows-conf.yaml``) and then delegates the config-name search
    to ``SimLoad``.
    """

    def __init__(
        self,
        name: str,
        externals: DictData,
        *,
        path: str | None = None,
    ) -> None:
        # NOTE: import params object from specific config file
        params: Params = self.config(path)

        # FIX: removed the redundant ``self.data = {}`` that preceded the
        # parent call — ``SimLoad.__init__`` re-initializes ``self.data``
        # itself, so the assignment had no observable effect.
        super().__init__(name, params, externals)

    @classmethod
    def config(cls, path: str | None = None) -> Params:
        """Load and validate the ``Params`` model from *path*.

        :param path: Optional config file path; defaults to
            ``./workflows-conf.yaml`` in the working directory.
        """
        return Params.model_validate(
            YamlEnvFl(path or "./workflows-conf.yaml").read()
        )
|
284
|
+
|
285
|
+
|
286
|
+
def map_caller(value: str, params: dict[str, Any]) -> Any:
    """Resolve a ``${{ <caller> }}`` template found by ``RE_CALLER`` in *value*.

    :param value: A string that may embed one ``${{ ... }}`` caller template.
    :param params: The parameter mapping searched with dot-notation.
    :returns: Any value that getter of caller receive from the params.
    :raises ValueError: when the caller path is absent from *params*, or when
        a non-str/int value is mixed with extra text around the template.
    """
    found = RegexConf.RE_CALLER.search(value)
    if found is None:
        # No template at all: hand the value back untouched.
        return value

    # The named group holds the dotted path inside ``${{ <caller-value> }}``.
    caller: str = found.group("caller")
    if not hasdot(caller, params):
        raise ValueError(f"params does not set caller: {caller!r}")
    getter = getdot(caller, params)
    matched: str = found.group(0)

    # Scalar str/int values are substituted into the surrounding text.
    if isinstance(getter, (str, int)):
        return value.replace(matched, str(getter))

    # Non-formattable values may only stand alone; any leftover text around
    # the template is an error, otherwise return the raw object.
    if value.replace(matched, ""):
        raise ValueError(
            "Callable variable should not pass other outside ${{ ... }}"
        )
    return getter
|
@@ -0,0 +1,338 @@
|
|
1
|
+
# ------------------------------------------------------------------------------
|
2
|
+
# Copyright (c) 2022 Korawich Anuttra. All rights reserved.
|
3
|
+
# Licensed under the MIT License. See LICENSE in the project root for
|
4
|
+
# license information.
|
5
|
+
# ------------------------------------------------------------------------------
|
6
|
+
from __future__ import annotations
|
7
|
+
|
8
|
+
import inspect
|
9
|
+
import subprocess
|
10
|
+
from inspect import Parameter
|
11
|
+
from subprocess import CompletedProcess
|
12
|
+
from typing import Any, Callable, Optional, Union
|
13
|
+
|
14
|
+
from pydantic import BaseModel, Field
|
15
|
+
from typing_extensions import Self
|
16
|
+
|
17
|
+
from .__regex import RegexConf
|
18
|
+
from .__types import DictData
|
19
|
+
from .exceptions import PipeArgumentError, PyException, TaskException
|
20
|
+
from .loader import Loader, map_caller
|
21
|
+
|
22
|
+
|
23
|
+
# Placeholder result models for stage / job / pipeline execution.
# Their fields are not designed yet; the ``execute`` methods below still
# return plain dicts.
class StageResult(BaseModel): ...


class JobResult(BaseModel): ...


class PipeResult(BaseModel): ...
|
30
|
+
|
31
|
+
|
32
|
+
class EmptyStage(BaseModel):
    """Empty stage that is doing nothing and logging the name of stage only.

    Also serves as the base model for all other stage types below.
    """

    # Optional stage identifier; stages without an id skip output capture
    # (see ``set_outputs`` in the subclasses).
    id: Optional[str] = None
    # Human-readable stage name.
    name: str

    def execute(self, params: dict[str, Any]) -> dict[str, Any]:
        """Do nothing and return *params* unchanged."""
        return params
|
40
|
+
|
41
|
+
|
42
|
+
class ShellStage(EmptyStage):
    """Shell statement stage."""

    # Shell statement(s) to run; embedded newlines are joined with ``;``.
    shell: str
    # Environment mapping for the statement.
    # NOTE(review): ``env`` is never passed to ``subprocess.run`` below —
    #   confirm whether it is meant to be wired in.
    env: dict[str, str] = Field(default_factory=dict)

    @staticmethod
    def __prepare_shell(shell: str):
        """Prepare shell statement string that include newline."""
        return shell.replace("\n", ";")

    def set_outputs(
        self, rs: CompletedProcess, params: dict[str, Any]
    ) -> dict[str, Any]:
        """Record the subprocess result under ``params["stages"][self.id]``."""
        # NOTE: skipping set outputs of stage execution when id does not set.
        if self.id is None:
            return params

        if "stages" not in params:
            params["stages"] = {}

        params["stages"][self.id] = {
            # NOTE: Keep only the return code and captured streams.
            "outputs": {
                "return_code": rs.returncode,
                "stdout": rs.stdout,
                "stderr": rs.stderr,
            },
        }
        return params

    def execute(self, params: dict[str, Any]) -> dict[str, Any]:
        """Execute the Shell & Powershell statement with the Python build-in
        ``subprocess`` package.

        :param params: Mutable parameter mapping; outputs are written into it
            when this stage has an ``id``.
        """
        # SECURITY NOTE(review): ``shell=True`` runs the statement through the
        #   system shell — config sources must be trusted.
        rs: CompletedProcess = subprocess.run(
            self.__prepare_shell(self.shell),
            capture_output=True,
            text=True,
            shell=True,
        )
        if rs.returncode > 0:
            # Failures are currently only printed, not raised.
            print(f"{rs.stderr}\nRunning Statement:\n---\n{self.shell}")
            # FIXME: raise err for this execution.
            # raise ShellException(
            #     f"{rs.stderr}\nRunning Statement:\n---\n"
            #     f"{self.shell}"
            # )
        self.set_outputs(rs, params)
        return params
|
93
|
+
|
94
|
+
|
95
|
+
class PyStage(EmptyStage):
    """Python executor stage that running the Python statement that receive
    globals and additional variables.
    """

    # Python source to execute via ``exec``.
    run: str
    # Extra variables; values may contain ``${{ ... }}`` caller templates.
    vars: dict[str, Any] = Field(default_factory=dict)

    def get_var(self, params: dict[str, Any]) -> dict[str, Any]:
        """Return ``vars`` with each value resolved through ``map_caller``."""
        rs = self.vars.copy()
        for p, v in self.vars.items():
            rs[p] = map_caller(v, params)
        return rs

    def set_outputs(
        self, lc: dict[str, Any], params: dict[str, Any]
    ) -> dict[str, Any]:
        """Record exec-locals under ``params["stages"][self.id]``."""
        # NOTE: skipping set outputs of stage execution when id does not set.
        if self.id is None:
            return params

        if "stages" not in params:
            params["stages"] = {}

        params["stages"][self.id] = {
            # NOTE: The output will filter unnecessary keys from ``_locals``.
            "outputs": {k: lc[k] for k in lc if k != "__annotations__"},
        }
        return params

    def execute(self, params: dict[str, Any]) -> dict[str, Any]:
        """Execute the Python statement that pass all globals and input params
        to globals argument on ``exec`` build-in function.

        :param params: A parameter that want to pass before run any statement.
        :type params: dict[str, Any]

        :rtype: dict[str, Any]
        :returns: A parameters from an input that was mapped output if the stage
            ID was set.
        """
        _globals: dict[str, Any] = globals() | params | self.get_var(params)
        _locals: dict[str, Any] = {}
        try:
            # SECURITY NOTE(review): ``exec`` runs arbitrary code taken from
            #   the loaded config — only trusted config sources may be used.
            exec(map_caller(self.run, params), _globals, _locals)
        except Exception as err:
            raise PyException(
                f"{err.__class__.__name__}: {err}\nRunning Statement:\n---\n"
                f"{self.run}"
            ) from None

        # NOTE: set outputs from ``_locals`` value from ``exec``.
        self.set_outputs(_locals, params)
        # Propagate back any param key the executed code rebound in globals.
        return params | {k: _globals[k] for k in params if k in _globals}
|
151
|
+
|
152
|
+
|
153
|
+
class TaskSearch(BaseModel):
    """Components of a task reference string captured by ``RE_TASK_FMT``."""

    # Module path segment appended to ``ddeutil.workflow`` on import.
    path: str
    # Registry dict name (or plain function name as a fallback).
    func: str
    # Tag selecting one implementation inside the registry entry.
    tag: str
|
157
|
+
|
158
|
+
|
159
|
+
class TaskStage(EmptyStage):
    """Stage that resolves and runs a registered task function."""

    # Task reference string matched against ``RE_TASK_FMT``.
    task: str
    # Keyword arguments passed to the resolved task callable.
    args: dict[str, Any]

    @staticmethod
    def extract_task(task: str) -> Callable[[], Callable[[Any], Any]]:
        """Extract Task string value to task function."""
        if not (found := RegexConf.RE_TASK_FMT.search(task)):
            raise ValueError("Task does not match with task format regex.")
        tasks = TaskSearch(**found.groupdict())

        from ddeutil.core import import_string

        try:
            # Preferred path: a ``registries`` dict inside the target module.
            rgt = import_string(f"ddeutil.workflow.{tasks.path}.registries")
            if tasks.func not in rgt:
                raise NotImplementedError(
                    f"ddeutil.workflow.{tasks.path}.registries does not "
                    f"implement registry: {tasks.func}."
                )
        except ImportError:

            # NOTE: Try to import this task function from target module.
            try:
                return import_string(
                    f"ddeutil.workflow.{tasks.path}.{tasks.func}"
                )
            except ImportError:
                raise NotImplementedError(
                    f"ddeutil.workflow.{tasks.path} does not implement "
                    f"registries or {tasks.func}."
                ) from None

        if tasks.tag not in rgt[tasks.func]:
            raise NotImplementedError(
                f"tag: {tasks.tag} does not found on registry func: "
                f"ddeutil.workflow.{tasks.path}.registries."
                f"{tasks.func}"
            )
        return rgt[tasks.func][tasks.tag]

    def execute(self, params: dict[str, Any]) -> dict[str, Any]:
        """Execute the Task function.

        NOTE(review): unlike the sibling stages this returns
        ``{"output": rs}`` instead of *params*, and ``Job.execute`` discards
        stage return values — confirm the task output is surfaced somewhere.
        """
        # The registry entry is itself a factory: call it to get the task.
        task_caller = self.extract_task(self.task)()
        if not callable(task_caller):
            raise ImportError("Task caller function does not callable.")

        # NOTE: check task caller parameters
        ips = inspect.signature(task_caller)
        if any(
            k not in self.args
            for k in ips.parameters
            if ips.parameters[k].default == Parameter.empty
        ):
            raise ValueError(
                f"necessary parameters, ({', '.join(ips.parameters.keys())}), "
                f"does not set to args"
            )
        try:
            rs = task_caller(**self.args)
        except Exception as err:
            raise TaskException(f"{err.__class__.__name__}: {err}") from err
        return {"output": rs}
|
222
|
+
|
223
|
+
|
224
|
+
class HookStage(EmptyStage):
    """Hook stage (stub — ``execute`` has no implementation yet)."""

    # Hook reference string.
    hook: str
    # Arguments intended for the hook call.
    args: dict[str, Any]

    def execute(self, params: dict[str, Any]) -> dict[str, Any]: ...
|
229
|
+
|
230
|
+
|
231
|
+
# NOTE: Order of parsing stage data — the most specific stage models are
#   listed first and ``EmptyStage`` (which matches any id/name-only mapping)
#   is the fallback.
Stage = Union[
    PyStage,
    ShellStage,
    TaskStage,
    HookStage,
    EmptyStage,
]
|
239
|
+
|
240
|
+
|
241
|
+
class Job(BaseModel):
    """A group of stages executed sequentially, with optional ``needs``
    dependencies on other jobs (not yet enforced — see TODO in
    ``Pipeline.execute``).
    """

    stages: list[Stage] = Field(default_factory=list)
    needs: list[str] = Field(default_factory=list)

    def stage(self, stage_id: str) -> Stage:
        """Return the stage whose ``id`` equals *stage_id*.

        :raises ValueError: when no stage carries that ID.
        """
        for stage in self.stages:
            if stage_id == (stage.id or ""):
                return stage
        raise ValueError(f"Stage ID {stage_id} does not exists")

    def execute(self, params: dict[str, Any] | None = None) -> dict[str, Any]:
        """Execute every stage in order, mutating and returning *params*.

        FIX: the default ``None`` previously flowed straight into
        ``stage.execute`` — stages such as ``ShellStage.set_outputs`` index
        into the mapping (``params["stages"]``) and would raise TypeError.
        Normalize to an empty dict first.
        """
        params: dict[str, Any] = {} if params is None else params
        for stage in self.stages:
            # NOTE:
            # I do not use below syntax because `params` dict be the
            # reference memory pointer and it was changed when I action
            # anything like update or re-construct this.
            # ... params |= stage.execute(params=params)
            stage.execute(params=params)
        return params
|
260
|
+
|
261
|
+
|
262
|
+
class Strategy(BaseModel):
    """Matrix strategy definition for a job.

    NOTE(review): field semantics are not used anywhere visible yet —
    presumably GitHub-Actions-style matrix/include/exclude; confirm.
    """

    matrix: list[str]
    include: list[str]
    exclude: list[str]
|
266
|
+
|
267
|
+
|
268
|
+
class JobStrategy(Job):
    """Strategy job — a ``Job`` that additionally carries a ``Strategy``."""

    # Matrix strategy attached to this job.
    strategy: Strategy
|
272
|
+
|
273
|
+
|
274
|
+
class Pipeline(BaseModel):
    """Pipeline Model."""

    # Mapping of param name -> casting callable (built by ``Loader.params``;
    # see ``execute`` where each value is called on the incoming argument).
    params: dict[str, Any] = Field(default_factory=dict)
    # Mapping of job id -> Job model.
    jobs: dict[str, Job]

    @classmethod
    def from_loader(
        cls,
        name: str,
        externals: DictData,
    ) -> Self:
        """Build a Pipeline from config data found by ``Loader``.

        :raises PipeArgumentError: when the config has no ``jobs`` key.
        """
        loader: Loader = Loader(name, externals=externals)
        if "jobs" not in loader.data:
            raise PipeArgumentError("jobs", "Config does not set ``jobs``")
        return cls(
            jobs=loader.data["jobs"],
            params=loader.params(),
        )

    def job(self, name: str) -> Job:
        """Return Job model that exists on this pipeline."""
        if name not in self.jobs:
            raise ValueError(f"Job {name} does not exists")
        return self.jobs[name]

    def execute(self, params: dict[str, Any] | None = None):
        """Execute pipeline with passing dynamic parameters.

        See Also:

            The result of execution process for each jobs and stages on this
        pipeline will keeping in dict which able to catch out with all jobs and
        stages by dot annotation.

            For example, when I want to use the output from previous stage, I
        can access it with syntax:

            ... "<job-name>.stages.<stage-id>.outputs.<key>"

        """
        params: dict[str, Any] = params or {}
        # Every declared param must be supplied by the caller.
        check_key = tuple(f"{k!r}" for k in self.params if k not in params)
        if check_key:
            raise ValueError(
                f"Parameters that needed on pipeline does not pass: "
                f"{', '.join(check_key)}."
            )
        # Re-shape the mapping under a "params" key, casting each declared
        # value through its configured callable.
        params: dict[str, Any] = {
            "params": (
                params
                | {
                    k: self.params[k](params[k])
                    for k in params
                    if k in self.params
                }
            )
        }
        for job_id in self.jobs:
            print(f"[PIPELINE]: Start execute the job: {job_id!r}")
            job = self.jobs[job_id]
            # TODO: Condition on ``needs`` of this job was set. It should create
            #   multithreading process on this step.
            job.execute(params=params)
        return params
|