ddeutil-workflow 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,310 @@
1
+ # ------------------------------------------------------------------------------
2
+ # Copyright (c) 2022 Korawich Anuttra. All rights reserved.
3
+ # Licensed under the MIT License. See LICENSE in the project root for
4
+ # license information.
5
+ # ------------------------------------------------------------------------------
6
+ from __future__ import annotations
7
+
8
+ import copy
9
+ import logging
10
+ import urllib.parse
11
+ from functools import cached_property
12
+ from typing import Any, Callable, TypeVar
13
+
14
+ from ddeutil.core import (
15
+ clear_cache,
16
+ getdot,
17
+ hasdot,
18
+ import_string,
19
+ setdot,
20
+ )
21
+ from ddeutil.io import (
22
+ ConfigNotFound,
23
+ Params,
24
+ PathSearch,
25
+ Register,
26
+ YamlEnvFl,
27
+ map_func,
28
+ )
29
+ from ddeutil.io.__conf import UPDATE_KEY, VERSION_KEY
30
+ from fmtutil import Datetime
31
+ from pydantic import BaseModel
32
+ from typing_extensions import Self
33
+
34
+ from .__regex import RegexConf
35
+ from .__types import DictData, TupleStr
36
+ from .exceptions import ConfigArgumentError
37
+
38
# NOTE: Type variable bound to pydantic ``BaseModel``; used as the return
#   annotation of ``SimLoad.type``.
AnyModel = TypeVar("AnyModel", bound=BaseModel)
39
+
40
+
41
class YamlEnvQuote(YamlEnvFl):
    """``YamlEnvFl`` loader whose ``prepare`` hook URL-encodes its input.

    NOTE(review): presumably ``prepare`` is applied to substituted
    environment values by ``YamlEnvFl`` -- confirm against ``ddeutil.io``.
    """

    @staticmethod
    def prepare(x: str) -> str:
        """Return ``x`` cast to ``str`` and encoded with ``quote_plus``."""
        text: str = str(x)
        return urllib.parse.quote_plus(text)
46
+
47
+
48
class BaseLoad:
    """Base configuration data loading object for load config data from
    the register object. The base loading object contain necessary
    properties and method for type object.

    :param name: str : A name of the configuration; it must start with one
        of ``load_prefixes``.
    :param data: dict : A configuration data content.
    :param params: Params : A params object that pass to the register.
    :param externals: Optional[dict] : A parameters mapping for some
        subclass of loading use. The ``load_datetime_name`` key is read from
        it when a ``datetime_key`` exists in the data.
    """

    # NOTE: Set loading config for inherit
    load_prefixes: TupleStr = ("conn",)
    load_datetime_name: str = "audit_date"
    load_datetime_fmt: str = "%Y-%m-%d %H:%M:%S"

    # NOTE: Set preparing config for inherit
    data_excluded: TupleStr = (UPDATE_KEY, VERSION_KEY)
    option_key: TupleStr = ("parameters",)
    datetime_key: TupleStr = ("endpoint",)

    @classmethod
    def from_register(
        cls,
        name: str,
        params: Params,
        externals: DictData | None = None,
    ) -> Self:
        """Loading config data from register object.

        :param name: A name of config data catalog that can register.
        :type name: str
        :param params: A params object.
        :type params: Params
        :param externals: A external parameters
        :type externals: DictData | None(=None)
        """
        try:
            rs: Register = Register(
                name=name,
                stage=params.stage_final,
                params=params,
                loader=YamlEnvQuote,
            )
        except ConfigNotFound:
            # NOTE: The config does not exist on the final stage yet, so it
            #   is deployed up to that stage before reading.
            rs = Register(
                name=name,
                params=params,
                loader=YamlEnvQuote,
            ).deploy(stop=params.stage_final)
        return cls(
            name=rs.name,
            data=rs.data().copy(),
            params=params,
            externals=externals,
        )

    def __init__(
        self,
        name: str,
        data: DictData,
        params: Params,
        externals: DictData | None = None,
    ) -> None:
        """Main initialize base config object which get a name of configuration
        and the data that was loaded by the register object.

        :raises ConfigArgumentError: If the name does not start with any
            prefix in ``load_prefixes``.
        """
        self.name: str = name
        self.__data: DictData = data
        self.params: Params = params
        self.externals: DictData = externals or {}

        # NOTE: Validate step of base loading object.
        if not any(
            self.name.startswith(prefix) for prefix in self.load_prefixes
        ):
            raise ConfigArgumentError(
                "prefix",
                (
                    f"{self.name!r} does not starts with the "
                    f"{self.__class__.__name__} prefixes: "
                    f"{self.load_prefixes!r}."
                ),
            )

    @property
    def updt(self):
        """Return the value of the update key from the raw config data.

        NOTE: This reads the raw data instead of ``self.data`` because the
            update key is listed in ``data_excluded`` and is therefore always
            removed from ``self.data``.
        """
        return self.__data.get(UPDATE_KEY)

    @cached_property
    def _map_data(self) -> DictData:
        """Return configuration data without key in the excluded key set."""
        data: DictData = self.__data.copy()
        rs: DictData = {k: data[k] for k in data if k not in self.data_excluded}

        # Mapping datetime format to string value.
        for key in self.datetime_key:
            if hasdot(key, rs):
                # Fill format datetime object to any type value. The datetime
                # value comes from the externals mapping, so a ``KeyError``
                # raises here if ``load_datetime_name`` was not passed.
                rs = setdot(
                    key,
                    rs,
                    map_func(
                        getdot(key, rs),
                        Datetime.parse(
                            value=self.externals[self.load_datetime_name],
                            fmt=self.load_datetime_fmt,
                        ).format,
                    ),
                )
        return rs

    @property
    def data(self) -> DictData:
        """Return deep copy of the prepared data (excluded keys removed).

        :rtype: DictData
        """
        return copy.deepcopy(self._map_data)

    @clear_cache(attrs=("type", "_map_data"))
    def refresh(self) -> Self:
        """Refresh configuration data. This process builds a new object with
        the ``from_register`` method by the same name, params, and externals.

        :rtype: Self
        """
        return self.from_register(
            name=self.name,
            params=self.params,
            externals=self.externals,
        )

    @cached_property
    def type(self) -> Any:
        """Return object type that was imported from the ``type`` key.

        :raises ValueError: If the ``type`` key is missing or empty.
        """
        if not (_typ := self.data.get("type")):
            raise ValueError(
                f"the 'type' value: {_typ} does not exists in config data."
            )
        # NOTE(review): this prefix is ``ddeutil.pipe`` while ``SimLoad`` uses
        #   ``ddeutil.workflow`` -- confirm which package name is correct.
        return import_string(f"ddeutil.pipe.{_typ}")
189
+
190
+
191
class SimLoad:
    """Simple Load Object that will search config data by name.

    :param name: A name of config data that will read by Yaml Loader object.
    :param params: A Params model object.
    :param externals: An external parameters

    Note:
        The config data should have ``type`` key for engine can know what is
    config should to do next.
    """

    # NOTE: Module prefix that is prepended to the ``type`` and ``params``
    #   values before importing them.
    import_prefix: str = "ddeutil.workflow"

    def __init__(
        self,
        name: str,
        params: Params,
        externals: DictData,
    ) -> None:
        """Search all YAML files on the conf path for the ``name`` key.

        :raises ConfigNotFound: If no file contains a non-empty value for
            ``name``.
        """
        self.data: DictData = {}
        for file in PathSearch(params.engine.paths.conf).files:
            # NOTE: The loop does not stop on the first match, so the last
            #   file that contains ``name`` wins.
            if file.suffix.endswith(("yml", "yaml")) and (
                data := YamlEnvFl(file).read().get(name, {})
            ):
                self.data = data
        if not self.data:
            raise ConfigNotFound(f"Config {name!r} does not found on conf path")
        self.__conf_params: Params = params
        self.externals: DictData = externals

    @property
    def conf_params(self) -> Params:
        """Return the Params object that was used for searching."""
        return self.__conf_params

    @cached_property
    def type(self) -> AnyModel:
        """Return object type that was imported from the ``type`` key.

        :raises ValueError: If the ``type`` key is missing or empty.
        """
        if not (_typ := self.data.get("type")):
            raise ValueError(
                f"the 'type' value: {_typ} does not exists in config data."
            )
        try:
            # NOTE: Auto adding module prefix if it does not set. It uses the
            #   same ``import_prefix`` as the ``params`` method.
            return import_string(f"{self.import_prefix}.{_typ}")
        except ModuleNotFoundError:
            return import_string(f"{_typ}")

    def params(self) -> dict[str, Callable[[Any], Any]]:
        """Return a mapping of key from params and imported value on params.

        :raises ModuleNotFoundError: If any params value cannot be imported.
        """
        if not (p := self.data.get("params", {})):
            return p

        try:
            return {i: import_string(f"{self.import_prefix}.{p[i]}") for i in p}
        except ModuleNotFoundError as err:
            logging.error(err)
            raise

    def validate_params(self, param: dict[str, Any]) -> dict[str, Any]:
        """Return parameter that want to catch before workflow running.

        :raises KeyError: If a key from ``params`` was not passed.
        :raises ValueError: If a passed value is rejected by its caller.
        """
        try:
            return {i: caller(param[i]) for i, caller in self.params().items()}
        except KeyError as err:
            logging.error(f"Parameter: {err} does not exists from passing")
            raise
        except ValueError:
            logging.error("Value that passing to params does not valid")
            raise
260
+
261
+
262
class Loader(SimLoad):
    """Main Loader Object that reads its Params from a config file before
    delegating the search to ``SimLoad``.
    """

    def __init__(
        self,
        name: str,
        externals: DictData,
        *,
        path: str | None = None,
    ) -> None:
        """Build the Params object from ``path`` and search ``name`` with it."""
        self.data: DictData = {}

        # NOTE: import params object from specific config file
        conf_params: Params = self.config(path)

        super().__init__(name, conf_params, externals)

    @classmethod
    def config(cls, path: str | None = None) -> Params:
        """Return the Params model that was validated from the YAML file on
        ``path``, or from ``./workflows-conf.yaml`` if the path does not set.
        """
        conf_file: str = path or "./workflows-conf.yaml"
        raw = YamlEnvFl(conf_file).read()
        return Params.model_validate(raw)
284
+
285
+
286
def map_caller(value: Any, params: dict[str, Any]) -> Any:
    """Map caller value that found from ``RE_CALLER`` regex.

    :param value: A value that may contain the ``${{ <caller-value> }}``
        template. Any non-string value returns without any change.
    :param params: A parameters mapping that the caller value was looked up
        with dot notation.

    :raises ValueError: If the caller does not exist in params, or a
        non-formattable value was mixed with other text.

    :returns: Any value that getter of caller receive from the params.
    """
    # NOTE: Only string can hold the caller template; other types (int, list,
    #   None, ...) cannot be searched by regex, so return them as-is.
    if not isinstance(value, str):
        return value

    if not (found := RegexConf.RE_CALLER.search(value)):
        return value
    # NOTE: get caller value that setting inside; ``${{ <caller-value> }}``
    caller = found.group("caller")
    if not hasdot(caller, params):
        raise ValueError(f"params does not set caller: {caller!r}")
    getter = getdot(caller, params)

    # NOTE: check type of vars. Only the first matched template is resolved
    #   (``replace`` swaps every occurrence of that same matched text).
    if isinstance(getter, (str, int)):
        return value.replace(found.group(0), str(getter))

    # NOTE:
    #   If type of getter caller does not formatting, it will return origin
    #   value. It must be the whole of the input value in this case.
    if value.replace(found.group(0), "") != "":
        raise ValueError(
            "Callable variable should not pass other outside ${{ ... }}"
        )
    return getter
@@ -0,0 +1,338 @@
1
+ # ------------------------------------------------------------------------------
2
+ # Copyright (c) 2022 Korawich Anuttra. All rights reserved.
3
+ # Licensed under the MIT License. See LICENSE in the project root for
4
+ # license information.
5
+ # ------------------------------------------------------------------------------
6
+ from __future__ import annotations
7
+
8
+ import inspect
9
+ import subprocess
10
+ from inspect import Parameter
11
+ from subprocess import CompletedProcess
12
+ from typing import Any, Callable, Optional, Union
13
+
14
+ from pydantic import BaseModel, Field
15
+ from typing_extensions import Self
16
+
17
+ from .__regex import RegexConf
18
+ from .__types import DictData
19
+ from .exceptions import PipeArgumentError, PyException, TaskException
20
+ from .loader import Loader, map_caller
21
+
22
+
23
class StageResult(BaseModel):
    # NOTE: Placeholder model; it does not declare any field yet.
    pass


class JobResult(BaseModel):
    # NOTE: Placeholder model; it does not declare any field yet.
    pass


class PipeResult(BaseModel):
    # NOTE: Placeholder model; it does not declare any field yet.
    pass
30
+
31
+
32
class EmptyStage(BaseModel):
    """Empty stage that is doing nothing and logging the name of stage only."""

    # NOTE(review): no logging is performed by this class; ``execute`` only
    #   returns its input.

    # NOTE: Optional stage ID; subclasses skip writing outputs when it is
    #   ``None``.
    id: Optional[str] = None
    name: str

    def execute(self, params: dict[str, Any]) -> dict[str, Any]:
        """Return the input ``params`` object without any change."""
        return params
40
+
41
+
42
class ShellStage(EmptyStage):
    """Shell statement stage."""

    shell: str
    # NOTE: Extra environment variables that are merged over the current
    #   process environment when the statement runs.
    env: dict[str, str] = Field(default_factory=dict)

    @staticmethod
    def __prepare_shell(shell: str) -> str:
        """Prepare shell statement string that include newline.

        Each non-blank line is joined with ``;``. Blank lines are skipped
        because an empty statement (``;;`` or a leading ``;``) is a syntax
        error on POSIX shells.
        """
        return ";".join(line for line in shell.split("\n") if line.strip())

    def set_outputs(
        self, rs: CompletedProcess, params: dict[str, Any]
    ) -> dict[str, Any]:
        """Set outputs to params under ``stages.<id>.outputs``.

        :param rs: A completed process from the ``subprocess.run``.
        :param params: A parameters mapping that will be mutated in-place.
        """
        # NOTE: skipping set outputs of stage execution when id does not set.
        if self.id is None:
            return params

        if "stages" not in params:
            params["stages"] = {}

        params["stages"][self.id] = {
            "outputs": {
                "return_code": rs.returncode,
                "stdout": rs.stdout,
                "stderr": rs.stderr,
            },
        }
        return params

    def execute(self, params: dict[str, Any]) -> dict[str, Any]:
        """Execute the Shell & Powershell statement with the Python build-in
        ``subprocess`` package.

        :param params: A parameters mapping that receives the outputs when
            the stage ID was set.
        :rtype: dict[str, Any]
        """
        import os

        rs: CompletedProcess = subprocess.run(
            self.__prepare_shell(self.shell),
            capture_output=True,
            text=True,
            shell=True,
            # NOTE: ``None`` inherits the current environment unchanged.
            env=({**os.environ, **self.env} if self.env else None),
        )
        # NOTE: Use ``!= 0`` because a process that was killed by a signal
        #   reports a negative return code on POSIX.
        if rs.returncode != 0:
            print(f"{rs.stderr}\nRunning Statement:\n---\n{self.shell}")
            # FIXME: raise err for this execution.
            # raise ShellException(
            #     f"{rs.stderr}\nRunning Statement:\n---\n"
            #     f"{self.shell}"
            # )
        self.set_outputs(rs, params)
        return params
93
+
94
+
95
class PyStage(EmptyStage):
    """Python executor stage that running the Python statement that receive
    globals nad additional variables.
    """

    run: str
    vars: dict[str, Any] = Field(default_factory=dict)

    def get_var(self, params: dict[str, Any]) -> dict[str, Any]:
        """Return variables that any string value was mapped with the caller
        template from ``params``.
        """
        # NOTE: ``vars`` can hold any type, but only string values can carry
        #   the ``${{ ... }}`` template, so other types pass through.
        return {
            key: map_caller(val, params) if isinstance(val, str) else val
            for key, val in self.vars.items()
        }

    def set_outputs(
        self, lc: dict[str, Any], params: dict[str, Any]
    ) -> dict[str, Any]:
        """Set outputs to params under ``stages.<id>.outputs``.

        :param lc: A locals mapping that was filled by ``exec``.
        :param params: A parameters mapping that will be mutated in-place.
        """
        # NOTE: skipping set outputs of stage execution when id does not set.
        if self.id is None:
            return params

        if "stages" not in params:
            params["stages"] = {}

        params["stages"][self.id] = {
            # NOTE: The output will filter unnecessary keys from ``_locals``.
            "outputs": {k: lc[k] for k in lc if k != "__annotations__"},
        }
        return params

    def execute(self, params: dict[str, Any]) -> dict[str, Any]:
        """Execute the Python statement that pass all globals and input params
        to globals argument on ``exec`` build-in function.

        :param params: A parameter that want to pass before run any statement.
        :type params: dict[str, Any]

        :raises PyException: If the statement raises any exception.

        :rtype: dict[str, Any]
        :returns: A parameters from an input that was mapped output if the stage
            ID was set.
        """
        _globals: dict[str, Any] = globals() | params | self.get_var(params)
        _locals: dict[str, Any] = {}
        try:
            # WARNING: ``exec`` runs the configured statement with this
            #   module's globals; the config must come from a trusted source.
            exec(map_caller(self.run, params), _globals, _locals)
        except Exception as err:
            raise PyException(
                f"{err.__class__.__name__}: {err}\nRunning Statement:\n---\n"
                f"{self.run}"
            ) from None

        # NOTE: set outputs from ``_locals`` value from ``exec``.
        self.set_outputs(_locals, params)
        return params | {k: _globals[k] for k in params if k in _globals}
151
+
152
+
153
class TaskSearch(BaseModel):
    # NOTE: This model is built from the named groups of the
    #   ``RegexConf.RE_TASK_FMT`` match in ``TaskStage.extract_task``.

    # Module path under ``ddeutil.workflow`` that holds the task.
    path: str
    # Registry (or function) name inside that module.
    func: str
    # Key of the implementation inside the registry.
    tag: str
157
+
158
+
159
class TaskStage(EmptyStage):
    # NOTE: Stage that resolves a task function from the ``task`` string and
    #   calls it with ``args`` as keyword arguments.
    task: str
    args: dict[str, Any]

    @staticmethod
    def extract_task(task: str) -> Callable[[], Callable[[Any], Any]]:
        """Extract Task string value to task function.

        :param task: A task string that must match ``RegexConf.RE_TASK_FMT``.

        :raises ValueError: If the task string does not match the regex.
        :raises NotImplementedError: If the registry, function, or tag does
            not exist.
        """
        if not (found := RegexConf.RE_TASK_FMT.search(task)):
            raise ValueError("Task does not match with task format regex.")
        tasks = TaskSearch(**found.groupdict())

        from ddeutil.core import import_string

        try:
            rgt = import_string(f"ddeutil.workflow.{tasks.path}.registries")
            if tasks.func not in rgt:
                raise NotImplementedError(
                    f"ddeutil.workflow.{tasks.path}.registries does not "
                    f"implement registry: {tasks.func}."
                )
        except ImportError:

            # NOTE: Try to import this task function from target module.
            try:
                return import_string(
                    f"ddeutil.workflow.{tasks.path}.{tasks.func}"
                )
            except ImportError:
                raise NotImplementedError(
                    f"ddeutil.workflow.{tasks.path} does not implement "
                    f"registries or {tasks.func}."
                ) from None

        if tasks.tag not in rgt[tasks.func]:
            raise NotImplementedError(
                f"tag: {tasks.tag} does not found on registry func: "
                f"ddeutil.workflow.{tasks.path}.registries."
                f"{tasks.func}"
            )
        return rgt[tasks.func][tasks.tag]

    def execute(self, params: dict[str, Any]) -> dict[str, Any]:
        """Execute the Task function.

        :param params: A parameters mapping; this method does not read it.

        :raises ImportError: If the extracted task caller is not callable.
        :raises ValueError: If any required parameter is missing from args.
        :raises TaskException: If the task function raises any exception.

        :returns: A mapping with the ``output`` key of the task result.
        """
        task_caller = self.extract_task(self.task)()
        if not callable(task_caller):
            raise ImportError("Task caller function does not callable.")

        # NOTE: check task caller parameters. A parameter is required only
        #   when it has no default and it is not ``*args`` / ``**kwargs``
        #   (those never have a default but they are always optional).
        ips = inspect.signature(task_caller)
        optional_kinds = (Parameter.VAR_POSITIONAL, Parameter.VAR_KEYWORD)
        missing: list[str] = [
            name
            for name, param in ips.parameters.items()
            if param.default is Parameter.empty
            and param.kind not in optional_kinds
            and name not in self.args
        ]
        if missing:
            raise ValueError(
                f"necessary parameters, ({', '.join(missing)}), "
                f"does not set to args"
            )
        try:
            rs = task_caller(**self.args)
        except Exception as err:
            raise TaskException(f"{err.__class__.__name__}: {err}") from err
        # NOTE(review): unlike ``ShellStage`` and ``PyStage``, the result does
        #   not write to ``params`` and ``Job.execute`` drops this return
        #   value -- confirm whether the output should be kept on params.
        return {"output": rs}
222
+
223
+
224
class HookStage(EmptyStage):
    # NOTE: Hook stage declaration; the execution does not implement yet.
    hook: str
    args: dict[str, Any]

    def execute(self, params: dict[str, Any]) -> dict[str, Any]:
        """Do nothing (placeholder).

        NOTE(review): the body is empty, so this returns ``None`` instead of
            the annotated mapping.
        """
        return None
229
+
230
+
231
# NOTE: Order of parsing stage data. ``EmptyStage`` stays last because its
#   fields are a subset of every other stage (presumably so that it only
#   matches when no specific stage does -- confirm against the pydantic
#   union mode in use).
Stage = Union[PyStage, ShellStage, TaskStage, HookStage, EmptyStage]
239
+
240
+
241
class Job(BaseModel):
    # NOTE: Ordered stages of this job, and names that this job needs
    #   (``needs`` does not use by ``execute`` in this class).
    stages: list[Stage] = Field(default_factory=list)
    needs: list[str] = Field(default_factory=list)

    def stage(self, stage_id: str) -> Stage:
        """Return the first stage that its ID equals to ``stage_id``.

        :raises ValueError: If no stage has this ID.
        """
        for stage in self.stages:
            # NOTE: A stage without ID compares as an empty string.
            if stage_id == (stage.id or ""):
                return stage
        raise ValueError(f"Stage ID {stage_id} does not exists")

    def execute(self, params: dict[str, Any] | None = None) -> dict[str, Any]:
        """Execute all stages in order with the shared ``params`` mapping.

        :param params: A parameters mapping that stages mutate in-place. A
            new empty mapping is used when it does not pass.

        :returns: The same mapping object that was passed (or the new one).
        """
        # NOTE: Check with ``is None`` (not ``or``) so an empty dict from the
        #   caller keeps its identity and still receives the stage outputs.
        if params is None:
            params = {}
        for stage in self.stages:
            # NOTE:
            #       I do not use below syntax because `params` dict be the
            #   reference memory pointer and it was changed when I action
            #   anything like update or re-construct this.
            #   ... params |= stage.execute(params=params)
            stage.execute(params=params)
        return params
260
+
261
+
262
class Strategy(BaseModel):
    # NOTE: Strategy declaration with three required lists of string; no
    #   code in this module reads these fields yet.
    matrix: list[str]
    include: list[str]
    exclude: list[str]
266
+
267
+
268
class JobStrategy(Job):
    """Strategy job"""

    # NOTE: A ``Job`` that also requires the ``Strategy`` model.
    strategy: Strategy
272
+
273
+
274
class Pipeline(BaseModel):
    """Pipeline Model"""

    # NOTE: Mapping of parameter name to the callable that casts its value
    #   (filled from ``Loader.params()`` in ``from_loader``).
    params: dict[str, Any] = Field(default_factory=dict)
    jobs: dict[str, Job]

    @classmethod
    def from_loader(
        cls,
        name: str,
        externals: DictData,
    ) -> Self:
        """Construct the pipeline from config data that was found by name.

        :raises PipeArgumentError: If the config data does not have ``jobs``.
        """
        loader: Loader = Loader(name, externals=externals)
        if "jobs" in loader.data:
            return cls(
                jobs=loader.data["jobs"],
                params=loader.params(),
            )
        raise PipeArgumentError("jobs", "Config does not set ``jobs``")

    def job(self, name: str) -> Job:
        """Return Job model that exists on this pipeline.

        :raises ValueError: If the job name does not exist.
        """
        if name in self.jobs:
            return self.jobs[name]
        raise ValueError(f"Job {name} does not exists")

    def execute(self, params: dict[str, Any] | None = None):
        """Execute pipeline with passing dynamic parameters.

        :param params: Values for the parameters that this pipeline needs.

        :raises ValueError: If any needed parameter does not pass.

        :returns: The context mapping that was shared by all jobs.

        See Also:

            All jobs receive the same context mapping. The casted input
        values are kept under the ``params`` key, and each stage that has an
        ID writes its result with the layout:

            ... "stages.<stage-id>.outputs.<key>"

        """
        given: dict[str, Any] = params or {}
        missing = tuple(f"{k!r}" for k in self.params if k not in given)
        if missing:
            raise ValueError(
                f"Parameters that needed on pipeline does not pass: "
                f"{', '.join(missing)}."
            )

        # NOTE: Cast only keys that were declared; other keys pass as-is.
        casted: dict[str, Any] = {
            k: self.params[k](given[k]) for k in given if k in self.params
        }
        context: dict[str, Any] = {"params": given | casted}
        for job_id, job in self.jobs.items():
            print(f"[PIPELINE]: Start execute the job: {job_id!r}")
            # TODO: Condition on ``needs`` of this job was set. It should create
            #   multithreading process on this step.
            job.execute(params=context)
        return context