ddeutil-workflow 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +9 -0
- ddeutil/workflow/__types.py +43 -1
- ddeutil/workflow/exceptions.py +13 -1
- ddeutil/workflow/loader.py +16 -110
- ddeutil/workflow/on.py +195 -0
- ddeutil/workflow/pipeline.py +351 -371
- ddeutil/workflow/{vendors/__schedule.py → scheduler.py} +222 -176
- ddeutil/workflow/stage.py +402 -0
- ddeutil/workflow/utils.py +219 -28
- {ddeutil_workflow-0.0.4.dist-info → ddeutil_workflow-0.0.6.dist-info}/METADATA +118 -90
- ddeutil_workflow-0.0.6.dist-info/RECORD +15 -0
- {ddeutil_workflow-0.0.4.dist-info → ddeutil_workflow-0.0.6.dist-info}/WHEEL +1 -1
- ddeutil/workflow/__regex.py +0 -44
- ddeutil/workflow/conn.py +0 -240
- ddeutil/workflow/schedule.py +0 -82
- ddeutil/workflow/tasks/__init__.py +0 -6
- ddeutil/workflow/tasks/_pandas.py +0 -54
- ddeutil/workflow/tasks/_polars.py +0 -92
- ddeutil/workflow/vendors/__dataset.py +0 -127
- ddeutil/workflow/vendors/__dict.py +0 -333
- ddeutil/workflow/vendors/__init__.py +0 -0
- ddeutil/workflow/vendors/aws.py +0 -185
- ddeutil/workflow/vendors/az.py +0 -0
- ddeutil/workflow/vendors/minio.py +0 -11
- ddeutil/workflow/vendors/pd.py +0 -13
- ddeutil/workflow/vendors/pg.py +0 -11
- ddeutil/workflow/vendors/pl.py +0 -172
- ddeutil/workflow/vendors/sftp.py +0 -209
- ddeutil_workflow-0.0.4.dist-info/RECORD +0 -29
- {ddeutil_workflow-0.0.4.dist-info → ddeutil_workflow-0.0.6.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.4.dist-info → ddeutil_workflow-0.0.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,402 @@
|
|
1
|
+
# ------------------------------------------------------------------------------
|
2
|
+
# Copyright (c) 2022 Korawich Anuttra. All rights reserved.
|
3
|
+
# Licensed under the MIT License. See LICENSE in the project root for
|
4
|
+
# license information.
|
5
|
+
# ------------------------------------------------------------------------------
|
6
|
+
from __future__ import annotations
|
7
|
+
|
8
|
+
import contextlib
|
9
|
+
import inspect
|
10
|
+
import logging
|
11
|
+
import os
|
12
|
+
import subprocess
|
13
|
+
import sys
|
14
|
+
import uuid
|
15
|
+
from abc import ABC, abstractmethod
|
16
|
+
from collections.abc import Iterator
|
17
|
+
from dataclasses import dataclass
|
18
|
+
from inspect import Parameter
|
19
|
+
from pathlib import Path
|
20
|
+
from subprocess import CompletedProcess
|
21
|
+
from typing import Callable, Optional, Union
|
22
|
+
|
23
|
+
from ddeutil.core import str2bool
|
24
|
+
from pydantic import BaseModel, Field
|
25
|
+
|
26
|
+
from .__types import DictData, DictStr, Re, TupleStr
|
27
|
+
from .exceptions import StageException
|
28
|
+
from .utils import (
|
29
|
+
Registry,
|
30
|
+
Result,
|
31
|
+
TagFunc,
|
32
|
+
gen_id,
|
33
|
+
make_exec,
|
34
|
+
make_registry,
|
35
|
+
param2template,
|
36
|
+
)
|
37
|
+
|
38
|
+
|
39
|
+
class BaseStage(BaseModel, ABC):
    """Abstract base model that carries the metadata shared by all stages.

    It declares the ``id``, ``name`` and ``condition`` (alias ``if``) fields
    and leaves ``self.execute()`` abstract, so a custom stage only needs to
    subclass this model and implement that single method.
    """

    id: Optional[str] = Field(
        default=None,
        description=(
            "A stage ID that use to keep execution output or getting by job "
            "owner."
        ),
    )
    name: str = Field(
        description="A stage name that want to logging when start execution."
    )
    # The condition expression is supplied under the ``if`` key.
    condition: Optional[str] = Field(default=None, alias="if")

    @abstractmethod
    def execute(self, params: DictData) -> Result:
        """Run the stage. Every concrete sub-model has to override this.

        :param params: A parameter data that the execution can read from.
        :rtype: Result
        """
        raise NotImplementedError("Stage should implement ``execute`` method.")

    def set_outputs(self, output: DictData, params: DictData) -> DictData:
        """Store the execution output of this stage inside the context.

        The output is written to ``params["stages"][<stage-id>]["outputs"]``.
        The stage id is the templated ``id`` field. When no ``id`` is set, an
        id generated from the templated ``name`` is used, but only if the
        ``WORKFLOW_CORE_DEFAULT_STAGE_ID`` environment variable is enabled;
        otherwise the context is returned untouched.

        :param output: An output data that should be kept for this stage.
        :param params: A context data that receives the output (mutated
            in place).
        :rtype: DictData
        """
        if self.id:
            stage_key: str = param2template(self.id, params)
        elif str2bool(os.getenv("WORKFLOW_CORE_DEFAULT_STAGE_ID", "false")):
            stage_key = gen_id(param2template(self.name, params))
        else:
            # NOTE: Without any usable stage id there is nowhere to keep the
            #   output, so the context passes through unchanged.
            return params

        # NOTE: Create the ``stages`` key on first use, then register output.
        params.setdefault("stages", {})[stage_key] = {"outputs": output}
        return params

    def is_skip(self, params: DictData | None = None) -> bool:
        """Return True when this stage should be skipped, which means the
        stage condition evaluated to ``False``. A stage without any condition
        is never skipped.

        :param params: A parameters that want to pass to condition template.
        :raises StageException: If the condition cannot be evaluated or it
            does not evaluate to a boolean value.
        :rtype: bool
        """
        context: DictData = params or {}
        if self.condition is None:
            return False

        eval_globals: DictData = globals() | context
        try:
            # NOTE(review): The condition string is passed to ``eval``; it
            #   must only come from trusted pipeline definitions.
            outcome = eval(
                param2template(self.condition, context, repr_flag=True),
                eval_globals,
                {},
            )
            if not isinstance(outcome, bool):
                raise TypeError("Return type of condition does not be boolean")
        except Exception as err:
            logging.error(str(err))
            raise StageException(str(err)) from err
        else:
            return not outcome
|
111
|
+
|
112
|
+
|
113
|
+
class EmptyStage(BaseStage):
    """Empty stage that does nothing except logging. It logs the name of the
    stage and, when the ``echo`` field is set, the templated echo statement.
    The returned context is always empty.
    """

    echo: Optional[str] = Field(
        default=None,
        description="A string statement that want to logging",
    )

    def execute(self, params: DictData) -> Result:
        """Execution method for the Empty stage that only writes log records.

        :param params: A context data that is used to render the ``echo``
            statement. This stage does not pass any output.
        :rtype: Result
        """
        logging.info(f"[STAGE]: Empty-Execute: {self.name!r}")

        # NOTE: The ``echo`` field was declared but never used before; log it
        #   so that the field does what its description promises.
        if self.echo is not None:
            logging.info(
                f"[STAGE]: Empty-Echo: {param2template(self.echo, params)}"
            )
        return Result(status=0, context={})
|
132
|
+
|
133
|
+
|
134
|
+
class BashStage(BaseStage):
    """Bash execution stage that executes a bash script on the current OS.
    The script is launched with the ``bash`` command when the platform name
    starts with ``win`` (the original author notes this runs bash in the WSL)
    and with ``sh`` otherwise.

        The built-in ``subprocess`` package is not good enough for running a
    multiline shell statement directly. Thus, this stage writes the statement
    to a temporary ``.sh`` file before the execution process.

    Data Validate:
        >>> stage = {
        ...     "name": "Shell stage execution",
        ...     "bash": 'echo "Hello $FOO"',
        ...     "env": {
        ...         "FOO": "BAR",
        ...     },
        ... }
    """

    bash: str = Field(description="A bash statement that want to execute.")
    env: DictStr = Field(
        default_factory=dict,
        description=(
            "An environment variable mapping that want to set before execute "
            "this shell statement."
        ),
    )

    @contextlib.contextmanager
    def __prepare_bash(self, bash: str, env: DictStr) -> Iterator[list[str]]:
        """Return context of the prepared bash statement that want to execute.
        This step writes a uniquely named ``.sh`` file to the current working
        directory, yields the command list ``[<shell>, <file name>]`` and
        always deletes the file when the context exits, even on error.

        :param bash: A bash statement that is written to the script file.
        :param env: A mapping of variable names to values that is assigned
            at the top of the script.
        """
        # NOTE: Local import keeps this block self-contained.
        import shlex

        f_name: str = f"{uuid.uuid4()}.sh"
        f_shebang: str = "bash" if sys.platform.startswith("win") else "sh"
        try:
            with open(f"./{f_name}", mode="w", newline="\n") as f:
                # NOTE: write header of `.sh` file
                f.write(f"#!/bin/{f_shebang}\n")

                # NOTE: add setting environment variable before the bash
                #   statement. ``shlex.quote`` keeps the value literal (like
                #   the single quotes did) but also survives values that
                #   contain a single quote themselves.
                f.writelines(
                    [f"{k}={shlex.quote(str(env[k]))};\n" for k in env]
                )

                # NOTE: make sure that shell script file does not have `\r`
                #   char.
                f.write(bash.replace("\r\n", "\n"))

            make_exec(f"./{f_name}")

            yield [f_shebang, f_name]
        finally:
            # NOTE: Remove the script even if writing or the caller's body
            #   raised; otherwise failed runs leak ``.sh`` files.
            Path(f"./{f_name}").unlink(missing_ok=True)

    def execute(self, params: DictData) -> Result:
        """Execute the Bash statement with the Python built-in ``subprocess``
        package.

        :param params: A parameter data that want to use in this execution.
        :raises StageException: If the process exits with a non-zero return
            code (including a negative code of a signal-terminated process).
        :rtype: Result
        """
        bash: str = param2template(self.bash, params)
        with self.__prepare_bash(
            bash=bash, env=param2template(self.env, params)
        ) as sh:
            logging.info(f"[STAGE]: Shell-Execute: {sh}")
            rs: CompletedProcess = subprocess.run(
                sh,
                shell=False,
                capture_output=True,
                text=True,
            )

        # NOTE: ``!= 0`` instead of ``> 0`` because subprocess reports a
        #   process that was killed by a signal with a negative return code.
        if rs.returncode != 0:
            # NOTE(review): this tests for the literal four characters
            #   backslash-x-0-0, not for a NUL character; presumably it is
            #   meant to detect UTF-16 output -- confirm the intent.
            err: str = (
                rs.stderr.encode("utf-8").decode("utf-16")
                if "\\x00" in rs.stderr
                else rs.stderr
            )
            logging.error(f"{err}\nRunning Statement:\n---\n{bash}")
            raise StageException(f"{err}\nRunning Statement:\n---\n{bash}")
        return Result(
            status=0,
            context={
                "return_code": rs.returncode,
                "stdout": rs.stdout.rstrip("\n"),
                "stderr": rs.stderr.rstrip("\n"),
            },
        )
|
220
|
+
|
221
|
+
|
222
|
+
class PyStage(BaseStage):
    """Python executor stage that runs a Python statement with ``exec``. The
    statement receives the module globals, the input params and the additional
    variables from the ``vars`` field as its globals.
    """

    run: str = Field(
        description="A Python string statement that want to run with exec.",
    )
    vars: DictData = Field(
        default_factory=dict,
        description=(
            "A mapping to variable that want to pass to globals in exec."
        ),
    )

    def set_outputs(self, output: DictData, params: DictData) -> DictData:
        """Set the outputs of the Python execution process to the input
        params. The locals of the statement (without ``__annotations__``) are
        kept as the stage output, and every key that already exists in params
        is overridden by its value from the executed globals.

        :param output: An output data with the ``locals`` and ``globals``
            keys that ``self.execute()`` returns in its context.
        :param params: A context data that want to add output result.
        :rtype: DictData
        """
        # NOTE: Filter the unnecessary key from locals before keeping it.
        local_vars: DictData = output["locals"]
        kept: DictData = {
            key: value
            for key, value in local_vars.items()
            if key != "__annotations__"
        }
        super().set_outputs(kept, params=params)

        # NOTE:
        #   Override value that was changed on the globals that pass via exec.
        global_vars: DictData = output["globals"]
        overrides: DictData = {
            key: global_vars[key] for key in params if key in global_vars
        }
        params.update(overrides)
        return params

    def execute(self, params: DictData) -> Result:
        """Execute the Python statement that passes all globals and input
        params to the globals argument of the ``exec`` built-in function.

        :param params: A parameter that want to pass before run any statement.
        :raises StageException: If the statement raises any exception.
        :rtype: Result
        """
        # NOTE: create custom globals value that will pass to exec function.
        exec_globals: DictData = globals() | params
        exec_globals = exec_globals | param2template(self.vars, params)
        exec_locals: DictData = {}
        try:
            logging.info(f"[STAGE]: Py-Execute: {uuid.uuid4()}")
            # NOTE(review): ``exec`` runs arbitrary code; the statement must
            #   only come from trusted pipeline definitions.
            exec(param2template(self.run, params), exec_globals, exec_locals)
        except Exception as err:
            message: str = (
                f"{err.__class__.__name__}: {err}\n"
                f"Running Statement:\n---\n{self.run}"
            )
            raise StageException(message) from None
        return Result(
            status=0,
            context={"locals": exec_locals, "globals": exec_globals},
        )
|
281
|
+
|
282
|
+
|
283
|
+
@dataclass
class HookSearch:
    """Hook Search dataclass that keeps the three parts of a hook string.

    In this module it is built from the named groups of a successful match
    of the task format regex against the hook string.
    """

    # Registry location part of the hook string.
    path: str
    # Name of the registered function to look up.
    func: str
    # Tag that selects one implementation of the function.
    tag: str
|
290
|
+
|
291
|
+
|
292
|
+
class HookStage(BaseStage):
    """Hook executor that hooks the Python function from the registry with
    the tag decorator function in the ``utils`` module and runs it with the
    input arguments.

        This stage is different from the PyStage because the PyStage just runs
    a Python statement string. With this stage you can implement a function
    as complex as you need and let the stage invoke it.

    Data Validate:
        >>> stage = {
        ...     "name": "Task stage execution",
        ...     "uses": "tasks/function-name@tag-name",
        ...     "with": {
        ...         "FOO": "BAR",
        ...     },
        ... }
    """

    uses: str = Field(
        description="A pointer that want to load function from registry",
    )
    # The call arguments are supplied under the ``with`` key.
    args: DictData = Field(alias="with")

    @staticmethod
    def extract_hook(hook: str) -> Callable[[], TagFunc]:
        """Extract Hook string value to hook function.

        :param hook: A hook value that able to match with Task regex.
        :raises ValueError: If the hook value does not match the regex.
        :raises NotImplementedError: If the function name or the tag does not
            exist on the registry of the hook path.
        """
        if not (found := Re.RE_TASK_FMT.search(hook)):
            raise ValueError("Task does not match with task format regex.")

        # NOTE: Pass the searching hook string to `path`, `func`, and `tag`.
        search: HookSearch = HookSearch(**found.groupdict())

        # NOTE: Registry object should implement on this package only.
        rgt: dict[str, Registry] = make_registry(f"{search.path}")
        if search.func not in rgt:
            raise NotImplementedError(
                f"``REGISTER-MODULES.{search.path}.registries`` does not "
                f"implement registry: {search.func!r}."
            )

        if search.tag not in rgt[search.func]:
            raise NotImplementedError(
                f"tag: {search.tag!r} does not found on registry func: "
                f"``REGISTER-MODULES.{search.path}.registries.{search.func}``"
            )
        return rgt[search.func][search.tag]

    def execute(self, params: DictData) -> Result:
        """Execute the Task function that already mark registry.

        :param params: A parameter that want to pass before run any statement.
        :type params: DictData
        :raises ImportError: If the extracted hook is not callable.
        :raises ValueError: If a required parameter of the hook function is
            not provided by the stage arguments.
        :raises StageException: If the hook function raises any exception.
        :rtype: Result
        """
        t_func: TagFunc = self.extract_hook(param2template(self.uses, params))()
        if not callable(t_func):
            raise ImportError("Hook caller function does not callable.")

        args: DictData = param2template(self.args, params)

        # VALIDATE: check input task caller parameters that exists before
        #   calling. Variadic parameters (``*args``/``**kwargs``) never have
        #   a default but are not required, so they are excluded.
        variadic = (Parameter.VAR_POSITIONAL, Parameter.VAR_KEYWORD)
        missing: list[str] = [
            name
            for name, prm in inspect.signature(t_func).parameters.items()
            if prm.default is Parameter.empty
            and prm.kind not in variadic
            and name not in args
        ]
        if missing:
            raise ValueError(
                f"Necessary params, ({', '.join(missing)}), "
                f"does not set to args"
            )

        try:
            logging.info(f"[STAGE]: Hook-Execute: {t_func.name}@{t_func.tag}")
            # NOTE: ``args`` is already rendered above; rendering it a second
            #   time could re-interpret values produced by the first pass.
            rs: DictData = t_func(**args)
        except Exception as err:
            raise StageException(f"{err.__class__.__name__}: {err}") from err
        return Result(status=0, context=rs)
|
374
|
+
|
375
|
+
|
376
|
+
class TriggerStage(BaseStage):
    """Trigger Pipeline execution stage that executes another pipeline
    object, which is loaded by its name.
    """

    trigger: str = Field(description="A trigger pipeline name.")
    params: DictData = Field(default_factory=dict)

    def execute(self, params: DictData) -> Result:
        """Trigger execution. Load the pipeline named by the ``trigger`` field
        and execute it with the ``params`` field of this stage, rendered
        against the incoming context.

        :param params: A parameter data that want to use in this execution.
        :rtype: Result
        """
        # NOTE: Import inside the method, as the original code does
        #   (presumably to avoid a circular import with the pipeline module).
        from .pipeline import Pipeline

        pipe: Pipeline = Pipeline.from_loader(name=self.trigger, externals={})

        # NOTE: Render the stage params with the incoming context like every
        #   other stage does; before, the ``params`` argument was ignored and
        #   template values never reached the triggered pipeline.
        rs = pipe.execute(params=param2template(self.params, params))
        return Result(status=0, context=rs)
|
393
|
+
|
394
|
+
|
395
|
+
# NOTE: Order of parsing stage data. The member order is kept exactly as it
#   is because it defines which stage model is tried first.
Stage = Union[PyStage, BashStage, HookStage, TriggerStage, EmptyStage]
|