ddeutil-workflow 0.0.5__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +9 -0
- ddeutil/workflow/__types.py +43 -1
- ddeutil/workflow/exceptions.py +13 -1
- ddeutil/workflow/loader.py +13 -115
- ddeutil/workflow/on.py +78 -26
- ddeutil/workflow/pipeline.py +341 -392
- ddeutil/workflow/{__scheduler.py → scheduler.py} +73 -45
- ddeutil/workflow/stage.py +402 -0
- ddeutil/workflow/utils.py +205 -35
- {ddeutil_workflow-0.0.5.dist-info → ddeutil_workflow-0.0.6.dist-info}/METADATA +95 -66
- ddeutil_workflow-0.0.6.dist-info/RECORD +15 -0
- ddeutil/workflow/__regex.py +0 -44
- ddeutil/workflow/tasks/__init__.py +0 -6
- ddeutil/workflow/tasks/dummy.py +0 -52
- ddeutil_workflow-0.0.5.dist-info/RECORD +0 -17
- {ddeutil_workflow-0.0.5.dist-info → ddeutil_workflow-0.0.6.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.5.dist-info → ddeutil_workflow-0.0.6.dist-info}/WHEEL +0 -0
- {ddeutil_workflow-0.0.5.dist-info → ddeutil_workflow-0.0.6.dist-info}/top_level.txt +0 -0
ddeutil/workflow/utils.py
CHANGED
@@ -6,22 +6,104 @@
|
|
6
6
|
from __future__ import annotations
|
7
7
|
|
8
8
|
import inspect
|
9
|
+
import os
|
9
10
|
import stat
|
10
11
|
from abc import ABC, abstractmethod
|
12
|
+
from collections.abc import Iterator
|
13
|
+
from dataclasses import dataclass, field
|
11
14
|
from datetime import date, datetime
|
12
15
|
from functools import wraps
|
16
|
+
from hashlib import md5
|
13
17
|
from importlib import import_module
|
18
|
+
from itertools import product
|
14
19
|
from pathlib import Path
|
15
20
|
from typing import Any, Callable, Literal, Optional, Protocol, Union
|
21
|
+
from zoneinfo import ZoneInfo
|
16
22
|
|
17
|
-
import
|
18
|
-
from ddeutil.
|
23
|
+
from ddeutil.core import getdot, hasdot, lazy
|
24
|
+
from ddeutil.io import PathData
|
19
25
|
from ddeutil.io.models.lineage import dt_now
|
20
26
|
from pydantic import BaseModel, Field
|
21
27
|
from pydantic.functional_validators import model_validator
|
22
28
|
from typing_extensions import Self
|
23
29
|
|
24
|
-
from .__types import DictData
|
30
|
+
from .__types import DictData, Matrix, Re
|
31
|
+
|
32
|
+
|
33
|
+
class Engine(BaseModel):
    """Engine model that holds the path settings and the module registry.

    ``paths`` defaults to a fresh ``PathData`` instance and ``registry``
    defaults to a list that contains only ``"ddeutil.workflow"``.
    """

    paths: PathData = Field(default_factory=PathData)
    registry: list[str] = Field(
        default_factory=lambda: [
            "ddeutil.workflow",
        ],
    )

    @model_validator(mode="before")
    @classmethod
    def __prepare_registry(cls, values: DictData) -> DictData:
        """Wrap a ``registry`` value that was passed as a single string into
        a list of string.

        :param values: The raw input given to the model before validation.

        :returns: The same input; when it is a dict whose ``registry`` is a
            non-empty string, that entry is replaced by a one-item list.
        """
        # NOTE: a before-validator receives the raw input, which is not
        #   guaranteed to be a mapping. Leave any non-dict input untouched so
        #   the regular validation reports it instead of failing on ``.get``.
        if not isinstance(values, dict):
            return values

        if (_regis := values.get("registry")) and isinstance(_regis, str):
            values["registry"] = [_regis]
        return values
|
51
|
+
|
52
|
+
|
53
|
+
class ConfParams(BaseModel):
    """Configuration parameters model that wraps the ``Engine`` settings."""

    # NOTE: an ``Engine`` built from its own defaults is used when no engine
    #   value is supplied.
    engine: Engine = Field(
        description="A engine mapping values.",
        default_factory=Engine,
    )
|
60
|
+
|
61
|
+
|
62
|
+
def config() -> ConfParams:
    """Build the config model from environment variables.

    The following variables are read:

    * ``WORKFLOW_ROOT_PATH``: the root path, ``"."`` when it is not set.
    * ``WORKFLOW_CORE_REGISTRY``: comma-separated module names.
    * ``WORKFLOW_CORE_PATH_CONF``: joined onto the root path with ``/``; the
      conf path is ``None`` when it is not set or empty.

    :rtype: ConfParams
    """
    root_path: str = os.getenv("WORKFLOW_ROOT_PATH", ".")

    conf_path: Optional[str] = (
        f"{root_path}/{conf_env}"
        if (conf_env := os.getenv("WORKFLOW_CORE_PATH_CONF"))
        else None
    )
    engine: DictData = {
        "paths": {
            "root": root_path,
            "conf": conf_path,
        },
    }

    # NOTE: blank entries (e.g. from a trailing comma) are dropped because
    #   they are not importable module names.
    regis: list[str] = [
        name
        for r in os.getenv("WORKFLOW_CORE_REGISTRY", "").split(",")
        if (name := r.strip())
    ]

    # NOTE: only override the registry when at least one module name was
    #   given; passing an empty list would replace the default registry of
    #   the ``Engine`` model with nothing.
    if regis:
        engine["registry"] = regis

    return ConfParams.model_validate(obj={"engine": engine})
|
86
|
+
|
87
|
+
|
88
|
+
def gen_id(value: Any, *, sensitive: bool = True, unique: bool = False) -> str:
    """Generate running ID for able to tracking. This generate process use
    the `md5` function and returns its hex digest.

    :param value: A value to hash. A non-string value is converted with
        ``str`` first.
    :param sensitive: A case-sensitive flag. If it is false, the value is
        lowercased before hashing.
    :param unique: A unique flag. If it is true, the current datetime (down
        to microseconds) in the ``WORKFLOW_CORE_TIMEZONE`` timezone (``UTC``
        when it is not set) is appended before hashing, so repeated calls
        give different IDs.
    :rtype: str
    """
    text: str = value if isinstance(value, str) else str(value)
    if not sensitive:
        text = text.lower()

    # NOTE: the timezone is only resolved when it is needed, so the
    #   deterministic path does not depend on the timezone configuration.
    if unique:
        tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
        text += f"{datetime.now(tz=tz):%Y%m%d%H%M%S%f}"

    # NOTE: the digest is a tracking identifier, not a security primitive;
    #   flagging it keeps ``md5`` usable on restricted (e.g. FIPS) builds.
    return md5(text.encode(), usedforsecurity=False).hexdigest()
|
25
107
|
|
26
108
|
|
27
109
|
class TagFunc(Protocol):
|
@@ -49,37 +131,52 @@ def tag(value: str, name: str | None = None):
|
|
49
131
|
def wrapped(*args, **kwargs):
|
50
132
|
return func(*args, **kwargs)
|
51
133
|
|
134
|
+
# TODO: pass result from a wrapped to Result model
|
135
|
+
# >>> return Result.model_validate(obj=wrapped)
|
52
136
|
return wrapped
|
53
137
|
|
54
138
|
return func_internal
|
55
139
|
|
56
140
|
|
57
|
-
|
141
|
+
Registry = dict[str, Callable[[], TagFunc]]
|
142
|
+
|
143
|
+
|
144
|
+
def make_registry(submodule: str) -> dict[str, Registry]:
    """Collect the tagged functions of every registry module.

    Each module from ``config().engine.registry`` is imported with the
    ``submodule`` suffix; a module that cannot be found is skipped. Every
    function of an imported module that has a ``tag`` attribute is stored
    under its ``name`` and ``tag`` attributes as a ``lazy`` reference to its
    dotted path.

    :param submodule: A module prefix that want to import registry.

    :raises ValueError: If a tag already exists under the same func name.
    """
    registries: dict[str, Registry] = {}
    for module in config().engine.registry:
        target: str = f"{module}.{submodule}"

        # NOTE: registry modules are imported one by one and the missing
        #   ones are ignored.
        try:
            importer = import_module(target)
        except ModuleNotFoundError:
            continue

        for fstr, func in inspect.getmembers(importer, inspect.isfunction):
            # NOTE: only functions that carry a ``tag`` attribute (set by
            #   the ``utils.tag`` decorator) are registered.
            if not hasattr(func, "tag"):
                continue

            # NOTE: a new register name starts with an empty tag mapping.
            tags = registries.setdefault(func.name, {})
            if func.tag in tags:
                raise ValueError(
                    f"The tag {func.tag!r} already exists on "
                    f"{module}.{submodule}, you should change this tag name or "
                    f"change it func name."
                )
            tags[func.tag] = lazy(f"{target}.{fstr}")

    return registries
|
80
177
|
|
81
178
|
|
82
|
-
class
|
179
|
+
class BaseParam(BaseModel, ABC):
|
83
180
|
"""Base Parameter that use to make Params Model."""
|
84
181
|
|
85
182
|
desc: Optional[str] = None
|
@@ -93,7 +190,7 @@ class BaseParams(BaseModel, ABC):
|
|
93
190
|
)
|
94
191
|
|
95
192
|
|
96
|
-
class
|
193
|
+
class DefaultParam(BaseParam):
|
97
194
|
"""Default Parameter that will check default if it required"""
|
98
195
|
|
99
196
|
default: Optional[str] = None
|
@@ -113,7 +210,7 @@ class DefaultParams(BaseParams):
|
|
113
210
|
return self
|
114
211
|
|
115
212
|
|
116
|
-
class
|
213
|
+
class DatetimeParam(DefaultParam):
|
117
214
|
"""Datetime parameter."""
|
118
215
|
|
119
216
|
type: Literal["datetime"] = "datetime"
|
@@ -136,7 +233,7 @@ class DatetimeParams(DefaultParams):
|
|
136
233
|
return datetime.fromisoformat(value)
|
137
234
|
|
138
235
|
|
139
|
-
class
|
236
|
+
class StrParam(DefaultParam):
|
140
237
|
"""String parameter."""
|
141
238
|
|
142
239
|
type: Literal["str"] = "str"
|
@@ -147,7 +244,7 @@ class StrParams(DefaultParams):
|
|
147
244
|
return str(value)
|
148
245
|
|
149
246
|
|
150
|
-
class
|
247
|
+
class IntParam(DefaultParam):
|
151
248
|
"""Integer parameter."""
|
152
249
|
|
153
250
|
type: Literal["int"] = "int"
|
@@ -166,7 +263,7 @@ class IntParams(DefaultParams):
|
|
166
263
|
return value
|
167
264
|
|
168
265
|
|
169
|
-
class
|
266
|
+
class ChoiceParam(BaseParam):
|
170
267
|
type: Literal["choice"] = "choice"
|
171
268
|
options: list[str]
|
172
269
|
|
@@ -181,28 +278,101 @@ class ChoiceParams(BaseParams):
|
|
181
278
|
return value
|
182
279
|
|
183
280
|
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
281
|
+
Param = Union[
|
282
|
+
ChoiceParam,
|
283
|
+
DatetimeParam,
|
284
|
+
StrParam,
|
188
285
|
]
|
189
286
|
|
190
287
|
|
288
|
+
@dataclass
class Result:
    """Result dataclass that carries parameters into the pipeline execution
    and receives the output from it.
    """

    # NOTE: a status code that defaults to 2 and a context mapping that
    #   defaults to a new empty dict per instance.
    status: int = 2
    context: DictData = field(default_factory=dict)
|
296
|
+
|
297
|
+
|
191
298
|
def make_exec(path: str | Path):
    """Add the owner-execute permission bit to the mode of a file.

    :param path: A file path, given as a string or a ``Path`` object.
    """
    target: Path = path if not isinstance(path, str) else Path(path)

    # NOTE: keep the existing mode bits and only add ``S_IEXEC`` on top.
    current_mode: int = target.stat().st_mode
    target.chmod(current_mode | stat.S_IEXEC)
|
195
302
|
|
196
303
|
|
197
|
-
|
198
|
-
|
199
|
-
|
304
|
+
def param2template(
    value: Any,
    params: dict[str, Any],
    *,
    repr_flag: bool = False,
) -> Any:
    """Pass param to template string that can search by ``RE_CALLER`` regular
    expression.

    A dict, list, tuple, or set is processed recursively and rebuilt with the
    same container type. Any other non-string value is returned as is.

    :param value: A value that want to mapped with an params
    :param params: A parameter value that getting with matched regular
        expression.
    :param repr_flag: A repr flag for using repr instead of str if it set be
        true. It also applies to the items of nested containers.

    :raises ValueError: If a caller does not exist in the params, or if a
        caller that resolves to a value other than str or int is mixed with
        other text in the same string.

    :rtype: Any
    :returns: An any getter value from the params input.
    """
    if isinstance(value, dict):
        return {
            k: param2template(v, params, repr_flag=repr_flag)
            for k, v in value.items()
        }
    elif isinstance(value, (list, tuple, set)):
        return type(value)(
            [param2template(i, params, repr_flag=repr_flag) for i in value]
        )
    elif not isinstance(value, str):
        return value

    # NOTE: the matches are found on the incoming string; when nothing
    #   matches, the loop does not run and the value is returned unchanged.
    for found in Re.RE_CALLER.finditer(value):

        # NOTE: get caller value that setting inside; ``${{ <caller-value> }}``
        caller: str = found.group("caller")
        if not hasdot(caller, params):
            raise ValueError(f"params does not set caller: {caller!r}")

        getter: Any = getdot(caller, params)

        # NOTE: a str or int getter is formatted into the string, replacing
        #   only the first occurrence of the matched template.
        if isinstance(getter, (str, int)):
            value = value.replace(
                found.group(0), (repr(getter) if repr_flag else str(getter)), 1
            )
            continue

        # NOTE:
        #   If type of getter caller does not formatting, it will return origin
        #   value from the ``getdot`` function. It is only allowed when the
        #   template is the whole string.
        if value.replace(found.group(0), "", 1) != "":
            raise ValueError(
                "Callable variable should not pass other outside ${{ ... }}"
            )
        return getter
    return value
|
357
|
+
|
358
|
+
|
359
|
+
def dash2underscore(
    key: str,
    values: DictData,
    *,
    fixed: str | None = None,
) -> DictData:
    """Rename a key of the mapping by changing its dashes to underscores.

    The mapping is changed in place and returned. Nothing happens when the
    key does not exist.

    :param key: A key name to rename.
    :param values: A mapping that may contain the key.
    :param fixed: A new key name to use instead of the underscore version;
        an empty value falls back to the underscore version.
    """
    if key not in values:
        return values

    renamed: str = fixed or key.replace("-", "_")
    values[renamed] = values.pop(key)
    return values
|
369
|
+
|
370
|
+
|
371
|
+
def cross_product(matrix: Matrix) -> Iterator:
    """Yield one mapping per combination of the matrix values.

    Every yielded mapping has all keys of the matrix, each paired with one
    of the values listed under that key.
    """
    names = list(matrix)
    for combo in product(*(matrix[name] for name in names)):
        yield dict(zip(names, combo))
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: ddeutil-workflow
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.6
|
4
4
|
Summary: Data Developer & Engineer Workflow Utility Objects
|
5
5
|
Author-email: ddeutils <korawich.anu@gmail.com>
|
6
6
|
License: MIT
|
@@ -24,30 +24,32 @@ License-File: LICENSE
|
|
24
24
|
Requires-Dist: fmtutil
|
25
25
|
Requires-Dist: ddeutil-io
|
26
26
|
Requires-Dist: python-dotenv ==1.0.1
|
27
|
-
|
27
|
+
Provides-Extra: app
|
28
|
+
Requires-Dist: fastapi ==0.112.0 ; extra == 'app'
|
29
|
+
Requires-Dist: apscheduler[sqlalchemy] ==3.10.4 ; extra == 'app'
|
28
30
|
|
29
|
-
#
|
31
|
+
# Workflow
|
30
32
|
|
31
33
|
[](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
|
32
34
|
[](https://pypi.org/project/ddeutil-workflow/)
|
33
35
|
[](https://github.com/ddeutils/ddeutil-workflow)
|
34
36
|
[](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
|
35
37
|
|
36
|
-
|
37
38
|
**Table of Contents**:
|
38
39
|
|
39
40
|
- [Installation](#installation)
|
40
41
|
- [Getting Started](#getting-started)
|
41
|
-
|
42
|
-
- [
|
43
|
-
- [
|
44
|
-
- [
|
45
|
-
- [Python &
|
46
|
-
- [
|
47
|
-
- [
|
42
|
+
- [Core Features](#core-features)
|
43
|
+
- [On](#on)
|
44
|
+
- [Pipeline](#pipeline)
|
45
|
+
- [Usage](#usage)
|
46
|
+
- [Python & Bash](#python--bash)
|
47
|
+
- [Hook (EL)](#hook-extract--load)
|
48
|
+
- [Hook (T)](#hook-transform)
|
48
49
|
- [Configuration](#configuration)
|
50
|
+
- [Deployment](#deployment)
|
49
51
|
|
50
|
-
This **
|
52
|
+
This **Workflow** object was created to make it easy to build a simple metadata
|
51
53
|
driven pipeline that is able to run **ETL, T, EL, or ELT** from a `.yaml` file.
|
52
54
|
|
53
55
|
I think we should not create multiple pipelines per use-case if we are able to
|
@@ -70,7 +72,12 @@ pipeline.
|
|
70
72
|
pip install ddeutil-workflow
|
71
73
|
```
|
72
74
|
|
73
|
-
This project need `ddeutil-io
|
75
|
+
This project needs the `ddeutil-io` extension namespace package. If you want to install
|
76
|
+
this package with application add-ons, you should add the `app` extra at installation:
|
77
|
+
|
78
|
+
```shell
|
79
|
+
pip install ddeutil-workflow[app]
|
80
|
+
```
|
74
81
|
|
75
82
|
## Getting Started
|
76
83
|
|
@@ -87,38 +94,42 @@ will passing parameters and catching the output for re-use it to next step.
|
|
87
94
|
> dynamic registries instead of main features because it have a lot of maintain
|
88
95
|
> vendor codes and deps. (I do not have time to handle these features)
|
89
96
|
|
90
|
-
|
97
|
+
### On
|
91
98
|
|
92
|
-
|
99
|
+
The **On** object is a schedule object.
|
93
100
|
|
94
101
|
```yaml
|
95
|
-
|
96
|
-
type:
|
102
|
+
on_every_5_min:
|
103
|
+
type: on.On
|
97
104
|
cron: "*/5 * * * *"
|
98
105
|
```
|
99
106
|
|
100
107
|
```python
|
101
|
-
from ddeutil.workflow.on import
|
108
|
+
from ddeutil.workflow.on import On
|
102
109
|
|
103
|
-
|
104
|
-
assert '*/5 * * * *' == str(
|
110
|
+
schedule = On.from_loader(name='on_every_5_min', externals={})
|
111
|
+
assert '*/5 * * * *' == str(schedule.cronjob)
|
105
112
|
|
106
|
-
|
107
|
-
assert '2022-01-01 00:05:00' f"{
|
108
|
-
assert '2022-01-01 00:10:00' f"{
|
109
|
-
assert '2022-01-01 00:15:00' f"{
|
110
|
-
assert '2022-01-01 00:20:00' f"{
|
111
|
-
assert '2022-01-01 00:25:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
|
113
|
+
cron_iter = schedule.generate('2022-01-01 00:00:00')
|
114
|
+
assert '2022-01-01 00:05:00' == f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
|
115
|
+
assert '2022-01-01 00:10:00' == f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
|
116
|
+
assert '2022-01-01 00:15:00' == f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
|
117
|
+
assert '2022-01-01 00:20:00' == f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
|
112
118
|
```
|
113
119
|
|
114
|
-
---
|
115
|
-
|
116
120
|
### Pipeline
|
117
121
|
|
122
|
+
The **Pipeline** object is the core feature of this project.
|
123
|
+
|
118
124
|
```yaml
|
119
125
|
run_py_local:
|
120
126
|
type: ddeutil.workflow.pipeline.Pipeline
|
121
|
-
|
127
|
+
on: 'on_every_5_min'
|
128
|
+
params:
|
129
|
+
author-run:
|
130
|
+
type: str
|
131
|
+
run-date:
|
132
|
+
type: datetime
|
122
133
|
```
|
123
134
|
|
124
135
|
```python
|
@@ -128,27 +139,39 @@ pipe = Pipeline.from_loader(name='run_py_local', externals={})
|
|
128
139
|
pipe.execute(params={'author-run': 'Local Workflow', 'run-date': '2024-01-01'})
|
129
140
|
```
|
130
141
|
|
131
|
-
|
142
|
+
> [!NOTE]
|
143
|
+
> The above parameters use a short declarative statement. You can pass a parameter
|
144
|
+
> type to the key of a parameter name.
|
145
|
+
> ```yaml
|
146
|
+
> params:
|
147
|
+
> author-run: str
|
148
|
+
> run-date: datetime
|
149
|
+
> ```
|
150
|
+
>
|
151
|
+
> And for the type, you can remove the `ddeutil.workflow` prefix because we can find
|
152
|
+
> it by searching through the `WORKFLOW_CORE_REGISTRY` values.
|
153
|
+
|
154
|
+
## Usage
|
132
155
|
|
133
156
|
These are examples that use a workflow file for running common Data Engineering
|
134
157
|
use-case.
|
135
158
|
|
136
|
-
|
159
|
+
> [!IMPORTANT]
|
160
|
+
> I recommend that you use the `task` stage for all actions that you want to do with
|
161
|
+
> pipeline object.
|
137
162
|
|
138
|
-
|
163
|
+
### Python & Bash
|
139
164
|
|
140
165
|
```yaml
|
141
166
|
run_py_local:
|
142
|
-
type:
|
167
|
+
type: pipeline.Pipeline
|
143
168
|
params:
|
144
|
-
author-run:
|
145
|
-
|
146
|
-
run-date:
|
147
|
-
type: datetime
|
169
|
+
author-run: str
|
170
|
+
run-date: datetime
|
148
171
|
jobs:
|
149
172
|
first-job:
|
150
173
|
stages:
|
151
|
-
- name: Printing Information
|
174
|
+
- name: "Printing Information"
|
152
175
|
id: define-func
|
153
176
|
run: |
|
154
177
|
x = '${{ params.author-run }}'
|
@@ -157,7 +180,7 @@ run_py_local:
|
|
157
180
|
def echo(name: str):
|
158
181
|
print(f'Hello {name}')
|
159
182
|
|
160
|
-
- name: Run Sequence and use var from Above
|
183
|
+
- name: "Run Sequence and use var from Above"
|
161
184
|
vars:
|
162
185
|
x: ${{ params.author-run }}
|
163
186
|
run: |
|
@@ -165,16 +188,16 @@ run_py_local:
|
|
165
188
|
# Change x value
|
166
189
|
x: int = 1
|
167
190
|
|
168
|
-
- name: Call Function
|
191
|
+
- name: "Call Function"
|
169
192
|
vars:
|
170
193
|
echo: ${{ stages.define-func.outputs.echo }}
|
171
194
|
run: |
|
172
195
|
echo('Caller')
|
173
196
|
second-job:
|
174
197
|
stages:
|
175
|
-
- name: Echo
|
198
|
+
- name: "Echo Bash Script"
|
176
199
|
id: shell-echo
|
177
|
-
|
200
|
+
bash: |
|
178
201
|
echo "Hello World from Shell"
|
179
202
|
```
|
180
203
|
|
@@ -192,24 +215,20 @@ pipe.execute(params={'author-run': 'Local Workflow', 'run-date': '2024-01-01'})
|
|
192
215
|
> Hello World from Shell
|
193
216
|
```
|
194
217
|
|
195
|
-
|
196
|
-
|
197
|
-
### Tasks (Extract & Load)
|
218
|
+
### Hook (Extract & Load)
|
198
219
|
|
199
220
|
```yaml
|
200
221
|
pipe_el_pg_to_lake:
|
201
|
-
type:
|
222
|
+
type: pipeline.Pipeline
|
202
223
|
params:
|
203
|
-
run-date:
|
204
|
-
|
205
|
-
author-email:
|
206
|
-
type: str
|
224
|
+
run-date: datetime
|
225
|
+
author-email: str
|
207
226
|
jobs:
|
208
227
|
extract-load:
|
209
228
|
stages:
|
210
229
|
- name: "Extract Load from Postgres to Lake"
|
211
230
|
id: extract-load
|
212
|
-
|
231
|
+
uses: tasks/postgres-to-delta@polars
|
213
232
|
with:
|
214
233
|
source:
|
215
234
|
conn: conn_postgres_url
|
@@ -221,15 +240,11 @@ pipe_el_pg_to_lake:
|
|
221
240
|
endpoint: "/${{ params.name }}"
|
222
241
|
```
|
223
242
|
|
224
|
-
|
225
|
-
|
226
|
-
### Tasks (Transform)
|
227
|
-
|
228
|
-
> I recommend you to use task for all actions that you want to do.
|
243
|
+
### Hook (Transform)
|
229
244
|
|
230
245
|
```yaml
|
231
|
-
|
232
|
-
type:
|
246
|
+
pipeline_hook_mssql_proc:
|
247
|
+
type: pipeline.Pipeline
|
233
248
|
params:
|
234
249
|
run_date: datetime
|
235
250
|
sp_name: str
|
@@ -240,7 +255,7 @@ pipe_hook_mssql_proc:
|
|
240
255
|
stages:
|
241
256
|
- name: "Transform Data in MS SQL Server"
|
242
257
|
id: transform
|
243
|
-
|
258
|
+
uses: tasks/mssql-proc@odbc
|
244
259
|
with:
|
245
260
|
exec: ${{ params.sp_name }}
|
246
261
|
params:
|
@@ -250,16 +265,30 @@ pipe_hook_mssql_proc:
|
|
250
265
|
target: ${{ params.target_name }}
|
251
266
|
```
|
252
267
|
|
253
|
-
> [!NOTE]
|
254
|
-
> The above parameter use short declarative statement. You can pass a parameter
|
255
|
-
> type to the key of a parameter name.
|
256
|
-
|
257
268
|
## Configuration
|
258
269
|
|
259
|
-
```
|
270
|
+
```bash
|
271
|
+
export WORKFLOW_ROOT_PATH=.
|
272
|
+
export WORKFLOW_CORE_REGISTRY=ddeutil.workflow,tests.utils
|
273
|
+
export WORKFLOW_CORE_PATH_CONF=conf
|
274
|
+
```
|
275
|
+
|
276
|
+
Application config:
|
260
277
|
|
278
|
+
```bash
|
279
|
+
export WORKFLOW_APP_DB_URL=postgresql+asyncpg://user:pass@localhost:5432/schedule
|
280
|
+
export WORKFLOW_APP_INTERVAL=10
|
261
281
|
```
|
262
282
|
|
263
|
-
##
|
283
|
+
## Deployment
|
264
284
|
|
265
|
-
This
|
285
|
+
This package is able to run as an application service to receive manual triggers
|
286
|
+
from the master node via a REST API.
|
287
|
+
|
288
|
+
> [!WARNING]
|
289
|
+
> This feature has not started yet because I am still researching to find the best tool
|
290
|
+
> to use to provision an app service, like `starlette`, `fastapi`, or `apscheduler`.
|
291
|
+
|
292
|
+
```shell
|
293
|
+
(venv) $ workflow start -p 7070
|
294
|
+
```
|
@@ -0,0 +1,15 @@
|
|
1
|
+
ddeutil/workflow/__about__.py,sha256=VEYa91VchyTUnF57lvvquHvitTViBxxLXuhcEnr4TAY,27
|
2
|
+
ddeutil/workflow/__init__.py,sha256=Y5wLiJ0zS1CfoSOZ0oo7OL3LNMKvPmpUO4fVHuAOv8E,429
|
3
|
+
ddeutil/workflow/__types.py,sha256=PfwDZBnTwe2JImD7UFS0J6Nq-1TcjBGebOVzJZoSuTQ,1354
|
4
|
+
ddeutil/workflow/exceptions.py,sha256=9O12c4aNLi0dyjVBgCLveV3HN9PXcZfwFfLXdgm3Ffs,626
|
5
|
+
ddeutil/workflow/loader.py,sha256=_ZD-XP5P7VbUeqItrUVPaKIZu6dMUZ2aywbCbReW1hQ,2778
|
6
|
+
ddeutil/workflow/on.py,sha256=YoEqDbzJUwqOA3JRltbvlYr0rNTtxdmb7cWMxl8U19k,6717
|
7
|
+
ddeutil/workflow/pipeline.py,sha256=8mIvY34_fsiqscBa9JB94MgN3Km5fkuD2iaNZSAQVuM,17843
|
8
|
+
ddeutil/workflow/scheduler.py,sha256=FqmkvWCqwJ4eRf8aDn5Ce4FcNWqmcvu2aTTfL34lfgs,22184
|
9
|
+
ddeutil/workflow/stage.py,sha256=bDJiGS21gYlYbFDnLTKH9aIbXfej9fT-V1ADoPX7w4s,13829
|
10
|
+
ddeutil/workflow/utils.py,sha256=xapKxxnqIzlbKA45GaRcWn-VL30AhE7M8f46ynr-vbI,11173
|
11
|
+
ddeutil_workflow-0.0.6.dist-info/LICENSE,sha256=nGFZ1QEhhhWeMHf9n99_fdt4vQaXS29xWKxt-OcLywk,1085
|
12
|
+
ddeutil_workflow-0.0.6.dist-info/METADATA,sha256=5X6ewXGn96MR9rDhVmmaoTuIwuRmfi72t7ezM8wxbvw,8612
|
13
|
+
ddeutil_workflow-0.0.6.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
14
|
+
ddeutil_workflow-0.0.6.dist-info/top_level.txt,sha256=m9M6XeSWDwt_yMsmH6gcOjHZVK5O0-vgtNBuncHjzW4,8
|
15
|
+
ddeutil_workflow-0.0.6.dist-info/RECORD,,
|
ddeutil/workflow/__regex.py
DELETED
@@ -1,44 +0,0 @@
|
|
1
|
-
# -------------------------------------------------------------------------
|
2
|
-
# Copyright (c) 2022 Korawich Anuttra. All rights reserved.
|
3
|
-
# Licensed under the MIT License. See LICENSE in the project root for
|
4
|
-
# license information.
|
5
|
-
# --------------------------------------------------------------------------
|
6
|
-
import re
|
7
|
-
from re import (
|
8
|
-
IGNORECASE,
|
9
|
-
MULTILINE,
|
10
|
-
UNICODE,
|
11
|
-
VERBOSE,
|
12
|
-
Pattern,
|
13
|
-
)
|
14
|
-
|
15
|
-
|
16
|
-
class RegexConf:
|
17
|
-
"""Regular expression config."""
|
18
|
-
|
19
|
-
# NOTE: Search caller
|
20
|
-
__re_caller: str = r"""
|
21
|
-
\$
|
22
|
-
{{
|
23
|
-
\s*(?P<caller>
|
24
|
-
[a-zA-Z0-9_.\s'\"\[\]\(\)\-\{}]+?
|
25
|
-
)\s*
|
26
|
-
}}
|
27
|
-
"""
|
28
|
-
RE_CALLER: Pattern = re.compile(
|
29
|
-
__re_caller, MULTILINE | IGNORECASE | UNICODE | VERBOSE
|
30
|
-
)
|
31
|
-
|
32
|
-
# NOTE: Search task
|
33
|
-
__re_task_fmt: str = r"""
|
34
|
-
^
|
35
|
-
(?P<path>[^/@]+)
|
36
|
-
/
|
37
|
-
(?P<func>[^@]+)
|
38
|
-
@
|
39
|
-
(?P<tag>.+)
|
40
|
-
$
|
41
|
-
"""
|
42
|
-
RE_TASK_FMT: Pattern = re.compile(
|
43
|
-
__re_task_fmt, MULTILINE | IGNORECASE | UNICODE | VERBOSE
|
44
|
-
)
|