ddeutil-workflow 0.0.5.tar.gz → 0.0.6.tar.gz
This diff compares the contents of publicly released versions of the package as published to a supported registry. It is provided for informational purposes only.
- {ddeutil_workflow-0.0.5/src/ddeutil_workflow.egg-info → ddeutil_workflow-0.0.6}/PKG-INFO +95 -66
- {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.6}/README.md +91 -64
- {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.6}/pyproject.toml +6 -1
- ddeutil_workflow-0.0.6/src/ddeutil/workflow/__about__.py +1 -0
- {ddeutil_workflow-0.0.5/src/ddeutil/workflow/tasks → ddeutil_workflow-0.0.6/src/ddeutil/workflow}/__init__.py +4 -1
- ddeutil_workflow-0.0.5/src/ddeutil/workflow/__regex.py → ddeutil_workflow-0.0.6/src/ddeutil/workflow/__types.py +13 -3
- {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.6}/src/ddeutil/workflow/exceptions.py +13 -1
- ddeutil_workflow-0.0.6/src/ddeutil/workflow/loader.py +80 -0
- {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.6}/src/ddeutil/workflow/on.py +78 -26
- ddeutil_workflow-0.0.6/src/ddeutil/workflow/pipeline.py +497 -0
- ddeutil_workflow-0.0.5/src/ddeutil/workflow/__scheduler.py → ddeutil_workflow-0.0.6/src/ddeutil/workflow/scheduler.py +73 -45
- ddeutil_workflow-0.0.6/src/ddeutil/workflow/stage.py +402 -0
- ddeutil_workflow-0.0.6/src/ddeutil/workflow/utils.py +378 -0
- {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.6/src/ddeutil_workflow.egg-info}/PKG-INFO +95 -66
- {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.6}/src/ddeutil_workflow.egg-info/SOURCES.txt +13 -10
- ddeutil_workflow-0.0.6/src/ddeutil_workflow.egg-info/requires.txt +7 -0
- ddeutil_workflow-0.0.6/tests/test__conf_exist.py +11 -0
- ddeutil_workflow-0.0.5/tests/test_base_regex.py → ddeutil_workflow-0.0.6/tests/test__regex.py +11 -3
- {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.6}/tests/test_on.py +14 -5
- {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.6}/tests/test_pipeline.py +3 -9
- ddeutil_workflow-0.0.6/tests/test_pipeline_desc.py +11 -0
- ddeutil_workflow-0.0.6/tests/test_pipeline_if.py +28 -0
- ddeutil_workflow-0.0.6/tests/test_pipeline_matrix.py +87 -0
- ddeutil_workflow-0.0.6/tests/test_pipeline_on.py +12 -0
- {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.6}/tests/test_pipeline_params.py +1 -1
- {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.6}/tests/test_pipeline_run.py +40 -45
- {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.6}/tests/test_pipeline_task.py +10 -12
- ddeutil_workflow-0.0.5/tests/test_base_schedule.py → ddeutil_workflow-0.0.6/tests/test_scheduler.py +61 -18
- ddeutil_workflow-0.0.6/tests/test_stage_trigger.py +10 -0
- ddeutil_workflow-0.0.6/tests/test_utils.py +8 -0
- ddeutil_workflow-0.0.6/tests/test_utils_result.py +22 -0
- ddeutil_workflow-0.0.5/src/ddeutil/workflow/__about__.py +0 -1
- ddeutil_workflow-0.0.5/src/ddeutil/workflow/__init__.py +0 -0
- ddeutil_workflow-0.0.5/src/ddeutil/workflow/__types.py +0 -12
- ddeutil_workflow-0.0.5/src/ddeutil/workflow/loader.py +0 -182
- ddeutil_workflow-0.0.5/src/ddeutil/workflow/pipeline.py +0 -548
- ddeutil_workflow-0.0.5/src/ddeutil/workflow/tasks/dummy.py +0 -52
- ddeutil_workflow-0.0.5/src/ddeutil/workflow/utils.py +0 -208
- ddeutil_workflow-0.0.5/src/ddeutil_workflow.egg-info/requires.txt +0 -4
- ddeutil_workflow-0.0.5/tests/test_base_data.py +0 -13
- ddeutil_workflow-0.0.5/tests/test_loader.py +0 -6
- ddeutil_workflow-0.0.5/tests/test_pipeline_matrix.py +0 -29
- {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.6}/LICENSE +0 -0
- {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.6}/setup.cfg +0 -0
- {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.6}/src/ddeutil_workflow.egg-info/dependency_links.txt +0 -0
- {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.6}/src/ddeutil_workflow.egg-info/top_level.txt +0 -0
- ddeutil_workflow-0.0.5/tests/test_base_local_and_global.py → ddeutil_workflow-0.0.6/tests/test__local_and_global.py +0 -0
````diff
--- ddeutil_workflow-0.0.5/src/ddeutil_workflow.egg-info/PKG-INFO
+++ ddeutil_workflow-0.0.6/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ddeutil-workflow
-Version: 0.0.5
+Version: 0.0.6
 Summary: Data Developer & Engineer Workflow Utility Objects
 Author-email: ddeutils <korawich.anu@gmail.com>
 License: MIT
@@ -24,30 +24,32 @@ License-File: LICENSE
 Requires-Dist: fmtutil
 Requires-Dist: ddeutil-io
 Requires-Dist: python-dotenv==1.0.1
-Requires-Dist: schedule==1.2.2
+Provides-Extra: app
+Requires-Dist: fastapi==0.112.0; extra == "app"
+Requires-Dist: apscheduler[sqlalchemy]==3.10.4; extra == "app"
 
-#
+# Workflow
 
 [](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
 [](https://pypi.org/project/ddeutil-workflow/)
 [](https://github.com/ddeutils/ddeutil-workflow)
 [](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
 
-
 **Table of Contents**:
 
 - [Installation](#installation)
 - [Getting Started](#getting-started)
-
-- [
-- [
-- [
-- [Python &
-- [
-- [
+- [Core Features](#core-features)
+- [On](#on)
+- [Pipeline](#pipeline)
+- [Usage](#usage)
+- [Python & Bash](#python--bash)
+- [Hook (EL)](#hook-extract--load)
+- [Hook (T)](#hook-transform)
 - [Configuration](#configuration)
+- [Deployment](#deployment)
 
-This **
+This **Workflow** objects was created for easy to make a simple metadata
 driven pipeline that able to **ETL, T, EL, or ELT** by `.yaml` file.
 
 I think we should not create the multiple pipeline per use-case if we able to
@@ -70,7 +72,12 @@ pipeline.
 pip install ddeutil-workflow
 ```
 
-This project need `ddeutil-io
+This project need `ddeutil-io` extension namespace packages. If you want to install
+this package with application add-ons, you should add `app` in installation;
+
+```shell
+pip install ddeutil-workflow[app]
+```
 
 ## Getting Started
 
@@ -87,38 +94,42 @@ will passing parameters and catching the output for re-use it to next step.
 > dynamic registries instead of main features because it have a lot of maintain
 > vendor codes and deps. (I do not have time to handle this features)
 
-
+### On
 
-
+The **On** is schedule object.
 
 ```yaml
-
-  type:
+on_every_5_min:
+  type: on.On
   cron: "*/5 * * * *"
 ```
 
 ```python
-from ddeutil.workflow.on import
+from ddeutil.workflow.on import On
 
-
-assert '*/5 * * * *' == str(
+schedule = On.from_loader(name='on_every_5_min', externals={})
+assert '*/5 * * * *' == str(schedule.cronjob)
 
-
-assert '2022-01-01 00:05:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
-assert '2022-01-01 00:10:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
-assert '2022-01-01 00:15:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
-assert '2022-01-01 00:20:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
-assert '2022-01-01 00:25:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
+cron_iter = schedule.generate('2022-01-01 00:00:00')
+assert '2022-01-01 00:05:00' f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+assert '2022-01-01 00:10:00' f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+assert '2022-01-01 00:15:00' f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+assert '2022-01-01 00:20:00' f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
 ```
 
----
-
 ### Pipeline
 
+The **Pipeline** object that is the core feature of this project.
+
 ```yaml
 run_py_local:
   type: ddeutil.workflow.pipeline.Pipeline
-
+  on: 'on_every_5_min'
+  params:
+    author-run:
+      type: str
+    run-date:
+      type: datetime
 ```
 
 ```python
@@ -128,27 +139,39 @@ pipe = Pipeline.from_loader(name='run_py_local', externals={})
 pipe.execute(params={'author-run': 'Local Workflow', 'run-date': '2024-01-01'})
 ```
 
-
+> [!NOTE]
+> The above parameter use short declarative statement. You can pass a parameter
+> type to the key of a parameter name.
+> ```yaml
+> params:
+>   author-run: str
+>   run-date: datetime
+> ```
+>
+> And for the type, you can remove `ddeutil.workflow` prefix because we can find
+> it by looping search from `WORKFLOW_CORE_REGISTRY` value.
+
+## Usage
 
 This is examples that use workflow file for running common Data Engineering
 use-case.
 
-
+> [!IMPORTANT]
+> I recommend you to use `task` stage for all actions that you want to do with
+> pipeline object.
 
-
+### Python & Bash
 
 ```yaml
 run_py_local:
-  type:
+  type: pipeline.Pipeline
   params:
-    author-run:
-
-    run-date:
-      type: datetime
+    author-run: str
+    run-date: datetime
   jobs:
     first-job:
       stages:
-        - name: Printing Information
+        - name: "Printing Information"
          id: define-func
           run: |
             x = '${{ params.author-run }}'
@@ -157,7 +180,7 @@ run_py_local:
             def echo(name: str):
               print(f'Hello {name}')
 
-        - name: Run Sequence and use var from Above
+        - name: "Run Sequence and use var from Above"
           vars:
             x: ${{ params.author-run }}
           run: |
@@ -165,16 +188,16 @@ run_py_local:
             # Change x value
             x: int = 1
 
-        - name: Call Function
+        - name: "Call Function"
           vars:
             echo: ${{ stages.define-func.outputs.echo }}
           run: |
             echo('Caller')
     second-job:
       stages:
-        - name: Echo
+        - name: "Echo Bash Script"
           id: shell-echo
-
+          bash: |
             echo "Hello World from Shell"
 ```
 
@@ -192,24 +215,20 @@ pipe.execute(params={'author-run': 'Local Workflow', 'run-date': '2024-01-01'})
 > Hello World from Shell
 ```
 
-
-
-### Tasks (Extract & Load)
+### Hook (Extract & Load)
 
 ```yaml
 pipe_el_pg_to_lake:
-  type:
+  type: pipeline.Pipeline
   params:
-    run-date:
-
-    author-email:
-      type: str
+    run-date: datetime
+    author-email: str
   jobs:
     extract-load:
       stages:
         - name: "Extract Load from Postgres to Lake"
           id: extract-load
-
+          uses: tasks/postgres-to-delta@polars
           with:
             source:
               conn: conn_postgres_url
@@ -221,15 +240,11 @@ pipe_el_pg_to_lake:
               endpoint: "/${{ params.name }}"
 ```
 
-
-
-### Tasks (Transform)
-
-> I recommend you to use task for all actions that you want to do.
+### Hook (Transform)
 
 ```yaml
-
-  type:
+pipeline_hook_mssql_proc:
+  type: pipeline.Pipeline
   params:
     run_date: datetime
     sp_name: str
@@ -240,7 +255,7 @@ pipe_hook_mssql_proc:
       stages:
         - name: "Transform Data in MS SQL Server"
           id: transform
-
+          uses: tasks/mssql-proc@odbc
           with:
             exec: ${{ params.sp_name }}
             params:
@@ -250,16 +265,30 @@ pipe_hook_mssql_proc:
               target: ${{ params.target_name }}
 ```
 
-> [!NOTE]
-> The above parameter use short declarative statement. You can pass a parameter
-> type to the key of a parameter name.
-
 ## Configuration
 
-```
+```bash
+export WORKFLOW_ROOT_PATH=.
+export WORKFLOW_CORE_REGISTRY=ddeutil.workflow,tests.utils
+export WORKFLOW_CORE_PATH_CONF=conf
+```
+
+Application config:
 
+```bash
+export WORKFLOW_APP_DB_URL=postgresql+asyncpg://user:pass@localhost:5432/schedule
+export WORKFLOW_APP_INTERVAL=10
 ```
 
-##
+## Deployment
 
-This
+This package able to run as a application service for receive manual trigger
+from the master node via RestAPI.
+
+> [!WARNING]
+> This feature do not start yet because I still research and find the best tool
+> to use it provision an app service, like `starlette`, `fastapi`, `apscheduler`.
+
+```shell
+(venv) $ workflow start -p 7070
+```
````
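The README section above replaces 0.0.5's `__scheduler.py` entry point with a loader-based `On` API. A minimal sketch of how the pieces compose, assuming the `on_every_5_min` YAML from the diff is on the configured conf path and that `.next` advances the iterator on each access, as the README's asserts imply:

```python
from ddeutil.workflow.on import On

# Load the schedule registered as `on_every_5_min` in the conf files.
schedule = On.from_loader(name='on_every_5_min', externals={})
assert str(schedule.cronjob) == '*/5 * * * *'

# Collect the next three fire times after a fixed start datetime.
runner = schedule.generate('2022-01-01 00:00:00')
next_runs = [f"{runner.next:%Y-%m-%d %H:%M}" for _ in range(3)]
assert next_runs == ['2022-01-01 00:05', '2022-01-01 00:10', '2022-01-01 00:15']
```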
The README.md diff ({ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.6}/README.md) carries exactly the same content changes as the PKG-INFO long description above; only the hunk offsets differ because PKG-INFO prepends the metadata header (`@@ -1,25 +1,25 @@`, `@@ -42,7 +42,12 @@`, `@@ -59,38 +64,42 @@`, `@@ -100,27 +109,39 @@`, `@@ -129,7 +150,7 @@`, `@@ -137,16 +158,16 @@`, `@@ -164,24 +185,20 @@`, `@@ -193,15 +210,11 @@`, `@@ -212,7 +225,7 @@`, `@@ -222,16 +235,30 @@`).
````diff
--- ddeutil_workflow-0.0.5/pyproject.toml
+++ ddeutil_workflow-0.0.6/pyproject.toml
@@ -28,10 +28,15 @@ dependencies = [
     "fmtutil",
     "ddeutil-io",
     "python-dotenv==1.0.1",
-    "schedule==1.2.2",
 ]
 dynamic = ["version"]
 
+[project.optional-dependencies]
+app = [
+    "fastapi==0.112.0",
+    "apscheduler[sqlalchemy]==3.10.4",
+]
+
 [project.urls]
 Homepage = "https://github.com/ddeutils/ddeutil-workflow/"
 "Source Code" = "https://github.com/ddeutils/ddeutil-workflow/"
````
````diff
--- /dev/null
+++ ddeutil_workflow-0.0.6/src/ddeutil/workflow/__about__.py
@@ -0,0 +1 @@
+__version__: str = "0.0.6"
````
````diff
--- ddeutil_workflow-0.0.5/src/ddeutil/workflow/tasks/__init__.py
+++ ddeutil_workflow-0.0.6/src/ddeutil/workflow/__init__.py
@@ -3,4 +3,7 @@
 # Licensed under the MIT License. See LICENSE in the project root for
 # license information.
 # ------------------------------------------------------------------------------
-from .
+from .exceptions import StageException
+from .on import On
+from .pipeline import Pipeline
+from .stage import Stage
````
````diff
--- ddeutil_workflow-0.0.5/src/ddeutil/workflow/__regex.py
+++ ddeutil_workflow-0.0.6/src/ddeutil/workflow/__types.py
@@ -1,8 +1,10 @@
-#
+# ------------------------------------------------------------------------------
 # Copyright (c) 2022 Korawich Anuttra. All rights reserved.
 # Licensed under the MIT License. See LICENSE in the project root for
 # license information.
-#
+# ------------------------------------------------------------------------------
+from __future__ import annotations
+
 import re
 from re import (
     IGNORECASE,
@@ -11,9 +13,17 @@ from re import (
     VERBOSE,
     Pattern,
 )
+from typing import Any, Union
+
+TupleStr = tuple[str, ...]
+DictData = dict[str, Any]
+DictStr = dict[str, str]
+Matrix = dict[str, Union[list[str], list[int]]]
+MatrixInclude = list[dict[str, Union[str, int]]]
+MatrixExclude = list[dict[str, Union[str, int]]]
 
 
-class
+class Re:
    """Regular expression config."""
 
     # NOTE: Search caller
````diff
--- ddeutil_workflow-0.0.5/src/ddeutil/workflow/exceptions.py
+++ ddeutil_workflow-0.0.6/src/ddeutil/workflow/exceptions.py
@@ -9,4 +9,16 @@ Define Errors Object for Node package
 from __future__ import annotations
 
 
-class
+class WorkflowException(Exception): ...
+
+
+class UtilException(WorkflowException): ...
+
+
+class StageException(WorkflowException): ...
+
+
+class JobException(WorkflowException): ...
+
+
+class PipelineException(WorkflowException): ...
````