ddeutil-workflow 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +26 -4
- ddeutil/workflow/__types.py +11 -1
- ddeutil/workflow/api.py +120 -0
- ddeutil/workflow/app.py +45 -0
- ddeutil/workflow/exceptions.py +3 -3
- ddeutil/workflow/log.py +79 -0
- ddeutil/workflow/pipeline.py +516 -120
- ddeutil/workflow/repeat.py +134 -0
- ddeutil/workflow/route.py +78 -0
- ddeutil/workflow/stage.py +209 -86
- ddeutil/workflow/utils.py +368 -66
- {ddeutil_workflow-0.0.6.dist-info → ddeutil_workflow-0.0.8.dist-info}/METADATA +48 -76
- ddeutil_workflow-0.0.8.dist-info/RECORD +20 -0
- {ddeutil_workflow-0.0.6.dist-info → ddeutil_workflow-0.0.8.dist-info}/WHEEL +1 -1
- ddeutil_workflow-0.0.6.dist-info/RECORD +0 -15
- {ddeutil_workflow-0.0.6.dist-info → ddeutil_workflow-0.0.8.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.6.dist-info → ddeutil_workflow-0.0.8.dist-info}/top_level.txt +0 -0
**{ddeutil_workflow-0.0.6.dist-info → ddeutil_workflow-0.0.8.dist-info}/METADATA**

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ddeutil-workflow
-Version: 0.0.6
+Version: 0.0.8
 Summary: Data Developer & Engineer Workflow Utility Objects
 Author-email: ddeutils <korawich.anu@gmail.com>
 License: MIT
````
````diff
@@ -24,9 +24,12 @@ License-File: LICENSE
 Requires-Dist: fmtutil
 Requires-Dist: ddeutil-io
 Requires-Dist: python-dotenv ==1.0.1
+Provides-Extra: api
+Requires-Dist: fastapi[standard] ==0.112.0 ; extra == 'api'
+Requires-Dist: apscheduler[sqlalchemy] <4.0.0,==3.10.4 ; extra == 'api'
+Requires-Dist: croniter ==3.0.3 ; extra == 'api'
 Provides-Extra: app
-Requires-Dist:
-Requires-Dist: apscheduler[sqlalchemy] ==3.10.4 ; extra == 'app'
+Requires-Dist: schedule <2.0.0,==1.2.2 ; extra == 'app'
 
 # Workflow
 
````
````diff
@@ -39,7 +42,6 @@ Requires-Dist: apscheduler[sqlalchemy] ==3.10.4 ; extra == 'app'
 
 - [Installation](#installation)
 - [Getting Started](#getting-started)
-- [Core Features](#core-features)
 - [On](#on)
 - [Pipeline](#pipeline)
 - [Usage](#usage)
````
````diff
@@ -50,12 +52,14 @@ Requires-Dist: apscheduler[sqlalchemy] ==3.10.4 ; extra == 'app'
 - [Deployment](#deployment)
 
 This **Workflow** objects was created for easy to make a simple metadata
-driven pipeline that able to **ETL, T, EL, or
+driven for data pipeline orchestration that able to use for **ETL, T, EL, or
+ELT** by a `.yaml` file template.
 
-I think
-write
-
-
+In my opinion, I think it should not create duplicate pipeline codes if I can
+write with dynamic input parameters on the one template pipeline that just change
+the input parameters per use-case instead.
+This way I can handle a lot of logical pipelines in our orgs with only metadata
+configuration. It called **Metadata Driven Data Pipeline**.
 
 Next, we should get some monitoring tools for manage logging that return from
 pipeline running. Because it not show us what is a use-case that running data
````
````diff
@@ -75,9 +79,10 @@ pip install ddeutil-workflow
 This project need `ddeutil-io` extension namespace packages. If you want to install
 this package with application add-ons, you should add `app` in installation;
 
-
-
-
+| Usecase            | Install Optional          |
+|--------------------|---------------------------|
+| Scheduler Service  | `ddeutil-workflow[app]`   |
+| FastAPI Server     | `ddeutil-workflow[api]`   |
 
 ## Getting Started
 
````
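The new `api` extra added in the METADATA hunk above can be inspected at runtime. A minimal sketch using only the standard-library `importlib.metadata`, which exposes every `Requires-Dist` entry, including the `extra == '...'` environment markers, for an installed distribution:

```python
from importlib.metadata import requires

# requires() returns the raw Requires-Dist strings from METADATA, so the
# pins introduced in this release show up with their extra markers.
for req in requires("ddeutil-workflow") or []:
    if "extra ==" in req:
        print(req)  # e.g. fastapi[standard]==0.112.0; extra == "api"
```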
````diff
@@ -160,11 +165,12 @@ use-case.
 > I recommend you to use `task` stage for all actions that you want to do with
 > pipeline object.
 
-### Python & Bash
-
 ```yaml
 run_py_local:
   type: pipeline.Pipeline
+  on:
+  - cronjob: '* * * * *'
+    timezone: "Asia/Bangkok"
   params:
     author-run: str
     run-date: datetime
````
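The new `on` block pairs a cron expression with a timezone. `croniter ==3.0.3` is pinned under the new `api` extra, and a minimal sketch of how that pair resolves to concrete fire times looks like this; how the package wires croniter into `Pipeline` internally is an assumption here:

```python
from datetime import datetime
from zoneinfo import ZoneInfo

from croniter import croniter

# Resolve the next fire times of '* * * * *' in the Asia/Bangkok timezone,
# matching the cronjob/timezone pair in the YAML above.
start = datetime(2024, 1, 1, tzinfo=ZoneInfo("Asia/Bangkok"))
schedule = croniter("* * * * *", start)
for _ in range(3):
    print(schedule.get_next(datetime))  # one tick per minute from start
```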
````diff
@@ -174,12 +180,11 @@ run_py_local:
       - name: "Printing Information"
         id: define-func
         run: |
-          x = '${{ params.
-          print(f'Hello {x}')
+          x = '${{ params.run-date | fmt("%Y%m%d") }}'
+          print(f'Hello at {x}')
 
           def echo(name: str):
             print(f'Hello {name}')
-
       - name: "Run Sequence and use var from Above"
         vars:
           x: ${{ params.author-run }}
````
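The template now pipes `params.run-date` through a `fmt("%Y%m%d")` filter. A plain-Python equivalent, assuming the filter wraps `datetime.strftime` (the `> Hello at 20240101` shell output later in this diff is consistent with that reading):

```python
from datetime import datetime

# What `${{ params.run-date | fmt("%Y%m%d") }}` evaluates to for the
# run-date used in the execute() example below.
run_date = datetime(2024, 1, 1)
x = run_date.strftime("%Y%m%d")
print(f"Hello at {x}")  # -> Hello at 20240101
```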
````diff
@@ -187,7 +192,6 @@ run_py_local:
           print(f'Receive x from above with {x}')
           # Change x value
           x: int = 1
-
       - name: "Call Function"
         vars:
           echo: ${{ stages.define-func.outputs.echo }}
````
````diff
@@ -202,75 +206,34 @@ run_py_local:
 ```
 
 ```python
+from datetime import datetime
 from ddeutil.workflow.pipeline import Pipeline
 
-pipe = Pipeline.from_loader(name='run_py_local', externals={})
-pipe.execute(params={
+pipe: Pipeline = Pipeline.from_loader(name='run_py_local', externals={})
+pipe.execute(params={
+    'author-run': 'Local Workflow',
+    'run-date': datetime(2024, 1, 1),
+})
 ```
 
 ```shell
-> Hello
+> Hello at 20240101
 > Receive x from above with Local Workflow
 > Hello Caller
 > Hello World from Shell
 ```
 
-### Hook (Extract & Load)
-
-```yaml
-pipe_el_pg_to_lake:
-  type: pipeline.Pipeline
-  params:
-    run-date: datetime
-    author-email: str
-  jobs:
-    extract-load:
-      stages:
-        - name: "Extract Load from Postgres to Lake"
-          id: extract-load
-          uses: tasks/postgres-to-delta@polars
-          with:
-            source:
-              conn: conn_postgres_url
-              query: |
-                select * from ${{ params.name }}
-                where update_date = '${{ params.datetime }}'
-            sink:
-              conn: conn_az_lake
-              endpoint: "/${{ params.name }}"
-```
-
-### Hook (Transform)
-
-```yaml
-pipeline_hook_mssql_proc:
-  type: pipeline.Pipeline
-  params:
-    run_date: datetime
-    sp_name: str
-    source_name: str
-    target_name: str
-  jobs:
-    transform:
-      stages:
-        - name: "Transform Data in MS SQL Server"
-          id: transform
-          uses: tasks/mssql-proc@odbc
-          with:
-            exec: ${{ params.sp_name }}
-            params:
-              run_mode: "T"
-              run_date: ${{ params.run_date }}
-              source: ${{ params.source_name }}
-              target: ${{ params.target_name }}
-```
-
 ## Configuration
 
 ```bash
 export WORKFLOW_ROOT_PATH=.
 export WORKFLOW_CORE_REGISTRY=ddeutil.workflow,tests.utils
+export WORKFLOW_CORE_REGISTRY_FILTER=ddeutil.workflow.utils
 export WORKFLOW_CORE_PATH_CONF=conf
+export WORKFLOW_CORE_TIMEZONE=Asia/Bangkok
+export WORKFLOW_CORE_DEFAULT_STAGE_ID=true
+export WORKFLOW_CORE_MAX_PIPELINE_POKING=4
+export WORKFLOW_CORE_MAX_JOB_PARALLEL=2
 ```
 
 Application config:
````
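A hypothetical sketch of how the `WORKFLOW_CORE_*` variables added above might be consumed at import time; the variable names come from the diff, but the defaults and type coercions here are assumptions, not the package's actual code:

```python
import os

# Assumed read-side of the configuration block above; booleans are passed
# as the strings "true"/"false" and counters as integers.
ROOT_PATH = os.getenv("WORKFLOW_ROOT_PATH", ".")
TIMEZONE = os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC")
DEFAULT_STAGE_ID = os.getenv("WORKFLOW_CORE_DEFAULT_STAGE_ID", "false") == "true"
MAX_PIPELINE_POKING = int(os.getenv("WORKFLOW_CORE_MAX_PIPELINE_POKING", "4"))
MAX_JOB_PARALLEL = int(os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "2"))
```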
````diff
@@ -283,12 +246,21 @@ export WORKFLOW_APP_INTERVAL=10
 ## Deployment
 
 This package able to run as a application service for receive manual trigger
-from the master node via RestAPI
+from the master node via RestAPI or use to be Scheduler background service
+like crontab job but via Python API.
 
-
-> This feature do not start yet because I still research and find the best tool
-> to use it provision an app service, like `starlette`, `fastapi`, `apscheduler`.
+### Schedule Service
 
 ```shell
-(venv) $ workflow
+(venv) $ python src.ddeutil.workflow.app
 ```
+
+### API Server
+
+```shell
+(venv) $ uvicorn src.ddeutil.workflow.api:app --host 0.0.0.0 --port 80 --reload
+```
+
+> [!NOTE]
+> If this package already deploy, it able to use
+> `uvicorn ddeutil.workflow.api:app --host 0.0.0.0 --port 80`
````
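For deployments driven from a launcher script rather than the shell, a programmatic equivalent of the uvicorn command in the hunk above is a minimal sketch; it assumes the `api` extra (which brings in `fastapi[standard]`, and with it uvicorn) is installed:

```python
import uvicorn

# Same server as `uvicorn ddeutil.workflow.api:app --host 0.0.0.0 --port 80
# --reload`; the app must be passed as an import string for reload to work.
if __name__ == "__main__":
    uvicorn.run("ddeutil.workflow.api:app", host="0.0.0.0", port=80, reload=True)
```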
**ddeutil_workflow-0.0.8.dist-info/RECORD**

````diff
@@ -0,0 +1,20 @@
+ddeutil/workflow/__about__.py,sha256=FA15NQYpQvn7SrHupxQQQ9Ad5ZzEXOvwDS5UyB1h1bo,27
+ddeutil/workflow/__init__.py,sha256=4PEL3RdHmUowK0Dz-tK7fO0wvFX4u9CLd0Up7b3lrAQ,760
+ddeutil/workflow/__types.py,sha256=SYMoxbENQX8uPsiCZkjtpHAqqHOh8rUrarAFicAJd0E,1773
+ddeutil/workflow/api.py,sha256=d2Mmv9jTtN3FITIy-2mivyAKdBOGZxtkNWRMPbCLlFI,3341
+ddeutil/workflow/app.py,sha256=BuYhOoSJCHiSoj3xb2I5QoxaHrD3bKdmoJua3bKBetc,1165
+ddeutil/workflow/exceptions.py,sha256=zuCcsfJ1hFivubXz6lXCpGYXk07d_PkRaUD5ew3_LC0,632
+ddeutil/workflow/loader.py,sha256=_ZD-XP5P7VbUeqItrUVPaKIZu6dMUZ2aywbCbReW1hQ,2778
+ddeutil/workflow/log.py,sha256=N2TyjcuAoH0YTvzJCHTO037IHgVkLA986Xhtz1LSgE4,1742
+ddeutil/workflow/on.py,sha256=YoEqDbzJUwqOA3JRltbvlYr0rNTtxdmb7cWMxl8U19k,6717
+ddeutil/workflow/pipeline.py,sha256=VC6VDxycUdGKn13V42RZxAlCFySYb2HIZGq_ku5Kp5k,30844
+ddeutil/workflow/repeat.py,sha256=sNoRfbOR4cYm_edrSvlVy9N8Dk_osLIq9FC5GMZz32M,4621
+ddeutil/workflow/route.py,sha256=Ck_O1xJwI-vKkMJr37El0-1PGKlwKF8__DDNWVQrf0A,2079
+ddeutil/workflow/scheduler.py,sha256=FqmkvWCqwJ4eRf8aDn5Ce4FcNWqmcvu2aTTfL34lfgs,22184
+ddeutil/workflow/stage.py,sha256=tbxENx_-2BQ6peXKM_s6RQ1oGzTlXcZ4yDpP1Hufkdk,18095
+ddeutil/workflow/utils.py,sha256=seyU81JXfb2zz6QbJvVEb2Wn4qt8f-FBA6QFC97xY5k,21240
+ddeutil_workflow-0.0.8.dist-info/LICENSE,sha256=nGFZ1QEhhhWeMHf9n99_fdt4vQaXS29xWKxt-OcLywk,1085
+ddeutil_workflow-0.0.8.dist-info/METADATA,sha256=9i7Jk3CZlBpNkmFFjD247opgYA6Mc8AT6CtZjcvamYI,8314
+ddeutil_workflow-0.0.8.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
+ddeutil_workflow-0.0.8.dist-info/top_level.txt,sha256=m9M6XeSWDwt_yMsmH6gcOjHZVK5O0-vgtNBuncHjzW4,8
+ddeutil_workflow-0.0.8.dist-info/RECORD,,
````
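Each RECORD row is `path,sha256=<digest>,<size>`, where the digest is the urlsafe-base64 SHA-256 of the file with the trailing `=` padding stripped, per the wheel binary-distribution spec (the `RECORD,,` row itself carries no hash or size). A small sketch that recomputes a row for verification against an unpacked wheel:

```python
import base64
import hashlib
from pathlib import Path

def record_row(path: str) -> str:
    """Build a wheel RECORD row: path, urlsafe-base64 sha256 without '='
    padding, and the file size in bytes."""
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode()},{len(data)}"

# e.g. record_row("ddeutil/workflow/__about__.py") should reproduce the
# first entry above when run against the unpacked 0.0.8 wheel.
```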
**ddeutil_workflow-0.0.6.dist-info/RECORD**

````diff
@@ -1,15 +0,0 @@
-ddeutil/workflow/__about__.py,sha256=VEYa91VchyTUnF57lvvquHvitTViBxxLXuhcEnr4TAY,27
-ddeutil/workflow/__init__.py,sha256=Y5wLiJ0zS1CfoSOZ0oo7OL3LNMKvPmpUO4fVHuAOv8E,429
-ddeutil/workflow/__types.py,sha256=PfwDZBnTwe2JImD7UFS0J6Nq-1TcjBGebOVzJZoSuTQ,1354
-ddeutil/workflow/exceptions.py,sha256=9O12c4aNLi0dyjVBgCLveV3HN9PXcZfwFfLXdgm3Ffs,626
-ddeutil/workflow/loader.py,sha256=_ZD-XP5P7VbUeqItrUVPaKIZu6dMUZ2aywbCbReW1hQ,2778
-ddeutil/workflow/on.py,sha256=YoEqDbzJUwqOA3JRltbvlYr0rNTtxdmb7cWMxl8U19k,6717
-ddeutil/workflow/pipeline.py,sha256=8mIvY34_fsiqscBa9JB94MgN3Km5fkuD2iaNZSAQVuM,17843
-ddeutil/workflow/scheduler.py,sha256=FqmkvWCqwJ4eRf8aDn5Ce4FcNWqmcvu2aTTfL34lfgs,22184
-ddeutil/workflow/stage.py,sha256=bDJiGS21gYlYbFDnLTKH9aIbXfej9fT-V1ADoPX7w4s,13829
-ddeutil/workflow/utils.py,sha256=xapKxxnqIzlbKA45GaRcWn-VL30AhE7M8f46ynr-vbI,11173
-ddeutil_workflow-0.0.6.dist-info/LICENSE,sha256=nGFZ1QEhhhWeMHf9n99_fdt4vQaXS29xWKxt-OcLywk,1085
-ddeutil_workflow-0.0.6.dist-info/METADATA,sha256=5X6ewXGn96MR9rDhVmmaoTuIwuRmfi72t7ezM8wxbvw,8612
-ddeutil_workflow-0.0.6.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
-ddeutil_workflow-0.0.6.dist-info/top_level.txt,sha256=m9M6XeSWDwt_yMsmH6gcOjHZVK5O0-vgtNBuncHjzW4,8
-ddeutil_workflow-0.0.6.dist-info/RECORD,,
````
**{ddeutil_workflow-0.0.6.dist-info → ddeutil_workflow-0.0.8.dist-info}/LICENSE** — file without changes

**{ddeutil_workflow-0.0.6.dist-info → ddeutil_workflow-0.0.8.dist-info}/top_level.txt** — file without changes