ddeutil-workflow 0.0.27__tar.gz → 0.0.28__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil_workflow-0.0.28/PKG-INFO +284 -0
- ddeutil_workflow-0.0.28/README.md +252 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/pyproject.toml +9 -7
- ddeutil_workflow-0.0.28/src/ddeutil/workflow/__about__.py +1 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/src/ddeutil/workflow/api/api.py +1 -53
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/src/ddeutil/workflow/conf.py +15 -11
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/src/ddeutil/workflow/cron.py +7 -7
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/src/ddeutil/workflow/exceptions.py +1 -1
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/src/ddeutil/workflow/hook.py +17 -2
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/src/ddeutil/workflow/job.py +17 -16
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/src/ddeutil/workflow/params.py +3 -3
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/src/ddeutil/workflow/result.py +3 -3
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/src/ddeutil/workflow/scheduler.py +9 -9
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/src/ddeutil/workflow/stage.py +84 -115
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/src/ddeutil/workflow/templates.py +10 -8
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/src/ddeutil/workflow/utils.py +20 -13
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/src/ddeutil/workflow/workflow.py +20 -20
- ddeutil_workflow-0.0.28/src/ddeutil_workflow.egg-info/PKG-INFO +284 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/src/ddeutil_workflow.egg-info/requires.txt +2 -2
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_stage.py +6 -4
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_stage_exec_bash.py +3 -3
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_stage_exec_hook.py +5 -5
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_stage_exec_py.py +6 -4
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_stage_exec_trigger.py +1 -1
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_utils.py +15 -9
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_workflow_exec_hook.py +10 -10
- ddeutil_workflow-0.0.27/PKG-INFO +0 -230
- ddeutil_workflow-0.0.27/README.md +0 -198
- ddeutil_workflow-0.0.27/src/ddeutil/workflow/__about__.py +0 -1
- ddeutil_workflow-0.0.27/src/ddeutil_workflow.egg-info/PKG-INFO +0 -230
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/LICENSE +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/setup.cfg +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/src/ddeutil/workflow/__cron.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/src/ddeutil/workflow/__init__.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/src/ddeutil/workflow/__types.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/src/ddeutil/workflow/api/__init__.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/src/ddeutil/workflow/api/repeat.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/src/ddeutil/workflow/api/route.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/src/ddeutil_workflow.egg-info/SOURCES.txt +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/src/ddeutil_workflow.egg-info/dependency_links.txt +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/src/ddeutil_workflow.egg-info/top_level.txt +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test__cron.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test__regex.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_conf.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_conf_log.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_cron_on.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_hook_tag.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_job.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_job_exec_py.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_job_exec_strategy.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_job_strategy.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_params.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_result.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_schedule.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_schedule_control.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_schedule_tasks.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_templates.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_templates_filter.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_workflow.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_workflow_exec.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_workflow_exec_needs.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_workflow_job_exec.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_workflow_poke.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_workflow_release.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_workflow_release_and_queue.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_workflow_schedule.py +0 -0
- {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.28}/tests/test_workflow_task.py +0 -0
@@ -0,0 +1,284 @@
|
|
1
|
+
Metadata-Version: 2.2
|
2
|
+
Name: ddeutil-workflow
|
3
|
+
Version: 0.0.28
|
4
|
+
Summary: Lightweight workflow orchestration
|
5
|
+
Author-email: ddeutils <korawich.anu@gmail.com>
|
6
|
+
License: MIT
|
7
|
+
Project-URL: Homepage, https://github.com/ddeutils/ddeutil-workflow/
|
8
|
+
Project-URL: Source Code, https://github.com/ddeutils/ddeutil-workflow/
|
9
|
+
Keywords: orchestration,workflow
|
10
|
+
Classifier: Topic :: Utilities
|
11
|
+
Classifier: Natural Language :: English
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
13
|
+
Classifier: Intended Audience :: Developers
|
14
|
+
Classifier: Operating System :: OS Independent
|
15
|
+
Classifier: Programming Language :: Python
|
16
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
22
|
+
Requires-Python: >=3.9.13
|
23
|
+
Description-Content-Type: text/markdown
|
24
|
+
License-File: LICENSE
|
25
|
+
Requires-Dist: ddeutil==0.4.6
|
26
|
+
Requires-Dist: ddeutil-io[toml,yaml]>=0.2.3
|
27
|
+
Requires-Dist: pydantic==2.10.6
|
28
|
+
Requires-Dist: python-dotenv==1.0.1
|
29
|
+
Requires-Dist: schedule<2.0.0,==1.2.2
|
30
|
+
Provides-Extra: api
|
31
|
+
Requires-Dist: fastapi<1.0.0,>=0.115.0; extra == "api"
|
32
|
+
|
33
|
+
# Workflow Orchestration
|
34
|
+
|
35
|
+
[](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
|
36
|
+
[](https://codecov.io/gh/ddeutils/ddeutil-workflow)
|
37
|
+
[](https://pypi.org/project/ddeutil-workflow/)
|
38
|
+
[](https://pypi.org/project/ddeutil-workflow/)
|
39
|
+
[](https://github.com/ddeutils/ddeutil-workflow)
|
40
|
+
[](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
|
41
|
+
[](https://github.com/psf/black)
|
42
|
+
|
43
|
+
The **Lightweight Workflow Orchestration** with fewer dependencies that was created
|
44
|
+
to make it easy to build a simple metadata-driven data workflow. It can be used for data operations
|
45
|
+
by a `.yaml` template.
|
46
|
+
|
47
|
+
> [!WARNING]
|
48
|
+
> This package provide only orchestration workload. That mean you should not
|
49
|
+
> use the workflow stage to process any large volume data which use a lot of compute
|
50
|
+
> resource :cold_sweat:.
|
51
|
+
|
52
|
+
In my opinion, I think it should not create duplicate workflow codes if I can
|
53
|
+
write with dynamic input parameters on the one template workflow that just change
|
54
|
+
the input parameters per use-case instead.
|
55
|
+
This way I can handle a lot of logical workflows in our orgs with only metadata
|
56
|
+
configuration. It is called **Metadata Driven Data Workflow**.
|
57
|
+
|
58
|
+
**:pushpin: <u>Rules of This Workflow engine</u>**:
|
59
|
+
|
60
|
+
1. The Minimum frequency unit of scheduling is **1 minute** :warning:
|
61
|
+
2. Can not re-run only failed stage and its pending downstream :rotating_light:
|
62
|
+
3. All parallel tasks inside workflow engine use Multi-Threading
|
63
|
+
(Python 3.13 unlock GIL :unlock:)
|
64
|
+
|
65
|
+
**:memo: <u>Workflow Diagrams</u>**:
|
66
|
+
|
67
|
+
```mermaid
|
68
|
+
flowchart LR
|
69
|
+
subgraph Interface
|
70
|
+
A((User))
|
71
|
+
subgraph Docker Container
|
72
|
+
G@{ shape: rounded, label: "Observe<br>Application" }
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
A --->|action| B(Workflow<br>Application)
|
77
|
+
B ---> |response| A
|
78
|
+
B -..-> |response| G
|
79
|
+
G -..-> |request| B
|
80
|
+
|
81
|
+
subgraph Docker Container
|
82
|
+
B
|
83
|
+
end
|
84
|
+
|
85
|
+
subgraph Data Context
|
86
|
+
D@{ shape: processes, label: "Logs" }
|
87
|
+
E@{ shape: lin-cyl, label: "Metadata" }
|
88
|
+
end
|
89
|
+
|
90
|
+
subgraph Git Context
|
91
|
+
F@{ shape: tag-rect, label: "YAML<br>files" }
|
92
|
+
end
|
93
|
+
|
94
|
+
B --->|disable| F
|
95
|
+
F --->|read| B
|
96
|
+
|
97
|
+
B --->|write| E
|
98
|
+
E --->|read| B
|
99
|
+
B --->|write| D
|
100
|
+
|
101
|
+
D -.->|read| G
|
102
|
+
E -.->|read| G
|
103
|
+
```
|
104
|
+
|
105
|
+
> [!NOTE]
|
106
|
+
> _Disclaimer_: I inspire the dynamic statement from the [**GitHub Action**](https://github.com/features/actions)
|
107
|
+
> with `.yml` files and all configs file from several data orchestration framework
|
108
|
+
> tools from my experience on Data Engineer. :grimacing:
|
109
|
+
>
|
110
|
+
> Other workflow tools that I interest on them and pick some interested feature
|
111
|
+
> implement to this package:
|
112
|
+
>
|
113
|
+
> - [Google **Workflows**](https://cloud.google.com/workflows)
|
114
|
+
> - [AWS **Step Functions**](https://aws.amazon.com/step-functions/)
|
115
|
+
|
116
|
+
## :round_pushpin: Installation
|
117
|
+
|
118
|
+
This project need `ddeutil` and `ddeutil-io` extension namespace packages.
|
119
|
+
If you want to install this package with application add-ons, you should add
|
120
|
+
`app` in installation;
|
121
|
+
|
122
|
+
| Use-case | Install Optional | Support |
|
123
|
+
|----------------|--------------------------|--------------------|
|
124
|
+
| Python | `ddeutil-workflow` | :heavy_check_mark: |
|
125
|
+
| FastAPI Server | `ddeutil-workflow[api]` | :heavy_check_mark: |
|
126
|
+
|
127
|
+
## :beers: Usage
|
128
|
+
|
129
|
+
This is examples that use workflow file for running common Data Engineering
|
130
|
+
use-case.
|
131
|
+
|
132
|
+
> [!IMPORTANT]
|
133
|
+
> I recommend you to use the `hook` stage for all actions that you want to do
|
134
|
+
> with workflow activity that you want to orchestrate. Because it is able to
|
135
|
+
> dynamic an input argument with the same hook function that make you use less
|
136
|
+
> time to maintain your data workflows.
|
137
|
+
|
138
|
+
```yaml
|
139
|
+
run-py-local:
|
140
|
+
|
141
|
+
# Validate model that use to parsing exists for template file
|
142
|
+
type: Workflow
|
143
|
+
on:
|
144
|
+
# If workflow deploy to schedule, it will run every 5 minutes
|
145
|
+
# with Asia/Bangkok timezone.
|
146
|
+
- cronjob: '*/5 * * * *'
|
147
|
+
timezone: "Asia/Bangkok"
|
148
|
+
params:
|
149
|
+
# Incoming execution parameters will validate with this type. It allows
|
150
|
+
# to set default value or templating.
|
151
|
+
source-extract: str
|
152
|
+
run-date: datetime
|
153
|
+
jobs:
|
154
|
+
getting-api-data:
|
155
|
+
stages:
|
156
|
+
- name: "Retrieve API Data"
|
157
|
+
id: retrieve-api
|
158
|
+
uses: tasks/get-api-with-oauth-to-s3@requests
|
159
|
+
with:
|
160
|
+
# Arguments of source data that want to retrieve.
|
161
|
+
method: post
|
162
|
+
url: https://finances/open-data/currency-pairs/
|
163
|
+
body:
|
164
|
+
resource: ${{ params.source-extract }}
|
165
|
+
|
166
|
+
# You can use filtering like Jinja template but this
|
167
|
+
# package does not use it.
|
168
|
+
filter: ${{ params.run-date | fmt(fmt='%Y%m%d') }}
|
169
|
+
auth:
|
170
|
+
type: bearer
|
171
|
+
keys: ${API_ACCESS_REFRESH_TOKEN}
|
172
|
+
|
173
|
+
# Arguments of target data that want to land.
|
174
|
+
writing_mode: flatten
|
175
|
+
aws_s3_path: my-data/open-data/${{ params.source-extract }}
|
176
|
+
|
177
|
+
# This Authentication code should implement with your custom hook
|
178
|
+
# function. The template allow you to use environment variable.
|
179
|
+
aws_access_client_id: ${AWS_ACCESS_CLIENT_ID}
|
180
|
+
aws_access_client_secret: ${AWS_ACCESS_CLIENT_SECRET}
|
181
|
+
```
|
182
|
+
|
183
|
+
The above workflow template is main executor pipeline that you want to do. If you
|
184
|
+
want to schedule this workflow, you want to dynamic its parameters change base on
|
185
|
+
execution time such as `run-date` should change base on that workflow running date.
|
186
|
+
|
187
|
+
So, this package provide the `Schedule` template for this action.
|
188
|
+
|
189
|
+
```yaml
|
190
|
+
schedule-run-local-wf:
|
191
|
+
|
192
|
+
# Validate model that use to parsing exists for template file
|
193
|
+
type: Schedule
|
194
|
+
workflows:
|
195
|
+
|
196
|
+
# Map existing workflow that want to deploy with scheduler application.
|
197
|
+
# It allows you to pass release parameter that dynamic change depend on the
|
198
|
+
# current context of this scheduler application releasing that time.
|
199
|
+
- name: run-py-local
|
200
|
+
params:
|
201
|
+
source-extract: "USD-THB"
|
202
|
+
asat-dt: "${{ release.logical_date }}"
|
203
|
+
```
|
204
|
+
|
205
|
+
## :cookie: Configuration
|
206
|
+
|
207
|
+
The main configuration that use to dynamic changing with your objective of this
|
208
|
+
application. If any configuration values do not set yet, it will use default value
|
209
|
+
and do not raise any error to you.
|
210
|
+
|
211
|
+
> [!IMPORTANT]
|
212
|
+
> The config value that you will set on the environment should combine with
|
213
|
+
> prefix, component, and name which is `WORKFLOW_{component}_{name}` (Upper case).
|
214
|
+
|
215
|
+
| Name | Component | Default | Description |
|
216
|
+
|:-----------------------------|:---------:|:----------------------------------|:-------------------------------------------------------------------------------------------------------------------|
|
217
|
+
| **ROOT_PATH** | Core | `.` | The root path of the workflow application. |
|
218
|
+
| **REGISTRY** | Core | `src` | List of importable string for the hook stage. |
|
219
|
+
| **REGISTRY_FILTER** | Core | `ddeutil.workflow.utils` | List of importable string for the filter template. |
|
220
|
+
| **CONF_PATH** | Core | `conf` | The config path that keep all template `.yaml` files. |
|
221
|
+
| **TIMEZONE** | Core | `Asia/Bangkok` | A Timezone string value that will pass to `ZoneInfo` object. |
|
222
|
+
| **STAGE_DEFAULT_ID** | Core | `true` | A flag that enable default stage ID that use for catch an execution output. |
|
223
|
+
| **STAGE_RAISE_ERROR** | Core | `false` | A flag that all stage raise StageException from stage execution. |
|
224
|
+
| **JOB_DEFAULT_ID** | Core | `false` | A flag that enable default job ID that use for catch an execution output. The ID that use will be sequence number. |
|
225
|
+
| **JOB_RAISE_ERROR** | Core | `true` | A flag that all job raise JobException from job strategy execution. |
|
226
|
+
| **MAX_NUM_POKING** | Core | `4` | . |
|
227
|
+
| **MAX_JOB_PARALLEL** | Core | `2` | The maximum job number that able to run parallel in workflow executor. |
|
228
|
+
| **MAX_JOB_EXEC_TIMEOUT** | Core | `600` | |
|
229
|
+
| **MAX_CRON_PER_WORKFLOW** | Core | `5` | |
|
230
|
+
| **MAX_QUEUE_COMPLETE_HIST** | Core | `16` | |
|
231
|
+
| **GENERATE_ID_SIMPLE_MODE** | Core | `true` | A flag that enables generating IDs with the `md5` algorithm. |
|
232
|
+
| **PATH** | Log | `./logs` | The log path of the workflow saving log. |
|
233
|
+
| **DEBUG_MODE** | Log | `true` | A flag that enable logging with debug level mode. |
|
234
|
+
| **ENABLE_WRITE** | Log | `true` | A flag that enable logging object saving log to its destination. |
|
235
|
+
| **MAX_PROCESS** | App | `2` | The maximum process worker number that run in scheduler app module. |
|
236
|
+
| **MAX_SCHEDULE_PER_PROCESS** | App | `100` | A schedule per process that run parallel. |
|
237
|
+
| **STOP_BOUNDARY_DELTA** | App | `'{"minutes": 5, "seconds": 20}'` | A time delta value that use to stop scheduler app in json string format. |
|
238
|
+
|
239
|
+
**API Application**:
|
240
|
+
|
241
|
+
| Environment | Component | Default | Description |
|
242
|
+
|:---------------------------|:-----------:|---------|------------------------------------------------------------------------------------|
|
243
|
+
| **ENABLE_ROUTE_WORKFLOW** | API | `true` | A flag that enable workflow route to manage execute manually and workflow logging. |
|
244
|
+
| **ENABLE_ROUTE_SCHEDULE** | API | `true` | A flag that enable run scheduler. |
|
245
|
+
|
246
|
+
## :rocket: Deployment
|
247
|
+
|
248
|
+
This package able to run as an application service for receive manual trigger
|
249
|
+
from the master node via RestAPI or use to be Scheduler background service
|
250
|
+
like crontab job but via Python API.
|
251
|
+
|
252
|
+
### API Server
|
253
|
+
|
254
|
+
```shell
|
255
|
+
(venv) $ uvicorn src.ddeutil.workflow.api:app \
|
256
|
+
--host 127.0.0.1 \
|
257
|
+
--port 80 \
|
258
|
+
--no-access-log
|
259
|
+
```
|
260
|
+
|
261
|
+
> [!NOTE]
|
262
|
+
> If this package already deploy, it is able to use multiprocess;
|
263
|
+
> `uvicorn ddeutil.workflow.api:app --host 127.0.0.1 --port 80 --workers 4`
|
264
|
+
|
265
|
+
### Docker Container
|
266
|
+
|
267
|
+
Create Docker image;
|
268
|
+
|
269
|
+
```shell
|
270
|
+
$ docker build -t ddeutil-workflow:latest -f .container/Dockerfile .
|
271
|
+
```
|
272
|
+
|
273
|
+
Run the above Docker image;
|
274
|
+
|
275
|
+
```shell
|
276
|
+
$ docker run -i ddeutil-workflow:latest
|
277
|
+
```
|
278
|
+
|
279
|
+
## :speech_balloon: Contribute
|
280
|
+
|
281
|
+
I do not think this project will go around the world because it has a specific purpose,
|
282
|
+
and you can create by your coding without this project dependency for long term
|
283
|
+
solution. So, on this time, you can open [the GitHub issue on this project :raised_hands:](https://github.com/ddeutils/ddeutil-workflow/issues)
|
284
|
+
to fix a bug or request a new feature if you want it.
|
@@ -0,0 +1,252 @@
|
|
1
|
+
# Workflow Orchestration
|
2
|
+
|
3
|
+
[](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
|
4
|
+
[](https://codecov.io/gh/ddeutils/ddeutil-workflow)
|
5
|
+
[](https://pypi.org/project/ddeutil-workflow/)
|
6
|
+
[](https://pypi.org/project/ddeutil-workflow/)
|
7
|
+
[](https://github.com/ddeutils/ddeutil-workflow)
|
8
|
+
[](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
|
9
|
+
[](https://github.com/psf/black)
|
10
|
+
|
11
|
+
The **Lightweight Workflow Orchestration** with fewer dependencies that was created
|
12
|
+
to make it easy to build a simple metadata-driven data workflow. It can be used for data operations
|
13
|
+
by a `.yaml` template.
|
14
|
+
|
15
|
+
> [!WARNING]
|
16
|
+
> This package provide only orchestration workload. That mean you should not
|
17
|
+
> use the workflow stage to process any large volume data which use a lot of compute
|
18
|
+
> resource :cold_sweat:.
|
19
|
+
|
20
|
+
In my opinion, I think it should not create duplicate workflow codes if I can
|
21
|
+
write with dynamic input parameters on the one template workflow that just change
|
22
|
+
the input parameters per use-case instead.
|
23
|
+
This way I can handle a lot of logical workflows in our orgs with only metadata
|
24
|
+
configuration. It is called **Metadata Driven Data Workflow**.
|
25
|
+
|
26
|
+
**:pushpin: <u>Rules of This Workflow engine</u>**:
|
27
|
+
|
28
|
+
1. The Minimum frequency unit of scheduling is **1 minute** :warning:
|
29
|
+
2. Can not re-run only failed stage and its pending downstream :rotating_light:
|
30
|
+
3. All parallel tasks inside workflow engine use Multi-Threading
|
31
|
+
(Python 3.13 unlock GIL :unlock:)
|
32
|
+
|
33
|
+
**:memo: <u>Workflow Diagrams</u>**:
|
34
|
+
|
35
|
+
```mermaid
|
36
|
+
flowchart LR
|
37
|
+
subgraph Interface
|
38
|
+
A((User))
|
39
|
+
subgraph Docker Container
|
40
|
+
G@{ shape: rounded, label: "Observe<br>Application" }
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
A --->|action| B(Workflow<br>Application)
|
45
|
+
B ---> |response| A
|
46
|
+
B -..-> |response| G
|
47
|
+
G -..-> |request| B
|
48
|
+
|
49
|
+
subgraph Docker Container
|
50
|
+
B
|
51
|
+
end
|
52
|
+
|
53
|
+
subgraph Data Context
|
54
|
+
D@{ shape: processes, label: "Logs" }
|
55
|
+
E@{ shape: lin-cyl, label: "Metadata" }
|
56
|
+
end
|
57
|
+
|
58
|
+
subgraph Git Context
|
59
|
+
F@{ shape: tag-rect, label: "YAML<br>files" }
|
60
|
+
end
|
61
|
+
|
62
|
+
B --->|disable| F
|
63
|
+
F --->|read| B
|
64
|
+
|
65
|
+
B --->|write| E
|
66
|
+
E --->|read| B
|
67
|
+
B --->|write| D
|
68
|
+
|
69
|
+
D -.->|read| G
|
70
|
+
E -.->|read| G
|
71
|
+
```
|
72
|
+
|
73
|
+
> [!NOTE]
|
74
|
+
> _Disclaimer_: I inspire the dynamic statement from the [**GitHub Action**](https://github.com/features/actions)
|
75
|
+
> with `.yml` files and all configs file from several data orchestration framework
|
76
|
+
> tools from my experience on Data Engineer. :grimacing:
|
77
|
+
>
|
78
|
+
> Other workflow tools that I interest on them and pick some interested feature
|
79
|
+
> implement to this package:
|
80
|
+
>
|
81
|
+
> - [Google **Workflows**](https://cloud.google.com/workflows)
|
82
|
+
> - [AWS **Step Functions**](https://aws.amazon.com/step-functions/)
|
83
|
+
|
84
|
+
## :round_pushpin: Installation
|
85
|
+
|
86
|
+
This project need `ddeutil` and `ddeutil-io` extension namespace packages.
|
87
|
+
If you want to install this package with application add-ons, you should add
|
88
|
+
`app` in installation;
|
89
|
+
|
90
|
+
| Use-case | Install Optional | Support |
|
91
|
+
|----------------|--------------------------|--------------------|
|
92
|
+
| Python | `ddeutil-workflow` | :heavy_check_mark: |
|
93
|
+
| FastAPI Server | `ddeutil-workflow[api]` | :heavy_check_mark: |
|
94
|
+
|
95
|
+
## :beers: Usage
|
96
|
+
|
97
|
+
This is examples that use workflow file for running common Data Engineering
|
98
|
+
use-case.
|
99
|
+
|
100
|
+
> [!IMPORTANT]
|
101
|
+
> I recommend you to use the `hook` stage for all actions that you want to do
|
102
|
+
> with workflow activity that you want to orchestrate. Because it is able to
|
103
|
+
> dynamic an input argument with the same hook function that make you use less
|
104
|
+
> time to maintain your data workflows.
|
105
|
+
|
106
|
+
```yaml
|
107
|
+
run-py-local:
|
108
|
+
|
109
|
+
# Validate model that use to parsing exists for template file
|
110
|
+
type: Workflow
|
111
|
+
on:
|
112
|
+
# If workflow deploy to schedule, it will run every 5 minutes
|
113
|
+
# with Asia/Bangkok timezone.
|
114
|
+
- cronjob: '*/5 * * * *'
|
115
|
+
timezone: "Asia/Bangkok"
|
116
|
+
params:
|
117
|
+
# Incoming execution parameters will validate with this type. It allows
|
118
|
+
# to set default value or templating.
|
119
|
+
source-extract: str
|
120
|
+
run-date: datetime
|
121
|
+
jobs:
|
122
|
+
getting-api-data:
|
123
|
+
stages:
|
124
|
+
- name: "Retrieve API Data"
|
125
|
+
id: retrieve-api
|
126
|
+
uses: tasks/get-api-with-oauth-to-s3@requests
|
127
|
+
with:
|
128
|
+
# Arguments of source data that want to retrieve.
|
129
|
+
method: post
|
130
|
+
url: https://finances/open-data/currency-pairs/
|
131
|
+
body:
|
132
|
+
resource: ${{ params.source-extract }}
|
133
|
+
|
134
|
+
# You can use filtering like Jinja template but this
|
135
|
+
# package does not use it.
|
136
|
+
filter: ${{ params.run-date | fmt(fmt='%Y%m%d') }}
|
137
|
+
auth:
|
138
|
+
type: bearer
|
139
|
+
keys: ${API_ACCESS_REFRESH_TOKEN}
|
140
|
+
|
141
|
+
# Arguments of target data that want to land.
|
142
|
+
writing_mode: flatten
|
143
|
+
aws_s3_path: my-data/open-data/${{ params.source-extract }}
|
144
|
+
|
145
|
+
# This Authentication code should implement with your custom hook
|
146
|
+
# function. The template allow you to use environment variable.
|
147
|
+
aws_access_client_id: ${AWS_ACCESS_CLIENT_ID}
|
148
|
+
aws_access_client_secret: ${AWS_ACCESS_CLIENT_SECRET}
|
149
|
+
```
|
150
|
+
|
151
|
+
The above workflow template is main executor pipeline that you want to do. If you
|
152
|
+
want to schedule this workflow, you want to dynamic its parameters change base on
|
153
|
+
execution time such as `run-date` should change base on that workflow running date.
|
154
|
+
|
155
|
+
So, this package provide the `Schedule` template for this action.
|
156
|
+
|
157
|
+
```yaml
|
158
|
+
schedule-run-local-wf:
|
159
|
+
|
160
|
+
# Validate model that use to parsing exists for template file
|
161
|
+
type: Schedule
|
162
|
+
workflows:
|
163
|
+
|
164
|
+
# Map existing workflow that want to deploy with scheduler application.
|
165
|
+
# It allows you to pass release parameter that dynamic change depend on the
|
166
|
+
# current context of this scheduler application releasing that time.
|
167
|
+
- name: run-py-local
|
168
|
+
params:
|
169
|
+
source-extract: "USD-THB"
|
170
|
+
asat-dt: "${{ release.logical_date }}"
|
171
|
+
```
|
172
|
+
|
173
|
+
## :cookie: Configuration
|
174
|
+
|
175
|
+
The main configuration that use to dynamic changing with your objective of this
|
176
|
+
application. If any configuration values do not set yet, it will use default value
|
177
|
+
and do not raise any error to you.
|
178
|
+
|
179
|
+
> [!IMPORTANT]
|
180
|
+
> The config value that you will set on the environment should combine with
|
181
|
+
> prefix, component, and name which is `WORKFLOW_{component}_{name}` (Upper case).
|
182
|
+
|
183
|
+
| Name | Component | Default | Description |
|
184
|
+
|:-----------------------------|:---------:|:----------------------------------|:-------------------------------------------------------------------------------------------------------------------|
|
185
|
+
| **ROOT_PATH** | Core | `.` | The root path of the workflow application. |
|
186
|
+
| **REGISTRY** | Core | `src` | List of importable string for the hook stage. |
|
187
|
+
| **REGISTRY_FILTER** | Core | `ddeutil.workflow.utils` | List of importable string for the filter template. |
|
188
|
+
| **CONF_PATH** | Core | `conf` | The config path that keep all template `.yaml` files. |
|
189
|
+
| **TIMEZONE** | Core | `Asia/Bangkok` | A Timezone string value that will pass to `ZoneInfo` object. |
|
190
|
+
| **STAGE_DEFAULT_ID** | Core | `true` | A flag that enable default stage ID that use for catch an execution output. |
|
191
|
+
| **STAGE_RAISE_ERROR** | Core | `false` | A flag that all stage raise StageException from stage execution. |
|
192
|
+
| **JOB_DEFAULT_ID** | Core | `false` | A flag that enable default job ID that use for catch an execution output. The ID that use will be sequence number. |
|
193
|
+
| **JOB_RAISE_ERROR** | Core | `true` | A flag that all job raise JobException from job strategy execution. |
|
194
|
+
| **MAX_NUM_POKING** | Core | `4` | . |
|
195
|
+
| **MAX_JOB_PARALLEL** | Core | `2` | The maximum job number that able to run parallel in workflow executor. |
|
196
|
+
| **MAX_JOB_EXEC_TIMEOUT** | Core | `600` | |
|
197
|
+
| **MAX_CRON_PER_WORKFLOW** | Core | `5` | |
|
198
|
+
| **MAX_QUEUE_COMPLETE_HIST** | Core | `16` | |
|
199
|
+
| **GENERATE_ID_SIMPLE_MODE** | Core | `true` | A flag that enables generating IDs with the `md5` algorithm. |
|
200
|
+
| **PATH** | Log | `./logs` | The log path of the workflow saving log. |
|
201
|
+
| **DEBUG_MODE** | Log | `true` | A flag that enable logging with debug level mode. |
|
202
|
+
| **ENABLE_WRITE** | Log | `true` | A flag that enable logging object saving log to its destination. |
|
203
|
+
| **MAX_PROCESS** | App | `2` | The maximum process worker number that run in scheduler app module. |
|
204
|
+
| **MAX_SCHEDULE_PER_PROCESS** | App | `100` | A schedule per process that run parallel. |
|
205
|
+
| **STOP_BOUNDARY_DELTA** | App | `'{"minutes": 5, "seconds": 20}'` | A time delta value that use to stop scheduler app in json string format. |
|
206
|
+
|
207
|
+
**API Application**:
|
208
|
+
|
209
|
+
| Environment | Component | Default | Description |
|
210
|
+
|:---------------------------|:-----------:|---------|------------------------------------------------------------------------------------|
|
211
|
+
| **ENABLE_ROUTE_WORKFLOW** | API | `true` | A flag that enable workflow route to manage execute manually and workflow logging. |
|
212
|
+
| **ENABLE_ROUTE_SCHEDULE** | API | `true` | A flag that enable run scheduler. |
|
213
|
+
|
214
|
+
## :rocket: Deployment
|
215
|
+
|
216
|
+
This package able to run as an application service for receive manual trigger
|
217
|
+
from the master node via RestAPI or use to be Scheduler background service
|
218
|
+
like crontab job but via Python API.
|
219
|
+
|
220
|
+
### API Server
|
221
|
+
|
222
|
+
```shell
|
223
|
+
(venv) $ uvicorn src.ddeutil.workflow.api:app \
|
224
|
+
--host 127.0.0.1 \
|
225
|
+
--port 80 \
|
226
|
+
--no-access-log
|
227
|
+
```
|
228
|
+
|
229
|
+
> [!NOTE]
|
230
|
+
> If this package already deploy, it is able to use multiprocess;
|
231
|
+
> `uvicorn ddeutil.workflow.api:app --host 127.0.0.1 --port 80 --workers 4`
|
232
|
+
|
233
|
+
### Docker Container
|
234
|
+
|
235
|
+
Create Docker image;
|
236
|
+
|
237
|
+
```shell
|
238
|
+
$ docker build -t ddeutil-workflow:latest -f .container/Dockerfile .
|
239
|
+
```
|
240
|
+
|
241
|
+
Run the above Docker image;
|
242
|
+
|
243
|
+
```shell
|
244
|
+
$ docker run -i ddeutil-workflow:latest
|
245
|
+
```
|
246
|
+
|
247
|
+
## :speech_balloon: Contribute
|
248
|
+
|
249
|
+
I do not think this project will go around the world because it has a specific purpose,
|
250
|
+
and you can create by your coding without this project dependency for long term
|
251
|
+
solution. So, on this time, you can open [the GitHub issue on this project :raised_hands:](https://github.com/ddeutils/ddeutil-workflow/issues)
|
252
|
+
to fix a bug or request a new feature if you want it.
|
@@ -1,10 +1,10 @@
|
|
1
1
|
[build-system]
|
2
|
-
requires = ["setuptools>=68.0"]
|
2
|
+
requires = [ "setuptools>=68.0" ]
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "ddeutil-workflow"
|
7
|
-
description = "Lightweight workflow orchestration
|
7
|
+
description = "Lightweight workflow orchestration"
|
8
8
|
readme = {file = "README.md", content-type = "text/markdown"}
|
9
9
|
license = {text = "MIT"}
|
10
10
|
authors = [{ name = "ddeutils", email = "korawich.anu@gmail.com" }]
|
@@ -26,18 +26,16 @@ classifiers = [
|
|
26
26
|
]
|
27
27
|
requires-python = ">=3.9.13"
|
28
28
|
dependencies = [
|
29
|
-
"ddeutil
|
29
|
+
"ddeutil==0.4.6",
|
30
30
|
"ddeutil-io[yaml,toml]>=0.2.3",
|
31
|
-
"pydantic==2.10.
|
31
|
+
"pydantic==2.10.6",
|
32
32
|
"python-dotenv==1.0.1",
|
33
33
|
"schedule==1.2.2,<2.0.0",
|
34
34
|
]
|
35
35
|
dynamic = ["version"]
|
36
36
|
|
37
37
|
[project.optional-dependencies]
|
38
|
-
api = [
|
39
|
-
"fastapi>=0.115.0,<1.0.0",
|
40
|
-
]
|
38
|
+
api = [ "fastapi>=0.115.0,<1.0.0" ]
|
41
39
|
|
42
40
|
[project.urls]
|
43
41
|
Homepage = "https://github.com/ddeutils/ddeutil-workflow/"
|
@@ -52,6 +50,10 @@ where = ["src"]
|
|
52
50
|
[tool.shelf.version]
|
53
51
|
version = "./src/ddeutil/workflow/__about__.py"
|
54
52
|
changelog = "CHANGELOG.md"
|
53
|
+
commit_msg_format = "- {subject}"
|
54
|
+
|
55
|
+
[tool.shelf.git]
|
56
|
+
commit_prefix_force_fix = true
|
55
57
|
|
56
58
|
[tool.coverage.run]
|
57
59
|
branch = true
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__: str = "0.0.28"
|