ddeutil-workflow 0.0.8__tar.gz → 0.0.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil_workflow-0.0.10/PKG-INFO +182 -0
- ddeutil_workflow-0.0.10/README.md +151 -0
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/pyproject.toml +10 -10
- ddeutil_workflow-0.0.10/src/ddeutil/workflow/__about__.py +1 -0
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/src/ddeutil/workflow/__init__.py +3 -14
- ddeutil_workflow-0.0.10/src/ddeutil/workflow/api.py +89 -0
- ddeutil_workflow-0.0.10/src/ddeutil/workflow/cli.py +134 -0
- ddeutil_workflow-0.0.8/src/ddeutil/workflow/scheduler.py → ddeutil_workflow-0.0.10/src/ddeutil/workflow/cron.py +116 -26
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/src/ddeutil/workflow/exceptions.py +3 -0
- ddeutil_workflow-0.0.10/src/ddeutil/workflow/log.py +184 -0
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/src/ddeutil/workflow/on.py +27 -18
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/src/ddeutil/workflow/pipeline.py +527 -234
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/src/ddeutil/workflow/repeat.py +71 -40
- ddeutil_workflow-0.0.10/src/ddeutil/workflow/route.py +92 -0
- ddeutil_workflow-0.0.10/src/ddeutil/workflow/scheduler.py +620 -0
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/src/ddeutil/workflow/stage.py +158 -82
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/src/ddeutil/workflow/utils.py +273 -46
- ddeutil_workflow-0.0.10/src/ddeutil_workflow.egg-info/PKG-INFO +182 -0
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/src/ddeutil_workflow.egg-info/SOURCES.txt +15 -5
- ddeutil_workflow-0.0.10/src/ddeutil_workflow.egg-info/entry_points.txt +2 -0
- ddeutil_workflow-0.0.10/src/ddeutil_workflow.egg-info/requires.txt +9 -0
- ddeutil_workflow-0.0.10/tests/test__conf_exist.py +11 -0
- ddeutil_workflow-0.0.10/tests/test_conf.py +8 -0
- ddeutil_workflow-0.0.8/tests/test_scheduler.py → ddeutil_workflow-0.0.10/tests/test_cron.py +74 -18
- ddeutil_workflow-0.0.10/tests/test_job.py +18 -0
- ddeutil_workflow-0.0.10/tests/test_job_py.py +33 -0
- ddeutil_workflow-0.0.10/tests/test_log.py +24 -0
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_on.py +21 -0
- ddeutil_workflow-0.0.10/tests/test_params.py +13 -0
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_pipeline.py +8 -0
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_pipeline_desc.py +1 -1
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_pipeline_matrix.py +4 -4
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_pipeline_on.py +1 -1
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_pipeline_params.py +1 -1
- ddeutil_workflow-0.0.10/tests/test_pipeline_run.py +22 -0
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_pipeline_run_raise.py +1 -1
- ddeutil_workflow-0.0.10/tests/test_poke.py +13 -0
- ddeutil_workflow-0.0.10/tests/test_scheduler.py +12 -0
- ddeutil_workflow-0.0.10/tests/test_stage.py +31 -0
- ddeutil_workflow-0.0.10/tests/test_stage_bash.py +40 -0
- ddeutil_workflow-0.0.10/tests/test_stage_condition.py +14 -0
- ddeutil_workflow-0.0.10/tests/test_stage_hook.py +56 -0
- ddeutil_workflow-0.0.10/tests/test_stage_py.py +56 -0
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_stage_trigger.py +3 -3
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_utils_result.py +11 -22
- ddeutil_workflow-0.0.8/tests/test_utils_param2template.py → ddeutil_workflow-0.0.10/tests/test_utils_template.py +54 -0
- ddeutil_workflow-0.0.8/PKG-INFO +0 -266
- ddeutil_workflow-0.0.8/README.md +0 -233
- ddeutil_workflow-0.0.8/src/ddeutil/workflow/__about__.py +0 -1
- ddeutil_workflow-0.0.8/src/ddeutil/workflow/api.py +0 -120
- ddeutil_workflow-0.0.8/src/ddeutil/workflow/app.py +0 -45
- ddeutil_workflow-0.0.8/src/ddeutil/workflow/loader.py +0 -80
- ddeutil_workflow-0.0.8/src/ddeutil/workflow/log.py +0 -79
- ddeutil_workflow-0.0.8/src/ddeutil/workflow/route.py +0 -78
- ddeutil_workflow-0.0.8/src/ddeutil_workflow.egg-info/PKG-INFO +0 -266
- ddeutil_workflow-0.0.8/src/ddeutil_workflow.egg-info/requires.txt +0 -11
- ddeutil_workflow-0.0.8/tests/test__conf_exist.py +0 -11
- ddeutil_workflow-0.0.8/tests/test_job.py +0 -7
- ddeutil_workflow-0.0.8/tests/test_pipeline_on_ready.py +0 -26
- ddeutil_workflow-0.0.8/tests/test_pipeline_run.py +0 -107
- ddeutil_workflow-0.0.8/tests/test_stage.py +0 -15
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/LICENSE +0 -0
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/setup.cfg +0 -0
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/src/ddeutil/workflow/__types.py +0 -0
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/src/ddeutil_workflow.egg-info/dependency_links.txt +0 -0
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/src/ddeutil_workflow.egg-info/top_level.txt +0 -0
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test__local_and_global.py +0 -0
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test__regex.py +0 -0
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_pipeline_if.py +0 -0
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_pipeline_task.py +0 -0
- {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_utils.py +0 -0
@@ -0,0 +1,182 @@
+Metadata-Version: 2.1
+Name: ddeutil-workflow
+Version: 0.0.10
+Summary: Lightweight workflow orchestration with less dependencies
+Author-email: ddeutils <korawich.anu@gmail.com>
+License: MIT
+Project-URL: Homepage, https://github.com/ddeutils/ddeutil-workflow/
+Project-URL: Source Code, https://github.com/ddeutils/ddeutil-workflow/
+Keywords: orchestration,workflow
+Classifier: Topic :: Utilities
+Classifier: Natural Language :: English
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Requires-Python: >=3.9.13
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: ddeutil-io
+Requires-Dist: python-dotenv==1.0.1
+Requires-Dist: typer<1.0.0,==0.12.5
+Provides-Extra: schedule
+Requires-Dist: schedule<2.0.0,==1.2.2; extra == "schedule"
+Provides-Extra: api
+Requires-Dist: fastapi[standard]<1.0.0,==0.112.2; extra == "api"
+
+# Workflow
+
+[](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
+[](https://pypi.org/project/ddeutil-workflow/)
+[](https://github.com/ddeutils/ddeutil-workflow)
+[](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
+[](https://github.com/psf/black)
+
+A **lightweight workflow orchestration** tool with few dependencies, created
+to make simple, metadata-driven data pipeline orchestration easy.
+It can be used for data operations via a `.yaml` template.
+
+> [!WARNING]
+> This package provides the orchestration workload only. That means you should
+> not use a workflow stage to process large data or compute-heavy use cases.
+
+In my opinion, there is no need to write duplicate pipeline code if I can
+write one template pipeline with dynamic input parameters and just change
+those parameters per use case instead.
+This way I can handle many logical pipelines in our organization with metadata
+configuration alone. I call this a **Metadata Driven Data Pipeline**.
+
+Next, we should add monitoring tools to manage the logging that pipeline runs
+return, because on its own it does not show us which use case a running data
+pipeline belongs to.
+
+> [!NOTE]
+> _Disclaimer_: The dynamic statements are inspired by the GitHub Actions
+> `.yml` files, and the config files by several data orchestration framework
+> tools, from my experience as a Data Engineer.
+
+**Rules of this workflow engine**:
+
+1. The minimum unit of scheduling is 1 minute.
+2. It cannot re-run only a failed stage and its pending downstream.
+3. All parallel tasks inside the workflow engine use threading
+   (because Python 3.13 unlocks the GIL).
+
+## Installation
+
+This project needs the `ddeutil-io` extension namespace package. If you want
+to install this package with application add-ons, you should add the optional
+extra to the installation:
+
+| Usecase           | Install Optional                         | Support            |
+|-------------------|------------------------------------------|--------------------|
+| Python & CLI      | `pip install ddeutil-workflow`           | :heavy_check_mark: |
+| Scheduler Service | `pip install ddeutil-workflow[schedule]` | :x:                |
+| FastAPI Server    | `pip install ddeutil-workflow[api]`      | :x:                |
+
+> I have added this feature to the main milestone.
+>
+> **Docker Images** supported:
+>
+> | Docker Image                | Python Version | Support |
+> |-----------------------------|----------------|---------|
+> | ddeutil-workflow:latest     | `3.9`          | :x:     |
+> | ddeutil-workflow:python3.10 | `3.10`         | :x:     |
+> | ddeutil-workflow:python3.11 | `3.11`         | :x:     |
+> | ddeutil-workflow:python3.12 | `3.12`         | :x:     |
+
+## Usage
+
+These are examples that use a workflow file to run a common Data Engineering
+use case.
+
+> [!IMPORTANT]
+> I recommend using the `hook` stage for every action that you want to
+> orchestrate as pipeline activity, because it can receive dynamic input
+> arguments through the same hook function, which means you spend less time
+> maintaining your data pipelines.
+
+```yaml
+run_py_local:
+  type: pipeline.Pipeline
+  on:
+    - cronjob: '*/5 * * * *'
+      timezone: "Asia/Bangkok"
+  params:
+    author-run: str
+    run-date: datetime
+  jobs:
+    getting-api-data:
+      stages:
+        - name: "Retrieve API Data"
+          id: retrieve-api
+          uses: tasks/get-api-with-oauth-to-s3@requests
+          with:
+            url: https://open-data/
+            auth: ${API_ACCESS_REFRESH_TOKEN}
+            aws_s3_path: my-data/open-data/
+
+            # This authentication code should be implemented in your custom
+            # hook function. The template allows you to use environment
+            # variables.
+            aws_access_client_id: ${AWS_ACCESS_CLIENT_ID}
+            aws_access_client_secret: ${AWS_ACCESS_CLIENT_SECRET}
+```
+
+## Configuration
+
+| Environment                         | Component | Default                      | Description                                                                  |
+|-------------------------------------|-----------|------------------------------|------------------------------------------------------------------------------|
+| `WORKFLOW_ROOT_PATH`                | Core      | .                            | The root path of the workflow application                                    |
+| `WORKFLOW_CORE_REGISTRY`            | Core      | ddeutil.workflow,tests.utils | List of importable strings for the hook stage                                |
+| `WORKFLOW_CORE_REGISTRY_FILTER`     | Core      | ddeutil.workflow.utils       | List of importable strings for the filter template                           |
+| `WORKFLOW_CORE_PATH_CONF`           | Core      | conf                         | The config path that keeps all template `.yaml` files                        |
+| `WORKFLOW_CORE_TIMEZONE`            | Core      | Asia/Bangkok                 | A timezone string value that will be passed to the `ZoneInfo` object         |
+| `WORKFLOW_CORE_STAGE_DEFAULT_ID`    | Core      | true                         | A flag that enables a default stage ID, used to catch an execution output    |
+| `WORKFLOW_CORE_STAGE_RAISE_ERROR`   | Core      | true                         | A flag that makes every stage raise StageException from stage execution      |
+| `WORKFLOW_CORE_MAX_PIPELINE_POKING` | Core      | 4                            |                                                                              |
+| `WORKFLOW_CORE_MAX_JOB_PARALLEL`    | Core      | 2                            | The maximum number of jobs that can run in parallel in the pipeline executor |
+| `WORKFLOW_LOG_DEBUG_MODE`           | Log       | true                         | A flag that enables logging at debug level                                   |
+| `WORKFLOW_LOG_ENABLE_WRITE`         | Log       | true                         | A flag that enables the logging object to save logs to its destination       |
+
+**Application**:
+
+| Environment                         | Default                         | Description                                                                 |
+|-------------------------------------|---------------------------------|-----------------------------------------------------------------------------|
+| `WORKFLOW_APP_PROCESS_WORKER`       | 2                               | The maximum number of process workers that run in the scheduler app module  |
+| `WORKFLOW_APP_SCHEDULE_PER_PROCESS` | 100                             | The number of schedules per process that run in parallel                    |
+| `WORKFLOW_APP_STOP_BOUNDARY_DELTA`  | '{"minutes": 5, "seconds": 20}' | A time delta value, in JSON string format, used to stop the scheduler app   |
+
+**API server**:
+
+| Environment                          | Default | Description                                                                             |
+|--------------------------------------|---------|-----------------------------------------------------------------------------------------|
+| `WORKFLOW_API_ENABLE_ROUTE_WORKFLOW` | true    | A flag that enables the workflow route to manage manual execution and workflow logging  |
+| `WORKFLOW_API_ENABLE_ROUTE_SCHEDULE` | true    | A flag that enables running the scheduler                                               |
+
+## Deployment
+
+This package can run as an application service that receives manual triggers
+from the master node via REST API, or serve as a scheduler background service
+like a crontab job, but via a Python API.
+
+### Schedule Service
+
+```shell
+(venv) $ python src.ddeutil.workflow.app
+```
+
+### API Server
+
+```shell
+(venv) $ uvicorn src.ddeutil.workflow.api:app --host 0.0.0.0 --port 80 --reload
+```
+
+> [!NOTE]
+> If this package is already deployed, you can use
+> `uvicorn ddeutil.workflow.api:app --host 0.0.0.0 --port 80 --workers 4`
@@ -0,0 +1,151 @@
+# Workflow
+
+[](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
+[](https://pypi.org/project/ddeutil-workflow/)
+[](https://github.com/ddeutils/ddeutil-workflow)
+[](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
+[](https://github.com/psf/black)
+
+A **lightweight workflow orchestration** tool with few dependencies, created
+to make simple, metadata-driven data pipeline orchestration easy.
+It can be used for data operations via a `.yaml` template.
+
+> [!WARNING]
+> This package provides the orchestration workload only. That means you should
+> not use a workflow stage to process large data or compute-heavy use cases.
+
+In my opinion, there is no need to write duplicate pipeline code if I can
+write one template pipeline with dynamic input parameters and just change
+those parameters per use case instead.
+This way I can handle many logical pipelines in our organization with metadata
+configuration alone. I call this a **Metadata Driven Data Pipeline**.
+
+Next, we should add monitoring tools to manage the logging that pipeline runs
+return, because on its own it does not show us which use case a running data
+pipeline belongs to.
+
+> [!NOTE]
+> _Disclaimer_: The dynamic statements are inspired by the GitHub Actions
+> `.yml` files, and the config files by several data orchestration framework
+> tools, from my experience as a Data Engineer.
+
+**Rules of this workflow engine**:
+
+1. The minimum unit of scheduling is 1 minute.
+2. It cannot re-run only a failed stage and its pending downstream.
+3. All parallel tasks inside the workflow engine use threading
+   (because Python 3.13 unlocks the GIL).
+
+## Installation
+
+This project needs the `ddeutil-io` extension namespace package. If you want
+to install this package with application add-ons, you should add the optional
+extra to the installation:
+
+| Usecase           | Install Optional                         | Support            |
+|-------------------|------------------------------------------|--------------------|
+| Python & CLI      | `pip install ddeutil-workflow`           | :heavy_check_mark: |
+| Scheduler Service | `pip install ddeutil-workflow[schedule]` | :x:                |
+| FastAPI Server    | `pip install ddeutil-workflow[api]`      | :x:                |
+
+> I have added this feature to the main milestone.
+>
+> **Docker Images** supported:
+>
+> | Docker Image                | Python Version | Support |
+> |-----------------------------|----------------|---------|
+> | ddeutil-workflow:latest     | `3.9`          | :x:     |
+> | ddeutil-workflow:python3.10 | `3.10`         | :x:     |
+> | ddeutil-workflow:python3.11 | `3.11`         | :x:     |
+> | ddeutil-workflow:python3.12 | `3.12`         | :x:     |
+
+## Usage
+
+These are examples that use a workflow file to run a common Data Engineering
+use case.
+
+> [!IMPORTANT]
+> I recommend using the `hook` stage for every action that you want to
+> orchestrate as pipeline activity, because it can receive dynamic input
+> arguments through the same hook function, which means you spend less time
+> maintaining your data pipelines.
+
+```yaml
+run_py_local:
+  type: pipeline.Pipeline
+  on:
+    - cronjob: '*/5 * * * *'
+      timezone: "Asia/Bangkok"
+  params:
+    author-run: str
+    run-date: datetime
+  jobs:
+    getting-api-data:
+      stages:
+        - name: "Retrieve API Data"
+          id: retrieve-api
+          uses: tasks/get-api-with-oauth-to-s3@requests
+          with:
+            url: https://open-data/
+            auth: ${API_ACCESS_REFRESH_TOKEN}
+            aws_s3_path: my-data/open-data/
+
+            # This authentication code should be implemented in your custom
+            # hook function. The template allows you to use environment
+            # variables.
+            aws_access_client_id: ${AWS_ACCESS_CLIENT_ID}
+            aws_access_client_secret: ${AWS_ACCESS_CLIENT_SECRET}
+```
+
+## Configuration
+
+| Environment                         | Component | Default                      | Description                                                                  |
+|-------------------------------------|-----------|------------------------------|------------------------------------------------------------------------------|
+| `WORKFLOW_ROOT_PATH`                | Core      | .                            | The root path of the workflow application                                    |
+| `WORKFLOW_CORE_REGISTRY`            | Core      | ddeutil.workflow,tests.utils | List of importable strings for the hook stage                                |
+| `WORKFLOW_CORE_REGISTRY_FILTER`     | Core      | ddeutil.workflow.utils       | List of importable strings for the filter template                           |
+| `WORKFLOW_CORE_PATH_CONF`           | Core      | conf                         | The config path that keeps all template `.yaml` files                        |
+| `WORKFLOW_CORE_TIMEZONE`            | Core      | Asia/Bangkok                 | A timezone string value that will be passed to the `ZoneInfo` object         |
+| `WORKFLOW_CORE_STAGE_DEFAULT_ID`    | Core      | true                         | A flag that enables a default stage ID, used to catch an execution output    |
+| `WORKFLOW_CORE_STAGE_RAISE_ERROR`   | Core      | true                         | A flag that makes every stage raise StageException from stage execution      |
+| `WORKFLOW_CORE_MAX_PIPELINE_POKING` | Core      | 4                            |                                                                              |
+| `WORKFLOW_CORE_MAX_JOB_PARALLEL`    | Core      | 2                            | The maximum number of jobs that can run in parallel in the pipeline executor |
+| `WORKFLOW_LOG_DEBUG_MODE`           | Log       | true                         | A flag that enables logging at debug level                                   |
+| `WORKFLOW_LOG_ENABLE_WRITE`         | Log       | true                         | A flag that enables the logging object to save logs to its destination       |
+
+**Application**:
+
+| Environment                         | Default                         | Description                                                                 |
+|-------------------------------------|---------------------------------|-----------------------------------------------------------------------------|
+| `WORKFLOW_APP_PROCESS_WORKER`       | 2                               | The maximum number of process workers that run in the scheduler app module  |
+| `WORKFLOW_APP_SCHEDULE_PER_PROCESS` | 100                             | The number of schedules per process that run in parallel                    |
+| `WORKFLOW_APP_STOP_BOUNDARY_DELTA`  | '{"minutes": 5, "seconds": 20}' | A time delta value, in JSON string format, used to stop the scheduler app   |
+
+**API server**:
+
+| Environment                          | Default | Description                                                                             |
+|--------------------------------------|---------|-----------------------------------------------------------------------------------------|
+| `WORKFLOW_API_ENABLE_ROUTE_WORKFLOW` | true    | A flag that enables the workflow route to manage manual execution and workflow logging  |
+| `WORKFLOW_API_ENABLE_ROUTE_SCHEDULE` | true    | A flag that enables running the scheduler                                               |
+
+## Deployment
+
+This package can run as an application service that receives manual triggers
+from the master node via REST API, or serve as a scheduler background service
+like a crontab job, but via a Python API.
+
+### Schedule Service
+
+```shell
+(venv) $ python src.ddeutil.workflow.app
+```
+
+### API Server
+
+```shell
+(venv) $ uvicorn src.ddeutil.workflow.api:app --host 0.0.0.0 --port 80 --reload
+```
+
+> [!NOTE]
+> If this package is already deployed, you can use
+> `uvicorn ddeutil.workflow.api:app --host 0.0.0.0 --port 80 --workers 4`
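A quick sketch of how these configuration settings are consumed, following the `ZoneInfo(os.getenv(...))` pattern from `cli.py` and the `str2bool(os.getenv(...))` pattern from `api.py` later in this diff:

```python
# A minimal sketch of reading the configuration above, matching the patterns
# visible in src/ddeutil/workflow/cli.py and api.py in this diff.
import os
from zoneinfo import ZoneInfo

from ddeutil.core import str2bool

# WORKFLOW_CORE_TIMEZONE is passed straight to ZoneInfo.
tz = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "Asia/Bangkok"))

# Boolean flags such as WORKFLOW_LOG_DEBUG_MODE parse via str2bool.
debug_mode: bool = str2bool(os.getenv("WORKFLOW_LOG_DEBUG_MODE", "true"))
```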
@@ -4,11 +4,11 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "ddeutil-workflow"
-description = "
+description = "Lightweight workflow orchestration with less dependencies"
 readme = {file = "README.md", content-type = "text/markdown"}
 license = {text = "MIT"}
 authors = [{ name = "ddeutils", email = "korawich.anu@gmail.com" }]
-keywords = ['
+keywords = ['orchestration', 'workflow']
 classifiers = [
     "Topic :: Utilities",
     "Natural Language :: English",
@@ -25,26 +25,27 @@ classifiers = [
 ]
 requires-python = ">=3.9.13"
 dependencies = [
-    "fmtutil",
     "ddeutil-io",
     "python-dotenv==1.0.1",
+    "typer==0.12.5,<1.0.0",
 ]
 dynamic = ["version"]
 
 [project.optional-dependencies]
-
+schedule = [
     "schedule==1.2.2,<2.0.0",
 ]
 api = [
-    "fastapi[standard]==0.112.0",
-    "apscheduler[sqlalchemy]==3.10.4,<4.0.0",
-    "croniter==3.0.3",
+    "fastapi[standard]==0.112.2,<1.0.0",
 ]
 
 [project.urls]
 Homepage = "https://github.com/ddeutils/ddeutil-workflow/"
 "Source Code" = "https://github.com/ddeutils/ddeutil-workflow/"
 
+[project.scripts]
+ddeutil-workflow = "ddeutil.workflow.cli:cli"
+
 [tool.setuptools.dynamic]
 version = {attr = "ddeutil.workflow.__about__.__version__"}
@@ -62,8 +63,7 @@ concurrency = ["thread", "multiprocessing"]
 source = ["ddeutil.workflow", "tests"]
 omit = [
     "scripts/",
-
-    "src/ddeutil/workflow/app.py",
+    # Omit these files because they are not ready for production.
     "src/ddeutil/workflow/repeat.py",
     "src/ddeutil/workflow/route.py",
     "tests/utils.py",
@@ -83,7 +83,7 @@ addopts = [
 filterwarnings = ["error"]
 log_cli = true
 log_cli_level = "DEBUG"
-log_cli_format = "%(asctime)s [%(levelname)-7s] %(message)-
+log_cli_format = "%(asctime)s [%(levelname)-7s] %(message)-120s (%(filename)s:%(lineno)s)"
 log_cli_date_format = "%Y%m%d %H:%M:%S"
 
 [tool.black]
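The new `[project.scripts]` table means pip will generate a `ddeutil-workflow` executable on install. In effect it is a thin wrapper that does roughly this:

```python
# Roughly what the generated `ddeutil-workflow` console script does:
# import the Typer app named in [project.scripts] and invoke it.
from ddeutil.workflow.cli import cli

if __name__ == "__main__":
    cli()
```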
@@ -0,0 +1 @@
+__version__: str = "0.0.10"
@@ -10,22 +10,11 @@ from .exceptions import (
     StageException,
     UtilException,
 )
-from .on import
-from .pipeline import Job, Pipeline
-from .stage import (
-    BashStage,
-    EmptyStage,
-    HookStage,
-    PyStage,
-    Stage,
-    TriggerStage,
-)
+from .on import On, interval2crontab
+from .pipeline import Job, Pipeline, Strategy
+from .stage import Stage, handler_result
 from .utils import (
-    ChoiceParam,
-    DatetimeParam,
-    IntParam,
     Param,
-    StrParam,
     dash2underscore,
     param2template,
 )
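With this change the top-level import surface shrinks: the individual stage and param subclasses are no longer re-exported, so the remaining public model names import like this (a sketch grounded in the new import list above):

```python
# The slimmed-down public API after this change. The stage and param
# subclasses (BashStage, PyStage, StrParam, ...) must now be imported from
# their submodules instead of the package root.
from ddeutil.workflow import (
    Job,
    On,
    Param,
    Pipeline,
    Stage,
    Strategy,
)
```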
@@ -0,0 +1,89 @@
+# ------------------------------------------------------------------------------
+# Copyright (c) 2022 Korawich Anuttra. All rights reserved.
+# Licensed under the MIT License. See LICENSE in the project root for
+# license information.
+# ------------------------------------------------------------------------------
+from __future__ import annotations
+
+import asyncio
+import os
+import uuid
+from queue import Empty, Queue
+
+from ddeutil.core import str2bool
+from dotenv import load_dotenv
+from fastapi import FastAPI
+from fastapi.middleware.gzip import GZipMiddleware
+from fastapi.responses import UJSONResponse
+from pydantic import BaseModel
+
+from .__about__ import __version__
+from .log import get_logger
+from .repeat import repeat_every
+
+load_dotenv()
+logger = get_logger("ddeutil.workflow")
+
+
+app = FastAPI(
+    title="Workflow API",
+    description=(
+        "This is a workflow FastAPI web application used to manage manual "
+        "execution or scheduling of workflows via REST API."
+    ),
+    version=__version__,
+)
+app.add_middleware(GZipMiddleware, minimum_size=1000)
+app.queue = Queue()
+app.output_dict = {}
+app.queue_limit = 5
+
+
+@app.on_event("startup")
+@repeat_every(seconds=10)
+def broker_upper_messages():
+    """Broker that receives messages from the `/upper` path and changes them
+    to upper case. This broker runs in the background every 10 seconds.
+    """
+    for _ in range(app.queue_limit):
+        try:
+            obj = app.queue.get_nowait()
+            app.output_dict[obj["request_id"]] = obj["text"].upper()
+            logger.info(f"Upper message: {app.output_dict}")
+        except Empty:
+            pass
+
+
+class Payload(BaseModel):
+    text: str
+
+
+async def get_result(request_id):
+    """Get data for a request ID from the global output dict."""
+    while True:
+        if request_id in app.output_dict:
+            result = app.output_dict[request_id]
+            del app.output_dict[request_id]
+            return {"message": result}
+        await asyncio.sleep(0.0025)
+
+
+@app.post("/upper", response_class=UJSONResponse)
+async def message_upper(payload: Payload):
+    """Convert a message from any case to upper case."""
+    request_id: str = str(uuid.uuid4())
+    app.queue.put(
+        {"text": payload.text, "request_id": request_id},
+    )
+    return await get_result(request_id)
+
+
+if str2bool(os.getenv("WORKFLOW_API_ENABLE_ROUTE_WORKFLOW", "true")):
+    from .route import workflow
+
+    app.include_router(workflow)
+
+if str2bool(os.getenv("WORKFLOW_API_ENABLE_ROUTE_SCHEDULE", "true")):
+    from .route import schedule
+
+    app.include_router(schedule)
@@ -0,0 +1,134 @@
+# ------------------------------------------------------------------------------
+# Copyright (c) 2022 Korawich Anuttra. All rights reserved.
+# Licensed under the MIT License. See LICENSE in the project root for
+# license information.
+# ------------------------------------------------------------------------------
+from __future__ import annotations
+
+import json
+import os
+from datetime import datetime
+from enum import Enum
+from typing import Annotated, Optional
+from zoneinfo import ZoneInfo
+
+from ddeutil.core import str2list
+from typer import Argument, Option, Typer
+
+from .log import get_logger
+
+logger = get_logger("ddeutil.workflow")
+cli: Typer = Typer()
+cli_log: Typer = Typer()
+cli.add_typer(
+    cli_log,
+    name="log",
+    help="Logging of workflow CLI",
+)
+
+
+@cli.command()
+def run(
+    pipeline: Annotated[
+        str,
+        Argument(help="A pipeline name to run manually"),
+    ],
+    params: Annotated[
+        str,
+        Argument(
+            help="A JSON string of parameters for this pipeline execution."
+        ),
+    ],
+):
+    """Run a pipeline workflow manually with custom input parameters that the
+    pipeline's params config can receive.
+    """
+    logger.info(f"Running pipeline name: {pipeline}")
+    logger.info(f"... with Parameters: {json.dumps(json.loads(params))}")
+
+
+@cli.command()
+def schedule(
+    stop: Annotated[
+        Optional[datetime],
+        Argument(
+            formats=["%Y-%m-%d", "%Y-%m-%d %H:%M:%S"],
+            help="A stopping datetime for the schedule app.",
+        ),
+    ] = None,
+    excluded: Annotated[
+        Optional[str],
+        Argument(help="A list of excluded workflow names, as a string."),
+    ] = None,
+    externals: Annotated[
+        Optional[str],
+        Argument(
+            help="A JSON string of parameters for this pipeline execution."
+        ),
+    ] = None,
+):
+    """Start the workflow scheduler, which calls the workflow function from
+    the scheduler module.
+    """
+    excluded: list[str] = str2list(excluded) if excluded else []
+    externals: str = externals or "{}"
+    if stop:
+        stop: datetime = stop.astimezone(
+            tz=ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
+        )
+
+    from .scheduler import workflow
+
+    # NOTE: Start running the workflow scheduler application.
+    workflow_rs: list[str] = workflow(
+        stop=stop, excluded=excluded, externals=json.loads(externals)
+    )
+    logger.info(f"Application run success: {workflow_rs}")
+
+
+@cli_log.command("pipeline-get")
+def pipeline_log_get(
+    name: Annotated[
+        str,
+        Argument(help="A pipeline name to get logs for"),
+    ],
+    limit: Annotated[
+        int,
+        Argument(help="The maximum number of log records"),
+    ] = 100,
+    desc: Annotated[
+        bool,
+        Option(
+            "--desc",
+            help="A descending flag that orders by log release datetime.",
+        ),
+    ] = True,
+):
+    logger.info(f"{name} : limit {limit} : desc: {desc}")
+    return [""]
+
+
+class LogMode(str, Enum):
+    get = "get"
+    delete = "delete"
+
+
+@cli_log.command("pipeline-delete")
+def pipeline_log_delete(
+    mode: Annotated[
+        LogMode,
+        Argument(case_sensitive=True),
+    ]
+):
+    logger.info(mode)
+
+
+@cli.callback()
+def main():
+    """Manage workflow with the CLI."""
+
+
+if __name__ == "__main__":
+    cli()