ddeutil-workflow 0.0.10__tar.gz → 0.0.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/PKG-INFO +66 -70
- ddeutil_workflow-0.0.11/README.md +148 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/pyproject.toml +2 -4
- ddeutil_workflow-0.0.11/src/ddeutil/workflow/__about__.py +1 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil/workflow/__init__.py +3 -2
- ddeutil_workflow-0.0.11/src/ddeutil/workflow/api.py +157 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil/workflow/cli.py +14 -14
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil/workflow/exceptions.py +6 -6
- ddeutil_workflow-0.0.11/src/ddeutil/workflow/job.py +572 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil/workflow/log.py +10 -10
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil/workflow/repeat.py +4 -2
- ddeutil_workflow-0.0.11/src/ddeutil/workflow/route.py +221 -0
- ddeutil_workflow-0.0.11/src/ddeutil/workflow/scheduler.py +1243 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil/workflow/stage.py +12 -12
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil/workflow/utils.py +4 -4
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil_workflow.egg-info/PKG-INFO +66 -70
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil_workflow.egg-info/SOURCES.txt +1 -1
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil_workflow.egg-info/requires.txt +2 -4
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_job.py +1 -1
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_job_py.py +7 -7
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_log.py +2 -2
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_pipeline.py +3 -3
- ddeutil_workflow-0.0.11/tests/test_pipeline_desc.py +11 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_pipeline_if.py +7 -7
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_pipeline_matrix.py +11 -11
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_pipeline_on.py +4 -4
- ddeutil_workflow-0.0.11/tests/test_pipeline_params.py +12 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_pipeline_run.py +4 -4
- ddeutil_workflow-0.0.11/tests/test_pipeline_run_raise.py +12 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_pipeline_task.py +9 -9
- ddeutil_workflow-0.0.11/tests/test_poke.py +13 -0
- ddeutil_workflow-0.0.11/tests/test_scheduler.py +68 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_stage_bash.py +10 -10
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_stage_condition.py +4 -4
- ddeutil_workflow-0.0.11/tests/test_stage_hook.py +56 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_stage_py.py +11 -11
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_stage_trigger.py +5 -5
- ddeutil_workflow-0.0.10/README.md +0 -151
- ddeutil_workflow-0.0.10/src/ddeutil/workflow/__about__.py +0 -1
- ddeutil_workflow-0.0.10/src/ddeutil/workflow/api.py +0 -89
- ddeutil_workflow-0.0.10/src/ddeutil/workflow/pipeline.py +0 -1186
- ddeutil_workflow-0.0.10/src/ddeutil/workflow/route.py +0 -92
- ddeutil_workflow-0.0.10/src/ddeutil/workflow/scheduler.py +0 -620
- ddeutil_workflow-0.0.10/tests/test_pipeline_desc.py +0 -11
- ddeutil_workflow-0.0.10/tests/test_pipeline_params.py +0 -12
- ddeutil_workflow-0.0.10/tests/test_pipeline_run_raise.py +0 -12
- ddeutil_workflow-0.0.10/tests/test_poke.py +0 -13
- ddeutil_workflow-0.0.10/tests/test_scheduler.py +0 -12
- ddeutil_workflow-0.0.10/tests/test_stage_hook.py +0 -56
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/LICENSE +0 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/setup.cfg +0 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil/workflow/__types.py +0 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil/workflow/cron.py +0 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil/workflow/on.py +0 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil_workflow.egg-info/dependency_links.txt +0 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil_workflow.egg-info/entry_points.txt +0 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil_workflow.egg-info/top_level.txt +0 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test__conf_exist.py +0 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test__local_and_global.py +0 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test__regex.py +0 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_conf.py +0 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_cron.py +0 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_on.py +0 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_params.py +0 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_stage.py +0 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_utils.py +0 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_utils_result.py +0 -0
- {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_utils_template.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: ddeutil-workflow
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.11
|
4
4
|
Summary: Lightweight workflow orchestration with less dependencies
|
5
5
|
Author-email: ddeutils <korawich.anu@gmail.com>
|
6
6
|
License: MIT
|
@@ -24,10 +24,9 @@ License-File: LICENSE
|
|
24
24
|
Requires-Dist: ddeutil-io
|
25
25
|
Requires-Dist: python-dotenv==1.0.1
|
26
26
|
Requires-Dist: typer<1.0.0,==0.12.5
|
27
|
-
|
28
|
-
Requires-Dist: schedule<2.0.0,==1.2.2; extra == "schedule"
|
27
|
+
Requires-Dist: schedule<2.0.0,==1.2.2
|
29
28
|
Provides-Extra: api
|
30
|
-
Requires-Dist: fastapi
|
29
|
+
Requires-Dist: fastapi<1.0.0,==0.112.2; extra == "api"
|
31
30
|
|
32
31
|
# Workflow
|
33
32
|
|
@@ -38,22 +37,22 @@ Requires-Dist: fastapi[standard]<1.0.0,==0.112.2; extra == "api"
|
|
38
37
|
[](https://github.com/psf/black)
|
39
38
|
|
40
39
|
The **Lightweight workflow orchestration** with fewer dependencies that was created
|
41
|
-
for easy to make a simple metadata driven for data
|
40
|
+
for easy to make a simple metadata driven for data workflow orchestration.
|
42
41
|
It can be used as a data operator via a `.yaml` template.
|
43
42
|
|
44
43
|
> [!WARNING]
|
45
44
|
> This package provides only orchestration workload. That means you should not use
|
46
45
|
> a workflow stage to process large data in compute-heavy use cases.
|
47
46
|
|
48
|
-
In my opinion, I think it should not create duplicate
|
49
|
-
write with dynamic input parameters on the one template
|
47
|
+
In my opinion, I think it should not create duplicate workflow codes if I can
|
48
|
+
write with dynamic input parameters on the one template workflow that just change
|
50
49
|
the input parameters per use-case instead.
|
51
|
-
This way I can handle a lot of logical
|
52
|
-
configuration. It called **Metadata Driven Data
|
50
|
+
This way I can handle a lot of logical workflows in our orgs with only metadata
|
51
|
+
configuration. It is called **Metadata Driven Data Workflow**.
|
53
52
|
|
54
53
|
Next, we should get some monitoring tools for manage logging that return from
|
55
|
-
|
56
|
-
|
54
|
+
workflow running. Because it does not show us which use-case is running the data
|
55
|
+
workflow.
|
57
56
|
|
58
57
|
> [!NOTE]
|
59
58
|
> _Disclaimer_: I was inspired by the dynamic statements of the GitHub Action `.yml` files
|
@@ -75,8 +74,7 @@ this package with application add-ons, you should add `app` in installation;
|
|
75
74
|
| Usecase | Install Optional | Support |
|
76
75
|
|-------------------|------------------------------------------|--------------------|
|
77
76
|
| Python & CLI | `pip install ddeutil-workflow` | :heavy_check_mark: |
|
78
|
-
|
|
79
|
-
| FastAPI Server | `pip install ddeutil-workflow[api]` | :x: |
|
77
|
+
| FastAPI Server | `pip install ddeutil-workflow[api]` | :heavy_check_mark: |
|
80
78
|
|
81
79
|
|
82
80
|
> I added this feature to the main milestone.
|
@@ -97,67 +95,65 @@ use-case.
|
|
97
95
|
|
98
96
|
> [!IMPORTANT]
|
99
97
|
> I recommend you to use the `hook` stage for all actions that you want to do
|
100
|
-
> with
|
98
|
+
> with workflow activity that you want to orchestrate. Because it can vary
|
101
99
|
> an input argument with the same hook function that make you use less time to
|
102
|
-
> maintenance your data
|
100
|
+
> maintain your data workflows.
|
103
101
|
|
104
102
|
```yaml
|
105
103
|
run_py_local:
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
104
|
+
type: Workflow
|
105
|
+
on:
|
106
|
+
# If the workflow is deployed to the scheduler, it will run every 5 minutes
|
107
|
+
# with Asia/Bangkok timezone.
|
108
|
+
- cronjob: '*/5 * * * *'
|
109
|
+
timezone: "Asia/Bangkok"
|
110
|
+
params:
|
111
|
+
# Incoming execution parameters will be validated against this type. It allows you
|
112
|
+
# to set default value or templating.
|
113
|
+
author-run: str
|
114
|
+
run-date: datetime
|
115
|
+
jobs:
|
116
|
+
getting-api-data:
|
117
|
+
stages:
|
118
|
+
- name: "Retrieve API Data"
|
119
|
+
id: retrieve-api
|
120
|
+
uses: tasks/get-api-with-oauth-to-s3@requests
|
121
|
+
with:
|
122
|
+
url: https://open-data/
|
123
|
+
auth: ${API_ACCESS_REFRESH_TOKEN}
|
124
|
+
aws_s3_path: my-data/open-data/
|
125
|
+
|
126
|
+
# This authentication code should be implemented in your custom hook function.
|
127
|
+
# The template allows you to use environment variables.
|
128
|
+
aws_access_client_id: ${AWS_ACCESS_CLIENT_ID}
|
129
|
+
aws_access_client_secret: ${AWS_ACCESS_CLIENT_SECRET}
|
128
130
|
```
|
129
131
|
|
130
132
|
## Configuration
|
131
133
|
|
132
|
-
| Environment | Component | Default
|
133
|
-
|
134
|
-
| `WORKFLOW_ROOT_PATH` | Core | .
|
135
|
-
| `WORKFLOW_CORE_REGISTRY` | Core | ddeutil.workflow,tests.utils | List of importable string for the hook stage |
|
136
|
-
| `WORKFLOW_CORE_REGISTRY_FILTER` | Core | ddeutil.workflow.utils
|
137
|
-
| `WORKFLOW_CORE_PATH_CONF` | Core | conf
|
138
|
-
| `WORKFLOW_CORE_TIMEZONE` | Core | Asia/Bangkok
|
139
|
-
| `WORKFLOW_CORE_STAGE_DEFAULT_ID` | Core | true
|
140
|
-
| `WORKFLOW_CORE_STAGE_RAISE_ERROR` | Core | true
|
141
|
-
| `
|
142
|
-
| `WORKFLOW_CORE_MAX_JOB_PARALLEL` | Core | 2
|
143
|
-
| `WORKFLOW_LOG_DEBUG_MODE` | Log | true
|
144
|
-
| `WORKFLOW_LOG_ENABLE_WRITE` | Log | true
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
|
152
|
-
|
153
|
-
| `
|
154
|
-
|
155
|
-
**API server**:
|
156
|
-
|
157
|
-
| Environment | Default | Description |
|
158
|
-
|--------------------------------------|---------|-----------------------------------------------------------------------------------|
|
159
|
-
| `WORKFLOW_API_ENABLE_ROUTE_WORKFLOW` | true | A flag that enable workflow route to manage execute manually and workflow logging |
|
160
|
-
| `WORKFLOW_API_ENABLE_ROUTE_SCHEDULE` | true | A flag that enable run scheduler |
|
134
|
+
| Environment | Component | Default | Description |
|
135
|
+
|-------------------------------------|-----------|----------------------------------|----------------------------------------------------------------------------|
|
136
|
+
| `WORKFLOW_ROOT_PATH` | Core | . | The root path of the workflow application |
|
137
|
+
| `WORKFLOW_CORE_REGISTRY` | Core | src.ddeutil.workflow,tests.utils | List of importable string for the hook stage |
|
138
|
+
| `WORKFLOW_CORE_REGISTRY_FILTER` | Core | ddeutil.workflow.utils | List of importable string for the filter template |
|
139
|
+
| `WORKFLOW_CORE_PATH_CONF` | Core | conf | The config path that keep all template `.yaml` files |
|
140
|
+
| `WORKFLOW_CORE_TIMEZONE` | Core | Asia/Bangkok | A Timezone string value that will pass to `ZoneInfo` object |
|
141
|
+
| `WORKFLOW_CORE_STAGE_DEFAULT_ID` | Core | true | A flag that enable default stage ID that use for catch an execution output |
|
142
|
+
| `WORKFLOW_CORE_STAGE_RAISE_ERROR` | Core | true | A flag that all stage raise StageException from stage execution |
|
143
|
+
| `WORKFLOW_CORE_MAX_NUM_POKING` | Core | 4 | |
|
144
|
+
| `WORKFLOW_CORE_MAX_JOB_PARALLEL` | Core | 2 | The maximum job number that able to run parallel in workflow executor |
|
145
|
+
| `WORKFLOW_LOG_DEBUG_MODE` | Log | true | A flag that enable logging with debug level mode |
|
146
|
+
| `WORKFLOW_LOG_ENABLE_WRITE` | Log | true | A flag that enable logging object saving log to its destination |
|
147
|
+
| `WORKFLOW_APP_PROCESS_WORKER` | Schedule | 2 | The maximum process worker number that run in scheduler app module |
|
148
|
+
| `WORKFLOW_APP_SCHEDULE_PER_PROCESS` | Schedule | 100 | A schedule per process that run parallel |
|
149
|
+
| `WORKFLOW_APP_STOP_BOUNDARY_DELTA` | Schedule | '{"minutes": 5, "seconds": 20}' | A time delta value that use to stop scheduler app in json string format |
|
150
|
+
|
151
|
+
**API Application**:
|
152
|
+
|
153
|
+
| Environment | Component | Default | Description |
|
154
|
+
|--------------------------------------|-----------|---------|-----------------------------------------------------------------------------------|
|
155
|
+
| `WORKFLOW_API_ENABLE_ROUTE_WORKFLOW` | API | true | A flag that enable workflow route to manage execute manually and workflow logging |
|
156
|
+
| `WORKFLOW_API_ENABLE_ROUTE_SCHEDULE` | API | true | A flag that enable run scheduler |
|
161
157
|
|
162
158
|
## Deployment
|
163
159
|
|
@@ -165,18 +161,18 @@ This package able to run as a application service for receive manual trigger
|
|
165
161
|
from the master node via RestAPI, or to be used as a Scheduler background service
|
166
162
|
like crontab job but via Python API.
|
167
163
|
|
168
|
-
### Schedule
|
164
|
+
### Schedule App
|
169
165
|
|
170
166
|
```shell
|
171
|
-
(venv) $
|
167
|
+
(venv) $ ddeutil-workflow schedule
|
172
168
|
```
|
173
169
|
|
174
170
|
### API Server
|
175
171
|
|
176
172
|
```shell
|
177
|
-
(venv) $ uvicorn src.ddeutil.workflow.api:app --host
|
173
|
+
(venv) $ uvicorn src.ddeutil.workflow.api:app --host 127.0.0.1 --port 80
|
178
174
|
```
|
179
175
|
|
180
176
|
> [!NOTE]
|
181
177
|
> If this package is already deployed, it is able to use
|
182
|
-
> `uvicorn ddeutil.workflow.api:app --host
|
178
|
+
> `uvicorn ddeutil.workflow.api:app --host 127.0.0.1 --port 80 --workers 4`
|
@@ -0,0 +1,148 @@
|
|
1
|
+
# Workflow
|
2
|
+
|
3
|
+
[](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
|
4
|
+
[](https://pypi.org/project/ddeutil-workflow/)
|
5
|
+
[](https://github.com/ddeutils/ddeutil-workflow)
|
6
|
+
[](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
|
7
|
+
[](https://github.com/psf/black)
|
8
|
+
|
9
|
+
The **Lightweight workflow orchestration** with fewer dependencies that was created
|
10
|
+
for easy to make a simple metadata driven for data workflow orchestration.
|
11
|
+
It can be used as a data operator via a `.yaml` template.
|
12
|
+
|
13
|
+
> [!WARNING]
|
14
|
+
> This package provides only orchestration workload. That means you should not use
|
15
|
+
> a workflow stage to process large data in compute-heavy use cases.
|
16
|
+
|
17
|
+
In my opinion, I think it should not create duplicate workflow codes if I can
|
18
|
+
write with dynamic input parameters on the one template workflow that just change
|
19
|
+
the input parameters per use-case instead.
|
20
|
+
This way I can handle a lot of logical workflows in our orgs with only metadata
|
21
|
+
configuration. It is called **Metadata Driven Data Workflow**.
|
22
|
+
|
23
|
+
Next, we should get some monitoring tools for manage logging that return from
|
24
|
+
workflow running. Because it does not show us which use-case is running the data
|
25
|
+
workflow.
|
26
|
+
|
27
|
+
> [!NOTE]
|
28
|
+
> _Disclaimer_: I was inspired by the dynamic statements of the GitHub Action `.yml` files
|
29
|
+
> and all of config file from several data orchestration framework tools from my
|
30
|
+
> experience on Data Engineer.
|
31
|
+
|
32
|
+
**Rules of This Workflow engine**:
|
33
|
+
|
34
|
+
1. Minimum unit of scheduling is 1 minute
|
35
|
+
2. Cannot re-run only failed stage and its pending downstream
|
36
|
+
3. All parallel tasks inside workflow engine use Threading
|
37
|
+
(Because the Python 3.13 free-threaded build can run without the GIL)
|
38
|
+
|
39
|
+
## Installation
|
40
|
+
|
41
|
+
This project needs `ddeutil-io` extension namespace packages. If you want to install
|
42
|
+
this package with application add-ons, you should add `api` in installation;
|
43
|
+
|
44
|
+
| Usecase | Install Optional | Support |
|
45
|
+
|-------------------|------------------------------------------|--------------------|
|
46
|
+
| Python & CLI | `pip install ddeutil-workflow` | :heavy_check_mark: |
|
47
|
+
| FastAPI Server | `pip install ddeutil-workflow[api]` | :heavy_check_mark: |
|
48
|
+
|
49
|
+
|
50
|
+
> I added this feature to the main milestone.
|
51
|
+
>
|
52
|
+
> **Docker Images** supported:
|
53
|
+
>
|
54
|
+
> | Docker Image | Python Version | Support |
|
55
|
+
> |-----------------------------|----------------|---------|
|
56
|
+
> | ddeutil-workflow:latest | `3.9` | :x: |
|
57
|
+
> | ddeutil-workflow:python3.10 | `3.10` | :x: |
|
58
|
+
> | ddeutil-workflow:python3.11 | `3.11` | :x: |
|
59
|
+
> | ddeutil-workflow:python3.12 | `3.12` | :x: |
|
60
|
+
|
61
|
+
## Usage
|
62
|
+
|
63
|
+
These are examples that use a workflow file for running common Data Engineering
|
64
|
+
use-case.
|
65
|
+
|
66
|
+
> [!IMPORTANT]
|
67
|
+
> I recommend you to use the `hook` stage for all actions that you want to do
|
68
|
+
> with workflow activity that you want to orchestrate. Because it can vary
|
69
|
+
> an input argument with the same hook function that make you use less time to
|
70
|
+
> maintain your data workflows.
|
71
|
+
|
72
|
+
```yaml
|
73
|
+
run_py_local:
|
74
|
+
type: Workflow
|
75
|
+
on:
|
76
|
+
# If the workflow is deployed to the scheduler, it will run every 5 minutes
|
77
|
+
# with Asia/Bangkok timezone.
|
78
|
+
- cronjob: '*/5 * * * *'
|
79
|
+
timezone: "Asia/Bangkok"
|
80
|
+
params:
|
81
|
+
# Incoming execution parameters will be validated against this type. It allows you
|
82
|
+
# to set default value or templating.
|
83
|
+
author-run: str
|
84
|
+
run-date: datetime
|
85
|
+
jobs:
|
86
|
+
getting-api-data:
|
87
|
+
stages:
|
88
|
+
- name: "Retrieve API Data"
|
89
|
+
id: retrieve-api
|
90
|
+
uses: tasks/get-api-with-oauth-to-s3@requests
|
91
|
+
with:
|
92
|
+
url: https://open-data/
|
93
|
+
auth: ${API_ACCESS_REFRESH_TOKEN}
|
94
|
+
aws_s3_path: my-data/open-data/
|
95
|
+
|
96
|
+
# This authentication code should be implemented in your custom hook function.
|
97
|
+
# The template allows you to use environment variables.
|
98
|
+
aws_access_client_id: ${AWS_ACCESS_CLIENT_ID}
|
99
|
+
aws_access_client_secret: ${AWS_ACCESS_CLIENT_SECRET}
|
100
|
+
```
|
101
|
+
|
102
|
+
## Configuration
|
103
|
+
|
104
|
+
| Environment | Component | Default | Description |
|
105
|
+
|-------------------------------------|-----------|----------------------------------|----------------------------------------------------------------------------|
|
106
|
+
| `WORKFLOW_ROOT_PATH` | Core | . | The root path of the workflow application |
|
107
|
+
| `WORKFLOW_CORE_REGISTRY` | Core | src.ddeutil.workflow,tests.utils | List of importable string for the hook stage |
|
108
|
+
| `WORKFLOW_CORE_REGISTRY_FILTER` | Core | ddeutil.workflow.utils | List of importable string for the filter template |
|
109
|
+
| `WORKFLOW_CORE_PATH_CONF` | Core | conf | The config path that keep all template `.yaml` files |
|
110
|
+
| `WORKFLOW_CORE_TIMEZONE` | Core | Asia/Bangkok | A Timezone string value that will pass to `ZoneInfo` object |
|
111
|
+
| `WORKFLOW_CORE_STAGE_DEFAULT_ID` | Core | true | A flag that enable default stage ID that use for catch an execution output |
|
112
|
+
| `WORKFLOW_CORE_STAGE_RAISE_ERROR` | Core | true | A flag that all stage raise StageException from stage execution |
|
113
|
+
| `WORKFLOW_CORE_MAX_NUM_POKING` | Core | 4 | |
|
114
|
+
| `WORKFLOW_CORE_MAX_JOB_PARALLEL` | Core | 2 | The maximum job number that able to run parallel in workflow executor |
|
115
|
+
| `WORKFLOW_LOG_DEBUG_MODE` | Log | true | A flag that enable logging with debug level mode |
|
116
|
+
| `WORKFLOW_LOG_ENABLE_WRITE` | Log | true | A flag that enable logging object saving log to its destination |
|
117
|
+
| `WORKFLOW_APP_PROCESS_WORKER` | Schedule | 2 | The maximum process worker number that run in scheduler app module |
|
118
|
+
| `WORKFLOW_APP_SCHEDULE_PER_PROCESS` | Schedule | 100 | A schedule per process that run parallel |
|
119
|
+
| `WORKFLOW_APP_STOP_BOUNDARY_DELTA` | Schedule | '{"minutes": 5, "seconds": 20}' | A time delta value that use to stop scheduler app in json string format |
|
120
|
+
|
121
|
+
**API Application**:
|
122
|
+
|
123
|
+
| Environment | Component | Default | Description |
|
124
|
+
|--------------------------------------|-----------|---------|-----------------------------------------------------------------------------------|
|
125
|
+
| `WORKFLOW_API_ENABLE_ROUTE_WORKFLOW` | API | true | A flag that enable workflow route to manage execute manually and workflow logging |
|
126
|
+
| `WORKFLOW_API_ENABLE_ROUTE_SCHEDULE` | API | true | A flag that enable run scheduler |
|
127
|
+
|
128
|
+
## Deployment
|
129
|
+
|
130
|
+
This package is able to run as an application service to receive manual triggers
|
131
|
+
from the master node via RestAPI, or to be used as a Scheduler background service
|
132
|
+
like crontab job but via Python API.
|
133
|
+
|
134
|
+
### Schedule App
|
135
|
+
|
136
|
+
```shell
|
137
|
+
(venv) $ ddeutil-workflow schedule
|
138
|
+
```
|
139
|
+
|
140
|
+
### API Server
|
141
|
+
|
142
|
+
```shell
|
143
|
+
(venv) $ uvicorn src.ddeutil.workflow.api:app --host 127.0.0.1 --port 80
|
144
|
+
```
|
145
|
+
|
146
|
+
> [!NOTE]
|
147
|
+
> If this package is already deployed, it is able to use
|
148
|
+
> `uvicorn ddeutil.workflow.api:app --host 127.0.0.1 --port 80 --workers 4`
|
@@ -28,15 +28,13 @@ dependencies = [
|
|
28
28
|
"ddeutil-io",
|
29
29
|
"python-dotenv==1.0.1",
|
30
30
|
"typer==0.12.5,<1.0.0",
|
31
|
+
"schedule==1.2.2,<2.0.0",
|
31
32
|
]
|
32
33
|
dynamic = ["version"]
|
33
34
|
|
34
35
|
[project.optional-dependencies]
|
35
|
-
schedule = [
|
36
|
-
"schedule==1.2.2,<2.0.0",
|
37
|
-
]
|
38
36
|
api = [
|
39
|
-
"fastapi
|
37
|
+
"fastapi==0.112.2,<1.0.0",
|
40
38
|
]
|
41
39
|
|
42
40
|
[project.urls]
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__: str = "0.0.11"
|
@@ -6,12 +6,13 @@
|
|
6
6
|
from .exceptions import (
|
7
7
|
JobException,
|
8
8
|
ParamValueException,
|
9
|
-
PipelineException,
|
10
9
|
StageException,
|
11
10
|
UtilException,
|
11
|
+
WorkflowException,
|
12
12
|
)
|
13
|
+
from .job import Job, Strategy
|
13
14
|
from .on import On, interval2crontab
|
14
|
-
from .
|
15
|
+
from .scheduler import Workflow
|
15
16
|
from .stage import Stage, handler_result
|
16
17
|
from .utils import (
|
17
18
|
Param,
|
@@ -0,0 +1,157 @@
|
|
1
|
+
# ------------------------------------------------------------------------------
|
2
|
+
# Copyright (c) 2022 Korawich Anuttra. All rights reserved.
|
3
|
+
# Licensed under the MIT License. See LICENSE in the project root for
|
4
|
+
# license information.
|
5
|
+
# ------------------------------------------------------------------------------
|
6
|
+
from __future__ import annotations
|
7
|
+
|
8
|
+
import asyncio
|
9
|
+
import contextlib
|
10
|
+
import os
|
11
|
+
import uuid
|
12
|
+
from collections.abc import AsyncIterator
|
13
|
+
from datetime import datetime, timedelta
|
14
|
+
from queue import Empty, Queue
|
15
|
+
from threading import Thread
|
16
|
+
from typing import TypedDict
|
17
|
+
|
18
|
+
from ddeutil.core import str2bool
|
19
|
+
from dotenv import load_dotenv
|
20
|
+
from fastapi import FastAPI
|
21
|
+
from fastapi.middleware.gzip import GZipMiddleware
|
22
|
+
from fastapi.responses import UJSONResponse
|
23
|
+
from pydantic import BaseModel
|
24
|
+
|
25
|
+
from .__about__ import __version__
|
26
|
+
from .log import get_logger
|
27
|
+
from .repeat import repeat_at, repeat_every
|
28
|
+
from .scheduler import WorkflowTask
|
29
|
+
|
30
|
+
load_dotenv()
|
31
|
+
logger = get_logger("ddeutil.workflow")
|
32
|
+
|
33
|
+
|
34
|
+
class State(TypedDict):
|
35
|
+
upper_queue: Queue
|
36
|
+
upper_result: dict[str, str]
|
37
|
+
scheduler: list[str]
|
38
|
+
workflow_threads: dict[str, Thread]
|
39
|
+
workflow_tasks: list[WorkflowTask]
|
40
|
+
workflow_queue: dict[str, list[datetime]]
|
41
|
+
workflow_running: dict[str, list[datetime]]
|
42
|
+
|
43
|
+
|
44
|
+
@contextlib.asynccontextmanager
|
45
|
+
async def lifespan(a: FastAPI) -> AsyncIterator[State]:
|
46
|
+
a.state.upper_queue = Queue()
|
47
|
+
a.state.upper_result = {}
|
48
|
+
a.state.scheduler = []
|
49
|
+
a.state.workflow_threads = {}
|
50
|
+
a.state.workflow_tasks = []
|
51
|
+
a.state.workflow_queue = {}
|
52
|
+
a.state.workflow_running = {}
|
53
|
+
|
54
|
+
await asyncio.create_task(broker_upper_messages())
|
55
|
+
|
56
|
+
yield {
|
57
|
+
"upper_queue": a.state.upper_queue,
|
58
|
+
"upper_result": a.state.upper_result,
|
59
|
+
# NOTE: Scheduler value should be contain a key of workflow workflow and
|
60
|
+
# list of datetime of queue and running.
|
61
|
+
#
|
62
|
+
# ... {
|
63
|
+
# ... '<workflow-name>': (
|
64
|
+
# ... [<running-datetime>, ...], [<queue-datetime>, ...]
|
65
|
+
# ... )
|
66
|
+
# ... }
|
67
|
+
#
|
68
|
+
"scheduler": a.state.scheduler,
|
69
|
+
"workflow_queue": a.state.workflow_queue,
|
70
|
+
"workflow_running": a.state.workflow_running,
|
71
|
+
"workflow_threads": a.state.workflow_threads,
|
72
|
+
"workflow_tasks": a.state.workflow_tasks,
|
73
|
+
}
|
74
|
+
|
75
|
+
|
76
|
+
app = FastAPI(
|
77
|
+
titile="Workflow API",
|
78
|
+
description=(
|
79
|
+
"This is workflow FastAPI web application that use to manage manual "
|
80
|
+
"execute or schedule workflow via RestAPI."
|
81
|
+
),
|
82
|
+
version=__version__,
|
83
|
+
lifespan=lifespan,
|
84
|
+
default_response_class=UJSONResponse,
|
85
|
+
)
|
86
|
+
app.add_middleware(GZipMiddleware, minimum_size=1000)
|
87
|
+
|
88
|
+
|
89
|
+
@repeat_every(seconds=10)
|
90
|
+
async def broker_upper_messages():
|
91
|
+
"""Broker for receive message from the `/upper` path and change it to upper
|
92
|
+
case. This broker use interval running in background every 10 seconds.
|
93
|
+
"""
|
94
|
+
for _ in range(10):
|
95
|
+
try:
|
96
|
+
obj = app.state.upper_queue.get_nowait()
|
97
|
+
app.state.upper_result[obj["request_id"]] = obj["text"].upper()
|
98
|
+
logger.info(f"Upper message: {app.state.upper_result}")
|
99
|
+
except Empty:
|
100
|
+
pass
|
101
|
+
await asyncio.sleep(0.0001)
|
102
|
+
|
103
|
+
|
104
|
+
class Payload(BaseModel):
|
105
|
+
text: str
|
106
|
+
|
107
|
+
|
108
|
+
async def get_result(request_id: str) -> dict[str, str]:
|
109
|
+
"""Get data from output dict that global."""
|
110
|
+
while True:
|
111
|
+
if request_id in app.state.upper_result:
|
112
|
+
result: str = app.state.upper_result[request_id]
|
113
|
+
del app.state.upper_result[request_id]
|
114
|
+
return {"message": result}
|
115
|
+
await asyncio.sleep(0.0025)
|
116
|
+
|
117
|
+
|
118
|
+
@app.get("/")
|
119
|
+
@app.get("/api")
|
120
|
+
async def health():
|
121
|
+
return {"message": "Workflow API already start up"}
|
122
|
+
|
123
|
+
|
124
|
+
@app.post("/api")
|
125
|
+
async def message_upper(payload: Payload):
|
126
|
+
"""Convert message from any case to the upper case."""
|
127
|
+
request_id: str = str(uuid.uuid4())
|
128
|
+
app.state.upper_queue.put(
|
129
|
+
{"text": payload.text, "request_id": request_id},
|
130
|
+
)
|
131
|
+
return await get_result(request_id)
|
132
|
+
|
133
|
+
|
134
|
+
if str2bool(os.getenv("WORKFLOW_API_ENABLE_ROUTE_WORKFLOW", "true")):
|
135
|
+
from .route import workflow
|
136
|
+
|
137
|
+
app.include_router(workflow)
|
138
|
+
|
139
|
+
if str2bool(os.getenv("WORKFLOW_API_ENABLE_ROUTE_SCHEDULE", "true")):
|
140
|
+
from .route import schedule
|
141
|
+
from .scheduler import workflow_task
|
142
|
+
|
143
|
+
app.include_router(schedule)
|
144
|
+
|
145
|
+
@schedule.on_event("startup")
|
146
|
+
@repeat_at(cron="* * * * *", delay=2)
|
147
|
+
def schedule_broker_up():
|
148
|
+
logger.debug(
|
149
|
+
f"[SCHEDULER]: Start listening schedule from queue "
|
150
|
+
f"{app.state.scheduler}"
|
151
|
+
)
|
152
|
+
if app.state.workflow_tasks:
|
153
|
+
workflow_task(
|
154
|
+
app.state.workflow_tasks,
|
155
|
+
stop=datetime.now() + timedelta(minutes=1),
|
156
|
+
threads=app.state.workflow_threads,
|
157
|
+
)
|
@@ -29,21 +29,21 @@ cli.add_typer(
|
|
29
29
|
|
30
30
|
@cli.command()
|
31
31
|
def run(
|
32
|
-
|
32
|
+
workflow: Annotated[
|
33
33
|
str,
|
34
|
-
Argument(help="A
|
34
|
+
Argument(help="A workflow name that want to run manually"),
|
35
35
|
],
|
36
36
|
params: Annotated[
|
37
37
|
str,
|
38
38
|
Argument(
|
39
|
-
help="A json string for parameters of this
|
39
|
+
help="A json string for parameters of this workflow execution."
|
40
40
|
),
|
41
41
|
],
|
42
42
|
):
|
43
|
-
"""Run
|
44
|
-
to receive with
|
43
|
+
"""Run workflow workflow manually with an input custom parameters that able
|
44
|
+
to receive with workflow params config.
|
45
45
|
"""
|
46
|
-
logger.info(f"Running
|
46
|
+
logger.info(f"Running workflow name: {workflow}")
|
47
47
|
logger.info(f"... with Parameters: {json.dumps(json.loads(params))}")
|
48
48
|
|
49
49
|
|
@@ -63,7 +63,7 @@ def schedule(
|
|
63
63
|
externals: Annotated[
|
64
64
|
Optional[str],
|
65
65
|
Argument(
|
66
|
-
help="A json string for parameters of this
|
66
|
+
help="A json string for parameters of this workflow execution."
|
67
67
|
),
|
68
68
|
] = None,
|
69
69
|
):
|
@@ -77,20 +77,20 @@ def schedule(
|
|
77
77
|
tz=ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
|
78
78
|
)
|
79
79
|
|
80
|
-
from .scheduler import
|
80
|
+
from .scheduler import workflow_runner
|
81
81
|
|
82
82
|
# NOTE: Start running workflow scheduler application.
|
83
|
-
workflow_rs: list[str] =
|
83
|
+
workflow_rs: list[str] = workflow_runner(
|
84
84
|
stop=stop, excluded=excluded, externals=json.loads(externals)
|
85
85
|
)
|
86
86
|
logger.info(f"Application run success: {workflow_rs}")
|
87
87
|
|
88
88
|
|
89
|
-
@cli_log.command("
|
90
|
-
def
|
89
|
+
@cli_log.command("workflow-get")
|
90
|
+
def workflow_log_get(
|
91
91
|
name: Annotated[
|
92
92
|
str,
|
93
|
-
Argument(help="A
|
93
|
+
Argument(help="A workflow name that want to getting log"),
|
94
94
|
],
|
95
95
|
limit: Annotated[
|
96
96
|
int,
|
@@ -113,8 +113,8 @@ class LogMode(str, Enum):
|
|
113
113
|
delete = "delete"
|
114
114
|
|
115
115
|
|
116
|
-
@cli_log.command("
|
117
|
-
def
|
116
|
+
@cli_log.command("workflow-delete")
|
117
|
+
def workflow_log_delete(
|
118
118
|
mode: Annotated[
|
119
119
|
LogMode,
|
120
120
|
Argument(case_sensitive=True),
|
@@ -6,22 +6,22 @@
|
|
6
6
|
from __future__ import annotations
|
7
7
|
|
8
8
|
|
9
|
-
class
|
9
|
+
class BaseWorkflowException(Exception): ...
|
10
10
|
|
11
11
|
|
12
|
-
class UtilException(
|
12
|
+
class UtilException(BaseWorkflowException): ...
|
13
13
|
|
14
14
|
|
15
|
-
class StageException(
|
15
|
+
class StageException(BaseWorkflowException): ...
|
16
16
|
|
17
17
|
|
18
|
-
class JobException(
|
18
|
+
class JobException(BaseWorkflowException): ...
|
19
19
|
|
20
20
|
|
21
|
-
class
|
21
|
+
class WorkflowException(BaseWorkflowException): ...
|
22
22
|
|
23
23
|
|
24
|
-
class
|
24
|
+
class WorkflowFailException(WorkflowException): ...
|
25
25
|
|
26
26
|
|
27
27
|
class ParamValueException(WorkflowException): ...
|