ddeutil-workflow 0.0.12__tar.gz → 0.0.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ddeutil_workflow-0.0.12/src/ddeutil_workflow.egg-info → ddeutil_workflow-0.0.14}/PKG-INFO +80 -32
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/README.md +75 -29
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/pyproject.toml +7 -3
- ddeutil_workflow-0.0.14/src/ddeutil/workflow/__about__.py +1 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/src/ddeutil/workflow/__init__.py +4 -1
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/src/ddeutil/workflow/__types.py +24 -8
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/src/ddeutil/workflow/api.py +2 -2
- ddeutil_workflow-0.0.14/src/ddeutil/workflow/conf.py +41 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/src/ddeutil/workflow/cron.py +19 -12
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/src/ddeutil/workflow/job.py +251 -184
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/src/ddeutil/workflow/log.py +28 -14
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/src/ddeutil/workflow/on.py +5 -2
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/src/ddeutil/workflow/scheduler.py +262 -140
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/src/ddeutil/workflow/stage.py +105 -39
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/src/ddeutil/workflow/utils.py +106 -40
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14/src/ddeutil_workflow.egg-info}/PKG-INFO +80 -32
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/src/ddeutil_workflow.egg-info/SOURCES.txt +5 -4
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/src/ddeutil_workflow.egg-info/requires.txt +3 -2
- ddeutil_workflow-0.0.14/tests/test__conf_exist.py +12 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test__regex.py +24 -5
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test_job_py.py +16 -2
- ddeutil_workflow-0.0.14/tests/test_job_strategy.py +68 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test_on.py +2 -2
- ddeutil_workflow-0.0.14/tests/test_pipeline_run.py +127 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test_scheduler.py +19 -12
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test_stage.py +13 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test_stage_py.py +19 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test_utils.py +1 -1
- ddeutil_workflow-0.0.14/tests/test_utils_result.py +82 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test_utils_template.py +15 -7
- ddeutil_workflow-0.0.12/src/ddeutil/workflow/__about__.py +0 -1
- ddeutil_workflow-0.0.12/tests/test__conf_exist.py +0 -11
- ddeutil_workflow-0.0.12/tests/test__local_and_global.py +0 -158
- ddeutil_workflow-0.0.12/tests/test_pipeline_run.py +0 -22
- ddeutil_workflow-0.0.12/tests/test_utils_result.py +0 -32
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/LICENSE +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/setup.cfg +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/src/ddeutil/workflow/cli.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/src/ddeutil/workflow/exceptions.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/src/ddeutil/workflow/repeat.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/src/ddeutil/workflow/route.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/src/ddeutil_workflow.egg-info/dependency_links.txt +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/src/ddeutil_workflow.egg-info/entry_points.txt +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/src/ddeutil_workflow.egg-info/top_level.txt +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test_conf.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test_cron.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test_job.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test_log.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test_params.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test_pipeline_if.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test_pipeline_matrix.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test_pipeline_on.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test_pipeline_params.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test_pipeline_run_raise.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test_pipeline_task.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test_poke.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test_stage_bash.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test_stage_condition.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test_stage_hook.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.14}/tests/test_stage_trigger.py +0 -0
- /ddeutil_workflow-0.0.12/tests/test_pipeline.py → /ddeutil_workflow-0.0.14/tests/test_workflow.py +0 -0
- /ddeutil_workflow-0.0.12/tests/test_pipeline_desc.py → /ddeutil_workflow-0.0.14/tests/test_workflow_desc.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ddeutil-workflow
-Version: 0.0.12
+Version: 0.0.14
 Summary: Lightweight workflow orchestration with less dependencies
 Author-email: ddeutils <korawich.anu@gmail.com>
 License: MIT
@@ -18,19 +18,22 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Requires-Python: >=3.9.13
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: ddeutil
+Requires-Dist: ddeutil>=0.4.0
+Requires-Dist: ddeutil-io>=0.1.13
 Requires-Dist: python-dotenv==1.0.1
 Requires-Dist: typer<1.0.0,==0.12.5
 Requires-Dist: schedule<2.0.0,==1.2.2
 Provides-Extra: api
-Requires-Dist: fastapi<1.0.0
+Requires-Dist: fastapi<1.0.0,>=0.115.0; extra == "api"
 
 # Workflow
 
 [](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
+[](https://pypi.org/project/ddeutil-workflow/)
 [](https://pypi.org/project/ddeutil-workflow/)
 [](https://github.com/ddeutils/ddeutil-workflow)
 [](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
@@ -41,8 +44,9 @@ for easy to make a simple metadata driven for data workflow orchestration.
 It can to use for data operator by a `.yaml` template.
 
 > [!WARNING]
-> This package provide only orchestration workload. That mean you should not
-> workflow stage to process any large data which use lot of compute
+> This package provide only orchestration workload task. That mean you should not
+> use the workflow stage to process any large volume data which use lot of compute
+> resource. :cold_sweat:
 
 In my opinion, I think it should not create duplicate workflow codes if I can
 write with dynamic input parameters on the one template workflow that just change
@@ -50,23 +54,25 @@ the input parameters per use-case instead.
 This way I can handle a lot of logical workflows in our orgs with only metadata
 configuration. It called **Metadata Driven Data Workflow**.
 
-
-workflow running. Because it not show us what is a use-case that running data
-workflow.
+**:pushpin: <u>Rules of This Workflow engine</u>**:
 
-
-
-
-
-
-**Rules of This Workflow engine**:
+1. Minimum frequency unit of scheduling is **1 minute** :warning:
+2. Can not re-run only failed stage and its pending downstream :rotating_light:
+3. All parallel tasks inside workflow engine use Multi-Threading
+   (Because Python 3.13 unlock GIL :unlock:)
 
-
-
-
-
+> [!NOTE]
+> _Disclaimer_: I inspire the dynamic statement from the [**GitHub Action**](https://github.com/features/actions)
+> with `.yml` files and all of config file from several data orchestration framework
+> tools from my experience on Data Engineer. :grimacing:
+>
+> Other workflow that I interest on them and pick some interested feature to this
+> package:
+>
+> - [Google **Workflows**](https://cloud.google.com/workflows)
+> - [AWS **Step Functions**](https://aws.amazon.com/step-functions/)
 
-## Installation
+## :round_pushpin: Installation
 
 This project need `ddeutil-io` extension namespace packages. If you want to install
 this package with application add-ons, you should add `app` in installation;
@@ -79,7 +85,7 @@ this package with application add-ons, you should add `app` in installation;
 
 > I added this feature to the main milestone.
 >
-> **Docker Images** supported:
+> :egg: **Docker Images** supported:
 >
 > | Docker Image                | Python Version | Support |
 > |-----------------------------|----------------|---------|
@@ -87,8 +93,9 @@ this package with application add-ons, you should add `app` in installation;
 > | ddeutil-workflow:python3.10 | `3.10`         | :x:     |
 > | ddeutil-workflow:python3.11 | `3.11`         | :x:     |
 > | ddeutil-workflow:python3.12 | `3.12`         | :x:     |
+> | ddeutil-workflow:python3.12 | `3.13`         | :x:     |
 
-## Usage
+## :beers: Usage
 
 This is examples that use workflow file for running common Data Engineering
 use-case.
@@ -100,8 +107,10 @@ use-case.
 > maintenance your data workflows.
 
 ```yaml
-
-
+run-py-local:
+
+  # Validate model that use to parsing exists for template file
+  type: ddeutil.workflow.Workflow
   on:
     # If workflow deploy to schedule, it will running every 5 minutes
     # with Asia/Bangkok timezone.
@@ -110,7 +119,7 @@ run_py_local:
   params:
     # Incoming execution parameters will validate with this type. It allow
     # to set default value or templating.
-
+    source-extract: str
     run-date: datetime
   jobs:
     getting-api-data:
@@ -119,17 +128,56 @@ run_py_local:
           id: retrieve-api
           uses: tasks/get-api-with-oauth-to-s3@requests
           with:
-
-
-
-
-
-
+            # Arguments of source data that want to retrieve.
+            method: post
+            url: https://finances/open-data/currency-pairs/
+            body:
+              resource: ${{ params.source-extract }}
+
+              # You can able to use filtering like Jinja template but this
+              # package does not use it.
+              filter: ${{ params.run-date | fmt(fmt='%Y%m%d') }}
+            auth:
+              type: bearer
+              keys: ${API_ACCESS_REFRESH_TOKEN}
+
+            # Arguments of target data that want to landing.
+            writing_mode: flatten
+            aws_s3_path: my-data/open-data/${{ params.source-extract }}
+
+            # This Authentication code should implement with your custom hook
+            # function. The template allow you to use environment variable.
             aws_access_client_id: ${AWS_ACCESS_CLIENT_ID}
             aws_access_client_secret: ${AWS_ACCESS_CLIENT_SECRET}
 ```
 
-
+The above workflow template is main executor pipeline that you want to do. If you
+want to schedule this workflow, you want to dynamic its parameters change base on
+execution time such as `run-date` should change base on that workflow running date.
+
+So, this package provide the `Schedule` template for this action.
+
+```yaml
+schedule-run-local-wf:
+
+  # Validate model that use to parsing exists for template file
+  type: ddeutil.workflow.scheduler.Schedule
+  workflows:
+
+    # Map existing workflow that want to deploy with scheduler application.
+    # It allow you to passing release parameter that dynamic change depend the
+    # current context of this scheduler application releasing that time.
+    - name: run-py-local
+      params:
+        source-extract: "USD-THB"
+        asat-dt: "${{ release.logical_date }}"
+```
+
+## :cookie: Configuration
+
+The main configuration that use to dynamic changing with your propose of this
+application. If any configuration values do not set yet, it will use default value
+and do not raise any error to you.
 
 | Environment | Component | Default | Description |
 |-------------------------------------|-----------|----------------------------------|----------------------------------------------------------------------------|
@@ -155,7 +203,7 @@ run_py_local:
 | `WORKFLOW_API_ENABLE_ROUTE_WORKFLOW` | API | true | A flag that enable workflow route to manage execute manually and workflow logging |
 | `WORKFLOW_API_ENABLE_ROUTE_SCHEDULE` | API | true | A flag that enable run scheduler |
 
-## Deployment
+## :rocket: Deployment
 
 This package able to run as a application service for receive manual trigger
 from the master node via RestAPI or use to be Scheduler background service
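The `type` key in both templates above is what the loader dispatches on when it validates a document against the matching model (`Workflow` or `Schedule`). A minimal, self-contained sketch of that dispatch idea follows; it is not the package's own loader (which comes through `ddeutil-io`), and PyYAML is assumed only for the illustration:

```python
import yaml  # assumption: PyYAML is available; the package itself loads config via ddeutil-io

template = """
run-py-local:
  # Validate model that use to parsing exists for template file
  type: ddeutil.workflow.Workflow
  params:
    source-extract: str
    run-date: datetime
"""

# Each top-level key is a workflow name; its `type` names the model that the
# document should be validated with before execution.
for name, body in yaml.safe_load(template).items():
    print(f"{name} -> validate with {body['type']}")
# run-py-local -> validate with ddeutil.workflow.Workflow
```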
README.md

@@ -1,6 +1,7 @@
 # Workflow
 
 [](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
+[](https://pypi.org/project/ddeutil-workflow/)
 [](https://pypi.org/project/ddeutil-workflow/)
 [](https://github.com/ddeutils/ddeutil-workflow)
 [](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
@@ -11,8 +12,9 @@ for easy to make a simple metadata driven for data workflow orchestration.
 It can to use for data operator by a `.yaml` template.
 
 > [!WARNING]
-> This package provide only orchestration workload. That mean you should not
-> workflow stage to process any large data which use lot of compute
+> This package provide only orchestration workload task. That mean you should not
+> use the workflow stage to process any large volume data which use lot of compute
+> resource. :cold_sweat:
 
 In my opinion, I think it should not create duplicate workflow codes if I can
 write with dynamic input parameters on the one template workflow that just change
@@ -20,23 +22,25 @@ the input parameters per use-case instead.
 This way I can handle a lot of logical workflows in our orgs with only metadata
 configuration. It called **Metadata Driven Data Workflow**.
 
-
-workflow running. Because it not show us what is a use-case that running data
-workflow.
+**:pushpin: <u>Rules of This Workflow engine</u>**:
 
-
-
-
-
-
-**Rules of This Workflow engine**:
+1. Minimum frequency unit of scheduling is **1 minute** :warning:
+2. Can not re-run only failed stage and its pending downstream :rotating_light:
+3. All parallel tasks inside workflow engine use Multi-Threading
+   (Because Python 3.13 unlock GIL :unlock:)
 
-
-
-
-
+> [!NOTE]
+> _Disclaimer_: I inspire the dynamic statement from the [**GitHub Action**](https://github.com/features/actions)
+> with `.yml` files and all of config file from several data orchestration framework
+> tools from my experience on Data Engineer. :grimacing:
+>
+> Other workflow that I interest on them and pick some interested feature to this
+> package:
+>
+> - [Google **Workflows**](https://cloud.google.com/workflows)
+> - [AWS **Step Functions**](https://aws.amazon.com/step-functions/)
 
-## Installation
+## :round_pushpin: Installation
 
 This project need `ddeutil-io` extension namespace packages. If you want to install
 this package with application add-ons, you should add `app` in installation;
@@ -49,7 +53,7 @@ this package with application add-ons, you should add `app` in installation;
 
 > I added this feature to the main milestone.
 >
-> **Docker Images** supported:
+> :egg: **Docker Images** supported:
 >
 > | Docker Image                | Python Version | Support |
 > |-----------------------------|----------------|---------|
@@ -57,8 +61,9 @@ this package with application add-ons, you should add `app` in installation;
 > | ddeutil-workflow:python3.10 | `3.10`         | :x:     |
 > | ddeutil-workflow:python3.11 | `3.11`         | :x:     |
 > | ddeutil-workflow:python3.12 | `3.12`         | :x:     |
+> | ddeutil-workflow:python3.12 | `3.13`         | :x:     |
 
-## Usage
+## :beers: Usage
 
 This is examples that use workflow file for running common Data Engineering
 use-case.
@@ -70,8 +75,10 @@ use-case.
 > maintenance your data workflows.
 
 ```yaml
-
-
+run-py-local:
+
+  # Validate model that use to parsing exists for template file
+  type: ddeutil.workflow.Workflow
   on:
     # If workflow deploy to schedule, it will running every 5 minutes
     # with Asia/Bangkok timezone.
@@ -80,7 +87,7 @@ run_py_local:
   params:
     # Incoming execution parameters will validate with this type. It allow
     # to set default value or templating.
-
+    source-extract: str
     run-date: datetime
   jobs:
     getting-api-data:
@@ -89,17 +96,56 @@ run_py_local:
          id: retrieve-api
          uses: tasks/get-api-with-oauth-to-s3@requests
          with:
-
-
-
-
-
-
+            # Arguments of source data that want to retrieve.
+            method: post
+            url: https://finances/open-data/currency-pairs/
+            body:
+              resource: ${{ params.source-extract }}
+
+              # You can able to use filtering like Jinja template but this
+              # package does not use it.
+              filter: ${{ params.run-date | fmt(fmt='%Y%m%d') }}
+            auth:
+              type: bearer
+              keys: ${API_ACCESS_REFRESH_TOKEN}
+
+            # Arguments of target data that want to landing.
+            writing_mode: flatten
+            aws_s3_path: my-data/open-data/${{ params.source-extract }}
+
+            # This Authentication code should implement with your custom hook
+            # function. The template allow you to use environment variable.
            aws_access_client_id: ${AWS_ACCESS_CLIENT_ID}
            aws_access_client_secret: ${AWS_ACCESS_CLIENT_SECRET}
 ```
 
-
+The above workflow template is main executor pipeline that you want to do. If you
+want to schedule this workflow, you want to dynamic its parameters change base on
+execution time such as `run-date` should change base on that workflow running date.
+
+So, this package provide the `Schedule` template for this action.
+
+```yaml
+schedule-run-local-wf:
+
+  # Validate model that use to parsing exists for template file
+  type: ddeutil.workflow.scheduler.Schedule
+  workflows:
+
+    # Map existing workflow that want to deploy with scheduler application.
+    # It allow you to passing release parameter that dynamic change depend the
+    # current context of this scheduler application releasing that time.
+    - name: run-py-local
+      params:
+        source-extract: "USD-THB"
+        asat-dt: "${{ release.logical_date }}"
+```
+
+## :cookie: Configuration
+
+The main configuration that use to dynamic changing with your propose of this
+application. If any configuration values do not set yet, it will use default value
+and do not raise any error to you.
 
 | Environment | Component | Default | Description |
 |-------------------------------------|-----------|----------------------------------|----------------------------------------------------------------------------|
@@ -125,7 +171,7 @@ run_py_local:
 | `WORKFLOW_API_ENABLE_ROUTE_WORKFLOW` | API | true | A flag that enable workflow route to manage execute manually and workflow logging |
 | `WORKFLOW_API_ENABLE_ROUTE_SCHEDULE` | API | true | A flag that enable run scheduler |
 
-## Deployment
+## :rocket: Deployment
 
 This package able to run as a application service for receive manual trigger
 from the master node via RestAPI or use to be Scheduler background service
pyproject.toml

@@ -22,10 +22,12 @@ classifiers = [
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
 ]
 requires-python = ">=3.9.13"
 dependencies = [
-    "ddeutil
+    "ddeutil>=0.4.0",
+    "ddeutil-io>=0.1.13",
     "python-dotenv==1.0.1",
     "typer==0.12.5,<1.0.0",
     "schedule==1.2.2,<2.0.0",
@@ -34,7 +36,7 @@ dynamic = ["version"]
 
 [project.optional-dependencies]
 api = [
-    "fastapi
+    "fastapi>=0.115.0,<1.0.0",
 ]
 
 [project.urls]
@@ -60,8 +62,10 @@ relative_files = true
 concurrency = ["thread", "multiprocessing"]
 source = ["ddeutil.workflow", "tests"]
 omit = [
-    "
+    "src/ddeutil/workflow/__about__.py",
     # Omit this files because it does not ready to production.
+    "src/ddeutil/workflow/api.py",
+    "src/ddeutil/workflow/cli.py",
     "src/ddeutil/workflow/repeat.py",
     "src/ddeutil/workflow/route.py",
     "tests/utils.py",
src/ddeutil/workflow/__about__.py

@@ -0,0 +1 @@
+__version__: str = "0.0.14"
src/ddeutil/workflow/__init__.py

@@ -12,7 +12,10 @@ from .exceptions import (
 )
 from .job import Job, Strategy
 from .on import On, interval2crontab
-from .scheduler import
+from .scheduler import (
+    Schedule,
+    Workflow,
+)
 from .stage import Stage, handler_result
 from .utils import (
     Param,
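With this hunk the top-level package re-exports the `Schedule` model alongside `Workflow`, so the names used in the README templates can be imported directly (all of these names appear in the `__init__.py` imports shown above):

```python
# Public names re-exported by ddeutil.workflow as of 0.0.14, per the hunk above.
from ddeutil.workflow import Job, On, Schedule, Stage, Strategy, Workflow
```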
src/ddeutil/workflow/__types.py

@@ -24,20 +24,32 @@ MatrixExclude = list[dict[str, Union[str, int]]]
 
 
 class Re:
-    """Regular expression config."""
+    """Regular expression config for this package."""
 
-    # NOTE:
-    #
+    # NOTE:
+    #   Regular expression:
+    #       - Version 1:
+    #           \${{\s*(?P<caller>[a-zA-Z0-9_.\s'\"\[\]\(\)\-\{}]+?)\s*(?P<post_filters>(?:\|\s*(?:[a-zA-Z0-9_]{3,}[a-zA-Z0-9_.,-\\%\s'\"[\]()\{}]+)\s*)*)}}
+    #       - Version 2 (2024-09-30):
+    #           \${{\s*(?P<caller>(?P<caller_prefix>[a-zA-Z_-]+\.)*(?P<caller_last>[a-zA-Z0-9_\-.'\"(\)[\]{}]+))\s*(?P<post_filters>(?:\|\s*(?:[a-zA-Z0-9_]{3,}[a-zA-Z0-9_.,-\\%\s'\"[\]()\{}]+)\s*)*)}}
+    #
+    #   Examples:
+    #       - ${{ params.asat_dt }}
+    #       - ${{ params.source.table }}
+    #
     __re_caller: str = r"""
         \$
         {{
         \s*
         (?P<caller>
-            [a-zA-
-
+            (?P<caller_prefix>[a-zA-Z_-]+\.)*
+            (?P<caller_last>[a-zA-Z0-9_\-.'\"(\)[\]{}]+)
+        )
+        \s*
         (?P<post_filters>
             (?:
-
+                \|
+                \s*
                 (?:[a-zA-Z0-9_]{3,}[a-zA-Z0-9_.,-\\%\s'\"[\]()\{}]*)
                 \s*
             )*
@@ -48,8 +60,12 @@ class Re:
         __re_caller, MULTILINE | IGNORECASE | UNICODE | VERBOSE
     )
 
-    # NOTE:
-    #
+    # NOTE:
+    #   Regular expression:
+    #       ^(?P<path>[^/@]+)/(?P<func>[^@]+)@(?P<tag>.+)$
+    #
+    #   Examples:
+    #       - tasks/function@dummy
     __re_task_fmt: str = r"""
         ^
         (?P<path>[^/@]+)
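The caller pattern can be exercised against the template strings from the README. The pattern below is the one-line "Version 2" form copied verbatim from the comment in this hunk; it is compiled locally here because the name of the compiled attribute on `Re` is not visible in the diff:

```python
import re

# "Version 2" caller pattern, copied from the __types.py comment above.
RE_CALLER = re.compile(
    r"\${{\s*(?P<caller>(?P<caller_prefix>[a-zA-Z_-]+\.)*"
    r"(?P<caller_last>[a-zA-Z0-9_\-.'\"(\)[\]{}]+))\s*"
    r"(?P<post_filters>(?:\|\s*(?:[a-zA-Z0-9_]{3,}"
    r"[a-zA-Z0-9_.,-\\%\s'\"[\]()\{}]+)\s*)*)}}"
)

m = RE_CALLER.search("${{ params.run-date | fmt(fmt='%Y%m%d') }}")
print(m.group("caller"))        # params.run-date
print(m.group("caller_last"))   # run-date
print(m.group("post_filters"))  # | fmt(fmt='%Y%m%d')
```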
src/ddeutil/workflow/api.py

@@ -25,7 +25,7 @@ from pydantic import BaseModel
 from .__about__ import __version__
 from .log import get_logger
 from .repeat import repeat_at, repeat_every
-from .scheduler import
+from .scheduler import WorkflowTaskData
 
 load_dotenv()
 logger = get_logger("ddeutil.workflow")
@@ -36,7 +36,7 @@ class State(TypedDict):
     upper_result: dict[str, str]
     scheduler: list[str]
     workflow_threads: dict[str, Thread]
-    workflow_tasks: list[
+    workflow_tasks: list[WorkflowTaskData]
     workflow_queue: dict[str, list[datetime]]
     workflow_running: dict[str, list[datetime]]
 
src/ddeutil/workflow/conf.py

@@ -0,0 +1,41 @@
+# ------------------------------------------------------------------------------
+# Copyright (c) 2022 Korawich Anuttra. All rights reserved.
+# Licensed under the MIT License. See LICENSE in the project root for
+# license information.
+# ------------------------------------------------------------------------------
+from __future__ import annotations
+
+import os
+from zoneinfo import ZoneInfo
+
+from ddeutil.core import str2bool
+from dotenv import load_dotenv
+
+load_dotenv()
+env = os.getenv
+
+
+class Config:
+    # NOTE: Core
+    tz: ZoneInfo = ZoneInfo(env("WORKFLOW_CORE_TIMEZONE", "UTC"))
+
+    # NOTE: Stage
+    stage_raise_error: bool = str2bool(
+        env("WORKFLOW_CORE_STAGE_RAISE_ERROR", "true")
+    )
+    stage_default_id: bool = str2bool(
+        env("WORKFLOW_CORE_STAGE_DEFAULT_ID", "false")
+    )
+
+    # NOTE: Workflow
+    max_job_parallel: int = int(env("WORKFLOW_CORE_MAX_JOB_PARALLEL", "2"))
+
+    def __init__(self):
+        if self.max_job_parallel < 0:
+            raise ValueError(
+                f"MAX_JOB_PARALLEL should more than 0 but got "
+                f"{self.max_job_parallel}."
+            )
+
+
+config = Config()
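Because the `Config` attributes are read from the environment at class-definition time (and `load_dotenv()` runs on import), overrides have to be in place before the module is first imported. A small usage sketch, assuming the package from this diff is installed; the override values are hypothetical:

```python
import os

# Hypothetical overrides; the variable names and defaults mirror conf.py above.
os.environ["WORKFLOW_CORE_TIMEZONE"] = "Asia/Bangkok"
os.environ["WORKFLOW_CORE_MAX_JOB_PARALLEL"] = "4"

from ddeutil.workflow.conf import config  # noqa: E402  import after the env is set

print(config.tz)                 # Asia/Bangkok
print(config.max_job_parallel)   # 4
print(config.stage_raise_error)  # True (default "true" parsed by str2bool)
```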
src/ddeutil/workflow/cron.py

@@ -14,7 +14,7 @@ from typing import ClassVar, Optional, Union
 from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
 
 from ddeutil.core import (
-
+    checker,
     isinstance_check,
     must_split,
 )
@@ -38,16 +38,21 @@ class CronYearLimit(Exception): ...
 
 
 def str2cron(value: str) -> str:
-    """Convert Special String to Crontab.
-
-
-
-
-
-
-
-
-
+    """Convert Special String with the @ prefix to Crontab value.
+
+    :param value: A string value that want to convert to cron value.
+    :rtype: str
+
+    Table:
+
+        @reboot      Run once, at system startup
+        @yearly      Run once every year, "0 0 1 1 *"
+        @annually    (same as @yearly)
+        @monthly     Run once every month, "0 0 1 * *"
+        @weekly      Run once every week, "0 0 * * 0"
+        @daily       Run once each day, "0 0 * * *"
+        @midnight    (same as @daily)
+        @hourly      Run once an hour, "0 * * * *"
     """
     mapping_spacial_str = {
         "@reboot": "",
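The docstring table corresponds to the `mapping_spacial_str` dict whose first entry (`"@reboot": ""`) is visible at the end of this hunk. A minimal sketch of that lookup, with names local to the sketch rather than the package's:

```python
# Alias table taken from the str2cron docstring above; "@reboot" maps to an
# empty string, matching the first entry of mapping_spacial_str in the hunk.
CRON_ALIASES: dict[str, str] = {
    "@reboot": "",
    "@yearly": "0 0 1 1 *",
    "@annually": "0 0 1 1 *",
    "@monthly": "0 0 1 * *",
    "@weekly": "0 0 * * 0",
    "@daily": "0 0 * * *",
    "@midnight": "0 0 * * *",
    "@hourly": "0 * * * *",
}


def str2cron_sketch(value: str) -> str:
    """Return the crontab string for an @-prefixed special value."""
    return CRON_ALIASES[value.strip()]


assert str2cron_sketch("@daily") == "0 0 * * *"
```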
@@ -321,7 +326,9 @@ class CronPart:
             self._parse_range(value_range)
         )
 
-        if (
+        if (
+            value_step and not checker.is_int(value_step)
+        ) or value_step == "":
             raise ValueError(
                 f"Invalid interval step value {value_step!r} for "
                 f"{self.unit.name!r}"