ddeutil-workflow 0.0.12__tar.gz → 0.0.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ddeutil_workflow-0.0.12/src/ddeutil_workflow.egg-info → ddeutil_workflow-0.0.13}/PKG-INFO +78 -32
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/README.md +74 -29
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/pyproject.toml +6 -2
- ddeutil_workflow-0.0.13/src/ddeutil/workflow/__about__.py +1 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/src/ddeutil/workflow/job.py +73 -42
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/src/ddeutil/workflow/on.py +5 -2
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/src/ddeutil/workflow/scheduler.py +30 -29
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/src/ddeutil/workflow/stage.py +43 -10
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13/src/ddeutil_workflow.egg-info}/PKG-INFO +78 -32
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/src/ddeutil_workflow.egg-info/SOURCES.txt +0 -1
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/src/ddeutil_workflow.egg-info/requires.txt +2 -2
- ddeutil_workflow-0.0.13/tests/test__conf_exist.py +12 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_job_py.py +17 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_on.py +2 -2
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_scheduler.py +5 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_stage.py +13 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_stage_py.py +21 -0
- ddeutil_workflow-0.0.12/src/ddeutil/workflow/__about__.py +0 -1
- ddeutil_workflow-0.0.12/tests/test__conf_exist.py +0 -11
- ddeutil_workflow-0.0.12/tests/test__local_and_global.py +0 -158
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/LICENSE +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/setup.cfg +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/src/ddeutil/workflow/__init__.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/src/ddeutil/workflow/__types.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/src/ddeutil/workflow/api.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/src/ddeutil/workflow/cli.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/src/ddeutil/workflow/cron.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/src/ddeutil/workflow/exceptions.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/src/ddeutil/workflow/log.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/src/ddeutil/workflow/repeat.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/src/ddeutil/workflow/route.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/src/ddeutil/workflow/utils.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/src/ddeutil_workflow.egg-info/dependency_links.txt +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/src/ddeutil_workflow.egg-info/entry_points.txt +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/src/ddeutil_workflow.egg-info/top_level.txt +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test__regex.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_conf.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_cron.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_job.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_log.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_params.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_pipeline.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_pipeline_desc.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_pipeline_if.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_pipeline_matrix.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_pipeline_on.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_pipeline_params.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_pipeline_run.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_pipeline_run_raise.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_pipeline_task.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_poke.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_stage_bash.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_stage_condition.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_stage_hook.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_stage_trigger.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_utils.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_utils_result.py +0 -0
- {ddeutil_workflow-0.0.12 → ddeutil_workflow-0.0.13}/tests/test_utils_template.py +0 -0
--- ddeutil_workflow-0.0.12/src/ddeutil_workflow.egg-info/PKG-INFO
+++ ddeutil_workflow-0.0.13/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ddeutil-workflow
-Version: 0.0.12
+Version: 0.0.13
 Summary: Lightweight workflow orchestration with less dependencies
 Author-email: ddeutils <korawich.anu@gmail.com>
 License: MIT
@@ -18,19 +18,21 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Requires-Python: >=3.9.13
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: ddeutil-io
+Requires-Dist: ddeutil-io>=0.1.12
 Requires-Dist: python-dotenv==1.0.1
 Requires-Dist: typer<1.0.0,==0.12.5
 Requires-Dist: schedule<2.0.0,==1.2.2
 Provides-Extra: api
-Requires-Dist: fastapi<1.0.0
+Requires-Dist: fastapi<1.0.0,>=0.114.1; extra == "api"
 
 # Workflow
 
 [![test](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
+[![pypi version](https://img.shields.io/pypi/v/ddeutil-workflow)](https://pypi.org/project/ddeutil-workflow/)
 [![python support version](https://img.shields.io/pypi/pyversions/ddeutil-workflow)](https://pypi.org/project/ddeutil-workflow/)
 [![size](https://img.shields.io/github/languages/code-size/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow)
 [![gh license](https://img.shields.io/github/license/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
@@ -41,8 +43,9 @@ for easy to make a simple metadata driven for data workflow orchestration.
 It can to use for data operator by a `.yaml` template.
 
 > [!WARNING]
-> This package provide only orchestration workload. That mean you should not
-> workflow stage to process any large data which use lot of compute
+> This package provide only orchestration workload task. That mean you should not
+> use the workflow stage to process any large volume data which use lot of compute
+> resource. :cold_sweat:
 
 In my opinion, I think it should not create duplicate workflow codes if I can
 write with dynamic input parameters on the one template workflow that just change
@@ -50,23 +53,25 @@ the input parameters per use-case instead.
 This way I can handle a lot of logical workflows in our orgs with only metadata
 configuration. It called **Metadata Driven Data Workflow**.
 
-
-workflow running. Because it not show us what is a use-case that running data
-workflow.
+**:pushpin: <u>Rules of This Workflow engine</u>**:
 
-
-
-
-
-
-**Rules of This Workflow engine**:
+1. Minimum frequency unit of scheduling is **1 minute** :warning:
+2. Can not re-run only failed stage and its pending downstream :rotating_light:
+3. All parallel tasks inside workflow engine use Multi-Threading
+   (Because Python 3.13 unlock GIL :unlock:)
 
-
-
-
-
+> [!NOTE]
+> _Disclaimer_: I inspire the dynamic statement from the [**GitHub Action**](https://github.com/features/actions)
+> `.yml` files and all of config file from several data orchestration framework
+> tools from my experience on Data Engineer. :grimacing:
+>
+> Other workflow that I interest on them and pick some interested feature to this
+> package:
+>
+> - [Google **Workflows**](https://cloud.google.com/workflows)
+> - [AWS **Step Functions**](https://aws.amazon.com/step-functions/)
 
-## Installation
+## :round_pushpin: Installation
 
 This project need `ddeutil-io` extension namespace packages. If you want to install
 this package with application add-ons, you should add `app` in installation;
@@ -79,7 +84,7 @@ this package with application add-ons, you should add `app` in installation;
 
 > I added this feature to the main milestone.
 >
-> **Docker Images** supported:
+> :egg: **Docker Images** supported:
 >
 > | Docker Image                | Python Version | Support |
 > |-----------------------------|----------------|---------|
@@ -88,7 +93,7 @@ this package with application add-ons, you should add `app` in installation;
 > | ddeutil-workflow:python3.11 | `3.11`         | :x:     |
 > | ddeutil-workflow:python3.12 | `3.12`         | :x:     |
 
-## Usage
+## :beers: Usage
 
 This is examples that use workflow file for running common Data Engineering
 use-case.
@@ -100,8 +105,10 @@ use-case.
 > maintenance your data workflows.
 
 ```yaml
-run_py_local:
-
+run-py-local:
+
+  # Validate model that use to parsing exists for template file
+  type: ddeutil.workflow.Workflow
   on:
     # If workflow deploy to schedule, it will running every 5 minutes
     # with Asia/Bangkok timezone.
@@ -110,7 +117,7 @@ run_py_local:
   params:
     # Incoming execution parameters will validate with this type. It allow
     # to set default value or templating.
-
+    source-extract: str
     run-date: datetime
   jobs:
     getting-api-data:
@@ -119,17 +126,56 @@ run_py_local:
           id: retrieve-api
           uses: tasks/get-api-with-oauth-to-s3@requests
          with:
-
-
-
-
-
-
+            # Arguments of source data that want to retrieve.
+            method: post
+            url: https://finances/open-data/currency-pairs/
+            body:
+              resource: ${{ params.source-extract }}
+
+              # You can able to use filtering like Jinja template but this
+              # package does not use it.
+              filter: ${{ params.run-date | fmt(fmt='%Y%m%d') }}
+            auth:
+              type: bearer
+              keys: ${API_ACCESS_REFRESH_TOKEN}
+
+            # Arguments of target data that want to landing.
+            writing_mode: flatten
+            aws_s3_path: my-data/open-data/${{ params.source-extract }}
+
+            # This Authentication code should implement with your custom hook
+            # function. The template allow you to use environment variable.
             aws_access_client_id: ${AWS_ACCESS_CLIENT_ID}
             aws_access_client_secret: ${AWS_ACCESS_CLIENT_SECRET}
 ```
 
-
+The above workflow template is main executor pipeline that you want to do. If you
+want to schedule this workflow, you want to dynamic its parameters change base on
+execution time such as `run-date` should change base on that workflow running date.
+
+So, this package provide the `Schedule` template for this action.
+
+```yaml
+schedule-run-local-wf:
+
+  # Validate model that use to parsing exists for template file
+  type: ddeutil.workflow.scheduler.Schedule
+  workflows:
+
+    # Map existing workflow that want to deploy with scheduler application.
+    # It allow you to passing release parameter that dynamic change depend the
+    # current context of this scheduler application releasing that time.
+    - name: run-py-local
+      params:
+        source-extract: "USD-THB"
+        asat-dt: "${{ release.logical_date }}"
+```
+
+## :cookie: Configuration
+
+The main configuration that use to dynamic changing with your propose of this
+application. If any configuration values do not set yet, it will use default value
+and do not raise any error to you.
 
 | Environment | Component | Default | Description |
 |-------------------------------------|-----------|----------------------------------|----------------------------------------------------------------------------|
@@ -155,7 +201,7 @@ run_py_local:
 | `WORKFLOW_API_ENABLE_ROUTE_WORKFLOW` | API | true | A flag that enable workflow route to manage execute manually and workflow logging |
 | `WORKFLOW_API_ENABLE_ROUTE_SCHEDULE` | API | true | A flag that enable run scheduler |
 
-## Deployment
+## :rocket: Deployment
 
 This package able to run as a application service for receive manual trigger
 from the master node via RestAPI or use to be Scheduler background service
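The metadata changes above tighten previously unpinned dependencies. As a quick illustration (using the third-party `packaging` library, which is not part of ddeutil-workflow), the new `Requires-Dist` line for fastapi parses into a version window plus an extra marker:

```python
from packaging.requirements import Requirement

# The new Requires-Dist line from the PKG-INFO diff above.
req = Requirement('fastapi<1.0.0,>=0.114.1; extra == "api"')

print(req.name)                               # fastapi
print(req.specifier)                          # <1.0.0,>=0.114.1
print(req.specifier.contains("0.114.1"))      # True: lower bound is inclusive
print(req.specifier.contains("1.0.0"))        # False: upper bound is exclusive
print(req.marker.evaluate({"extra": "api"}))  # True: pulled in only with the [api] extra
```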
--- ddeutil_workflow-0.0.12/README.md
+++ ddeutil_workflow-0.0.13/README.md
@@ -1,6 +1,7 @@
 # Workflow
 
 [![test](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
+[![pypi version](https://img.shields.io/pypi/v/ddeutil-workflow)](https://pypi.org/project/ddeutil-workflow/)
 [![python support version](https://img.shields.io/pypi/pyversions/ddeutil-workflow)](https://pypi.org/project/ddeutil-workflow/)
 [![size](https://img.shields.io/github/languages/code-size/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow)
 [![gh license](https://img.shields.io/github/license/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
@@ -11,8 +12,9 @@ for easy to make a simple metadata driven for data workflow orchestration.
 It can to use for data operator by a `.yaml` template.
 
 > [!WARNING]
-> This package provide only orchestration workload. That mean you should not
-> workflow stage to process any large data which use lot of compute
+> This package provide only orchestration workload task. That mean you should not
+> use the workflow stage to process any large volume data which use lot of compute
+> resource. :cold_sweat:
 
 In my opinion, I think it should not create duplicate workflow codes if I can
 write with dynamic input parameters on the one template workflow that just change
@@ -20,23 +22,25 @@ the input parameters per use-case instead.
 This way I can handle a lot of logical workflows in our orgs with only metadata
 configuration. It called **Metadata Driven Data Workflow**.
 
-
-workflow running. Because it not show us what is a use-case that running data
-workflow.
+**:pushpin: <u>Rules of This Workflow engine</u>**:
 
-
-
-
-
-
-**Rules of This Workflow engine**:
+1. Minimum frequency unit of scheduling is **1 minute** :warning:
+2. Can not re-run only failed stage and its pending downstream :rotating_light:
+3. All parallel tasks inside workflow engine use Multi-Threading
+   (Because Python 3.13 unlock GIL :unlock:)
 
-
-
-
-
+> [!NOTE]
+> _Disclaimer_: I inspire the dynamic statement from the [**GitHub Action**](https://github.com/features/actions)
+> `.yml` files and all of config file from several data orchestration framework
+> tools from my experience on Data Engineer. :grimacing:
+>
+> Other workflow that I interest on them and pick some interested feature to this
+> package:
+>
+> - [Google **Workflows**](https://cloud.google.com/workflows)
+> - [AWS **Step Functions**](https://aws.amazon.com/step-functions/)
 
-## Installation
+## :round_pushpin: Installation
 
 This project need `ddeutil-io` extension namespace packages. If you want to install
 this package with application add-ons, you should add `app` in installation;
@@ -49,7 +53,7 @@ this package with application add-ons, you should add `app` in installation;
 
 > I added this feature to the main milestone.
 >
-> **Docker Images** supported:
+> :egg: **Docker Images** supported:
 >
 > | Docker Image                | Python Version | Support |
 > |-----------------------------|----------------|---------|
@@ -58,7 +62,7 @@ this package with application add-ons, you should add `app` in installation;
 > | ddeutil-workflow:python3.11 | `3.11`         | :x:     |
 > | ddeutil-workflow:python3.12 | `3.12`         | :x:     |
 
-## Usage
+## :beers: Usage
 
 This is examples that use workflow file for running common Data Engineering
 use-case.
@@ -70,8 +74,10 @@ use-case.
 > maintenance your data workflows.
 
 ```yaml
-run_py_local:
-
+run-py-local:
+
+  # Validate model that use to parsing exists for template file
+  type: ddeutil.workflow.Workflow
   on:
     # If workflow deploy to schedule, it will running every 5 minutes
     # with Asia/Bangkok timezone.
@@ -80,7 +86,7 @@ run_py_local:
   params:
     # Incoming execution parameters will validate with this type. It allow
     # to set default value or templating.
-
+    source-extract: str
     run-date: datetime
   jobs:
     getting-api-data:
@@ -89,17 +95,56 @@ run_py_local:
           id: retrieve-api
           uses: tasks/get-api-with-oauth-to-s3@requests
          with:
-
-
-
-
-
-
+            # Arguments of source data that want to retrieve.
+            method: post
+            url: https://finances/open-data/currency-pairs/
+            body:
+              resource: ${{ params.source-extract }}
+
+              # You can able to use filtering like Jinja template but this
+              # package does not use it.
+              filter: ${{ params.run-date | fmt(fmt='%Y%m%d') }}
+            auth:
+              type: bearer
+              keys: ${API_ACCESS_REFRESH_TOKEN}
+
+            # Arguments of target data that want to landing.
+            writing_mode: flatten
+            aws_s3_path: my-data/open-data/${{ params.source-extract }}
+
+            # This Authentication code should implement with your custom hook
+            # function. The template allow you to use environment variable.
             aws_access_client_id: ${AWS_ACCESS_CLIENT_ID}
             aws_access_client_secret: ${AWS_ACCESS_CLIENT_SECRET}
 ```
 
-
+The above workflow template is main executor pipeline that you want to do. If you
+want to schedule this workflow, you want to dynamic its parameters change base on
+execution time such as `run-date` should change base on that workflow running date.
+
+So, this package provide the `Schedule` template for this action.
+
+```yaml
+schedule-run-local-wf:
+
+  # Validate model that use to parsing exists for template file
+  type: ddeutil.workflow.scheduler.Schedule
+  workflows:
+
+    # Map existing workflow that want to deploy with scheduler application.
+    # It allow you to passing release parameter that dynamic change depend the
+    # current context of this scheduler application releasing that time.
+    - name: run-py-local
+      params:
+        source-extract: "USD-THB"
+        asat-dt: "${{ release.logical_date }}"
+```
+
+## :cookie: Configuration
+
+The main configuration that use to dynamic changing with your propose of this
+application. If any configuration values do not set yet, it will use default value
+and do not raise any error to you.
 
 | Environment | Component | Default | Description |
 |-------------------------------------|-----------|----------------------------------|----------------------------------------------------------------------------|
@@ -125,7 +170,7 @@ run_py_local:
 | `WORKFLOW_API_ENABLE_ROUTE_WORKFLOW` | API | true | A flag that enable workflow route to manage execute manually and workflow logging |
 | `WORKFLOW_API_ENABLE_ROUTE_SCHEDULE` | API | true | A flag that enable run scheduler |
 
-## Deployment
+## :rocket: Deployment
 
 This package able to run as a application service for receive manual trigger
 from the master node via RestAPI or use to be Scheduler background service
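The `${{ ... }}` placeholders in the README's templates are resolved against the workflow parameters at execution time. The sketch below is only an illustration of that substitution idea, not the package's implementation, and it deliberately skips the `| fmt(...)` filter syntax shown above:

```python
import re

# Match ${{ dotted.name }} placeholders (filters like "| fmt(...)" are not handled).
TEMPLATE = re.compile(r"\$\{\{\s*([\w.\-]+)\s*\}\}")

def render(value: str, context: dict) -> str:
    """Replace each ${{ dotted.name }} with its value from a context dict."""
    def lookup(match: re.Match) -> str:
        node = context
        for part in match.group(1).split("."):
            node = node[part]  # walk "params" -> "source-extract"
        return str(node)
    return TEMPLATE.sub(lookup, value)

print(render(
    "my-data/open-data/${{ params.source-extract }}",
    {"params": {"source-extract": "USD-THB"}},
))  # -> my-data/open-data/USD-THB
```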
--- ddeutil_workflow-0.0.12/pyproject.toml
+++ ddeutil_workflow-0.0.13/pyproject.toml
@@ -22,10 +22,11 @@ classifiers = [
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
 ]
 requires-python = ">=3.9.13"
 dependencies = [
-    "ddeutil-io",
+    "ddeutil-io>=0.1.12",
     "python-dotenv==1.0.1",
     "typer==0.12.5,<1.0.0",
     "schedule==1.2.2,<2.0.0",
@@ -34,7 +35,7 @@ dynamic = ["version"]
 
 [project.optional-dependencies]
 api = [
-    "fastapi
+    "fastapi>=0.114.1,<1.0.0",
 ]
 
 [project.urls]
@@ -60,8 +61,11 @@ relative_files = true
 concurrency = ["thread", "multiprocessing"]
 source = ["ddeutil.workflow", "tests"]
 omit = [
+    "src/ddeutil/workflow/__about__.py",
     "scripts/",
     # Omit this files because it does not ready to production.
+    "src/ddeutil/workflow/api.py",
+    "src/ddeutil/workflow/cli.py",
     "src/ddeutil/workflow/repeat.py",
     "src/ddeutil/workflow/route.py",
     "tests/utils.py",
--- /dev/null
+++ ddeutil_workflow-0.0.13/src/ddeutil/workflow/__about__.py
@@ -0,0 +1 @@
+__version__: str = "0.0.13"
--- ddeutil_workflow-0.0.12/src/ddeutil/workflow/job.py
+++ ddeutil_workflow-0.0.13/src/ddeutil/workflow/job.py
@@ -14,11 +14,13 @@ from concurrent.futures import (
     as_completed,
     wait,
 )
+from functools import lru_cache
 from pickle import PickleError
 from textwrap import dedent
 from threading import Event
 from typing import Optional
 
+from ddeutil.core import freeze_args
 from pydantic import BaseModel, Field
 from pydantic.functional_validators import field_validator, model_validator
 from typing_extensions import Self
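The two new imports work as a pair: `lru_cache` keys its cache on the call arguments, so every argument must be hashable, while a strategy matrix arrives as dicts and lists. The decorator below is a sketch in the spirit of `ddeutil.core.freeze_args` as it is used in this diff (the real implementation may differ); it converts mutable containers into hashable tuples before the cached call:

```python
from functools import lru_cache, wraps

def freeze(value):
    """Recursively convert dicts, lists, and sets into hashable tuples."""
    if isinstance(value, dict):
        return tuple(sorted((k, freeze(v)) for k, v in value.items()))
    if isinstance(value, (list, set)):
        return tuple(freeze(v) for v in value)
    return value

def freeze_args_sketch(func):
    """Hypothetical stand-in for ddeutil.core.freeze_args."""
    @wraps(func)
    def wrapper(*args, **kwargs):
        return func(*map(freeze, args), **{k: freeze(v) for k, v in kwargs.items()})
    return wrapper

@freeze_args_sketch
@lru_cache
def cached(matrix):
    return len(matrix)

# The raw dict is unhashable, but its frozen form can be an lru_cache key.
print(cached({"python": ["3.9", "3.10"]}))  # -> 1
```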
@@ -53,12 +55,70 @@ logger = get_logger("ddeutil.workflow")
|
|
53
55
|
__all__: TupleStr = (
|
54
56
|
"Strategy",
|
55
57
|
"Job",
|
58
|
+
"make",
|
56
59
|
)
|
57
60
|
|
58
61
|
|
62
|
+
@freeze_args
|
63
|
+
@lru_cache
|
64
|
+
def make(matrix, include, exclude) -> list[DictStr]:
|
65
|
+
"""Return List of product of matrix values that already filter with
|
66
|
+
exclude and add include.
|
67
|
+
|
68
|
+
:param matrix: A matrix values that want to cross product to possible
|
69
|
+
parallelism values.
|
70
|
+
:param include: A list of additional matrix that want to adds-in.
|
71
|
+
:param exclude: A list of exclude matrix that want to filter-out.
|
72
|
+
:rtype: list[DictStr]
|
73
|
+
"""
|
74
|
+
# NOTE: If it does not set matrix, it will return list of an empty dict.
|
75
|
+
if not (mt := matrix):
|
76
|
+
return [{}]
|
77
|
+
|
78
|
+
final: list[DictStr] = []
|
79
|
+
for r in cross_product(matrix=mt):
|
80
|
+
if any(
|
81
|
+
all(r[k] == v for k, v in exclude.items()) for exclude in exclude
|
82
|
+
):
|
83
|
+
continue
|
84
|
+
final.append(r)
|
85
|
+
|
86
|
+
# NOTE: If it is empty matrix and include, it will return list of an
|
87
|
+
# empty dict.
|
88
|
+
if not final and not include:
|
89
|
+
return [{}]
|
90
|
+
|
91
|
+
# NOTE: Add include to generated matrix with exclude list.
|
92
|
+
add: list[DictStr] = []
|
93
|
+
for inc in include:
|
94
|
+
# VALIDATE:
|
95
|
+
# Validate any key in include list should be a subset of some one
|
96
|
+
# in matrix.
|
97
|
+
if all(not (set(inc.keys()) <= set(m.keys())) for m in final):
|
98
|
+
raise ValueError("Include should have the keys equal to matrix")
|
99
|
+
|
100
|
+
# VALIDATE:
|
101
|
+
# Validate value of include does not duplicate with generated
|
102
|
+
# matrix.
|
103
|
+
if any(
|
104
|
+
all(inc.get(k) == v for k, v in m.items()) for m in [*final, *add]
|
105
|
+
):
|
106
|
+
continue
|
107
|
+
add.append(inc)
|
108
|
+
final.extend(add)
|
109
|
+
return final
|
110
|
+
|
111
|
+
|
59
112
|
class Strategy(BaseModel):
|
60
113
|
"""Strategy Model that will combine a matrix together for running the
|
61
|
-
special job.
|
114
|
+
special job with combination of matrix data.
|
115
|
+
|
116
|
+
This model does not be the part of job only because you can use it to
|
117
|
+
any model object. The propose of this model is generate metrix result that
|
118
|
+
comming from combination logic with any matrix values for running it with
|
119
|
+
parallelism.
|
120
|
+
|
121
|
+
[1, 2, 3] x [a, b] --> [1a], [1b], [2a], [2b], [3a], [3b]
|
62
122
|
|
63
123
|
Data Validate:
|
64
124
|
>>> strategy = {
|
@@ -105,13 +165,19 @@ class Strategy(BaseModel):
     def __prepare_keys(cls, values: DictData) -> DictData:
         """Rename key that use dash to underscore because Python does not
         support this character exist in any variable name.
+
+        :param values: A parsing values to this models
+        :rtype: DictData
         """
         dash2underscore("max-parallel", values)
         dash2underscore("fail-fast", values)
         return values
 
     def is_set(self) -> bool:
-        """Return True if this strategy was set from yaml template."""
+        """Return True if this strategy was set from yaml template.
+
+        :rtype: bool
+        """
         return len(self.matrix) > 0
 
     def make(self) -> list[DictStr]:
|
@@ -120,44 +186,7 @@ class Strategy(BaseModel):
|
|
120
186
|
|
121
187
|
:rtype: list[DictStr]
|
122
188
|
"""
|
123
|
-
|
124
|
-
if not (mt := self.matrix):
|
125
|
-
return [{}]
|
126
|
-
|
127
|
-
final: list[DictStr] = []
|
128
|
-
for r in cross_product(matrix=mt):
|
129
|
-
if any(
|
130
|
-
all(r[k] == v for k, v in exclude.items())
|
131
|
-
for exclude in self.exclude
|
132
|
-
):
|
133
|
-
continue
|
134
|
-
final.append(r)
|
135
|
-
|
136
|
-
# NOTE: If it is empty matrix and include, it will return list of an
|
137
|
-
# empty dict.
|
138
|
-
if not final and not self.include:
|
139
|
-
return [{}]
|
140
|
-
|
141
|
-
# NOTE: Add include to generated matrix with exclude list.
|
142
|
-
add: list[DictStr] = []
|
143
|
-
for include in self.include:
|
144
|
-
# VALIDATE:
|
145
|
-
# Validate any key in include list should be a subset of some one
|
146
|
-
# in matrix.
|
147
|
-
if all(not (set(include.keys()) <= set(m.keys())) for m in final):
|
148
|
-
raise ValueError("Include should have the keys equal to matrix")
|
149
|
-
|
150
|
-
# VALIDATE:
|
151
|
-
# Validate value of include does not duplicate with generated
|
152
|
-
# matrix.
|
153
|
-
if any(
|
154
|
-
all(include.get(k) == v for k, v in m.items())
|
155
|
-
for m in [*final, *add]
|
156
|
-
):
|
157
|
-
continue
|
158
|
-
add.append(include)
|
159
|
-
final.extend(add)
|
160
|
-
return final
|
189
|
+
return make(self.matrix, self.include, self.exclude)
|
161
190
|
|
162
191
|
|
163
192
|
class Job(BaseModel):
|
@@ -238,6 +267,7 @@ class Job(BaseModel):
 
     @model_validator(mode="after")
     def __prepare_running_id(self):
+        """Prepare the job running ID."""
         if self.run_id is None:
             self.run_id = gen_id(self.id or "", unique=True)
 
@@ -487,7 +517,7 @@
         stop all not done futures if it receive the first exception from all
         running futures.
 
-        :param event:
+        :param event: An event
         :param futures: A list of futures.
         :rtype: Result
         """
@@ -529,7 +559,8 @@
     def __catch_all_completed(self, futures: list[Future]) -> Result:
         """Job parallel pool futures catching with all-completed mode.
 
-        :param futures: A list of futures
+        :param futures: A list of futures that want to catch all completed
+            result.
         :rtype: Result
         """
         context: DictData = {}
--- ddeutil_workflow-0.0.12/src/ddeutil/workflow/on.py
+++ ddeutil_workflow-0.0.13/src/ddeutil/workflow/on.py
@@ -20,6 +20,7 @@ from .utils import Loader
 
 __all__: TupleStr = (
     "On",
+    "YearOn",
     "interval2crontab",
 )
 
@@ -187,8 +188,10 @@ class On(BaseModel):
         return self.cronjob.schedule(date=start, tz=self.tz).next
 
 
-class
-"""Implement On
+class YearOn(On):
+    """Implement On Year Schedule Model for limit year matrix that use by some
+    data schedule tools like AWS Glue.
+    """
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
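`on.py` also exports an `interval2crontab` helper alongside `On` and the new `YearOn` model. Its real signature is not visible in this diff, so the function below is a purely hypothetical sketch of the idea the name implies: mapping a human-readable interval onto a five-field crontab expression.

```python
# Hypothetical sketch only: the actual interval2crontab in ddeutil-workflow
# may take different parameters and support more interval kinds.
def interval_to_crontab(interval: str, minute: int = 0) -> str:
    """Translate a named interval into a five-field crontab string."""
    table = {
        "hourly": f"{minute} * * * *",   # every hour at the given minute
        "daily": f"{minute} 0 * * *",    # every day at 00:<minute>
        "monthly": f"{minute} 0 1 * *",  # first day of every month
    }
    return table[interval]

print(interval_to_crontab("daily", minute=30))  # -> "30 0 * * *"
```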