ddeutil-workflow 0.0.27__tar.gz → 0.0.29__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. ddeutil_workflow-0.0.29/PKG-INFO +292 -0
  2. ddeutil_workflow-0.0.29/README.md +260 -0
  3. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/pyproject.toml +15 -7
  4. ddeutil_workflow-0.0.29/src/ddeutil/workflow/__about__.py +1 -0
  5. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/src/ddeutil/workflow/__init__.py +1 -0
  6. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/src/ddeutil/workflow/__types.py +11 -9
  7. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/src/ddeutil/workflow/api/api.py +1 -53
  8. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/src/ddeutil/workflow/conf.py +51 -19
  9. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/src/ddeutil/workflow/cron.py +7 -7
  10. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/src/ddeutil/workflow/exceptions.py +1 -1
  11. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/src/ddeutil/workflow/hook.py +21 -4
  12. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/src/ddeutil/workflow/job.py +17 -16
  13. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/src/ddeutil/workflow/params.py +3 -3
  14. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/src/ddeutil/workflow/result.py +3 -3
  15. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/src/ddeutil/workflow/scheduler.py +9 -9
  16. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/src/ddeutil/workflow/stage.py +84 -115
  17. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/src/ddeutil/workflow/templates.py +11 -12
  18. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/src/ddeutil/workflow/utils.py +20 -13
  19. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/src/ddeutil/workflow/workflow.py +24 -21
  20. ddeutil_workflow-0.0.29/src/ddeutil_workflow.egg-info/PKG-INFO +292 -0
  21. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/src/ddeutil_workflow.egg-info/SOURCES.txt +1 -5
  22. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/src/ddeutil_workflow.egg-info/requires.txt +2 -2
  23. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_conf.py +4 -2
  24. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_hook_tag.py +3 -2
  25. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_stage.py +7 -4
  26. ddeutil_workflow-0.0.29/tests/test_stage_handler_exec.py +182 -0
  27. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_utils.py +15 -9
  28. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_workflow_exec.py +82 -2
  29. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_workflow_exec_hook.py +10 -10
  30. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_workflow_poke.py +5 -0
  31. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_workflow_schedule.py +1 -3
  32. ddeutil_workflow-0.0.27/PKG-INFO +0 -230
  33. ddeutil_workflow-0.0.27/README.md +0 -198
  34. ddeutil_workflow-0.0.27/src/ddeutil/workflow/__about__.py +0 -1
  35. ddeutil_workflow-0.0.27/src/ddeutil_workflow.egg-info/PKG-INFO +0 -230
  36. ddeutil_workflow-0.0.27/tests/test_stage_exec_bash.py +0 -34
  37. ddeutil_workflow-0.0.27/tests/test_stage_exec_hook.py +0 -46
  38. ddeutil_workflow-0.0.27/tests/test_stage_exec_py.py +0 -85
  39. ddeutil_workflow-0.0.27/tests/test_stage_exec_trigger.py +0 -30
  40. ddeutil_workflow-0.0.27/tests/test_workflow_exec_needs.py +0 -74
  41. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/LICENSE +0 -0
  42. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/setup.cfg +0 -0
  43. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/src/ddeutil/workflow/__cron.py +0 -0
  44. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/src/ddeutil/workflow/api/__init__.py +0 -0
  45. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/src/ddeutil/workflow/api/repeat.py +0 -0
  46. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/src/ddeutil/workflow/api/route.py +0 -0
  47. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/src/ddeutil_workflow.egg-info/dependency_links.txt +0 -0
  48. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/src/ddeutil_workflow.egg-info/top_level.txt +0 -0
  49. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test__cron.py +0 -0
  50. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test__regex.py +0 -0
  51. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_conf_log.py +0 -0
  52. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_cron_on.py +0 -0
  53. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_job.py +0 -0
  54. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_job_exec_py.py +0 -0
  55. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_job_exec_strategy.py +0 -0
  56. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_job_strategy.py +0 -0
  57. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_params.py +0 -0
  58. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_result.py +0 -0
  59. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_schedule.py +0 -0
  60. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_schedule_control.py +0 -0
  61. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_schedule_tasks.py +0 -0
  62. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_templates.py +0 -0
  63. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_templates_filter.py +0 -0
  64. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_workflow.py +0 -0
  65. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_workflow_job_exec.py +0 -0
  66. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_workflow_release.py +0 -0
  67. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_workflow_release_and_queue.py +0 -0
  68. {ddeutil_workflow-0.0.27 → ddeutil_workflow-0.0.29}/tests/test_workflow_task.py +0 -0
@@ -0,0 +1,292 @@
1
+ Metadata-Version: 2.2
2
+ Name: ddeutil-workflow
3
+ Version: 0.0.29
4
+ Summary: Lightweight workflow orchestration
5
+ Author-email: ddeutils <korawich.anu@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/ddeutils/ddeutil-workflow/
8
+ Project-URL: Source Code, https://github.com/ddeutils/ddeutil-workflow/
9
+ Keywords: orchestration,workflow
10
+ Classifier: Topic :: Utilities
11
+ Classifier: Natural Language :: English
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python
16
+ Classifier: Programming Language :: Python :: 3 :: Only
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Requires-Python: >=3.9.13
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: ddeutil>=0.4.6
26
+ Requires-Dist: ddeutil-io[toml,yaml]>=0.2.3
27
+ Requires-Dist: pydantic==2.10.6
28
+ Requires-Dist: python-dotenv==1.0.1
29
+ Requires-Dist: schedule<2.0.0,==1.2.2
30
+ Provides-Extra: api
31
+ Requires-Dist: fastapi<1.0.0,>=0.115.0; extra == "api"
32
+
33
+ # Workflow Orchestration
34
+
35
+ [![test](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
36
+ [![codecov](https://codecov.io/gh/ddeutils/ddeutil-workflow/graph/badge.svg?token=3NDPN2I0H9)](https://codecov.io/gh/ddeutils/ddeutil-workflow)
37
+ [![pypi version](https://img.shields.io/pypi/v/ddeutil-workflow)](https://pypi.org/project/ddeutil-workflow/)
38
+ [![python support version](https://img.shields.io/pypi/pyversions/ddeutil-workflow)](https://pypi.org/project/ddeutil-workflow/)
39
+ [![size](https://img.shields.io/github/languages/code-size/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow)
40
+ [![gh license](https://img.shields.io/github/license/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
41
+ [![code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
42
+
43
+ The **Lightweight Workflow Orchestration** with fewer dependencies that was created
44
+ to make it easy to build a simple metadata-driven data workflow. It can be used for data operations defined
45
+ by a `.yaml` template.
46
+
47
+ > [!WARNING]
48
+ > This package provides only orchestration workloads. That means you should not
49
+ > use the workflow stage to process any large volume data which use a lot of compute
50
+ > resource :cold_sweat:.
51
+
52
+ In my opinion, I think it should not create duplicate workflow codes if I can
53
+ write with dynamic input parameters on the one template workflow that just change
54
+ the input parameters per use-case instead.
55
+ This way I can handle a lot of logical workflows in our orgs with only metadata
56
+ configuration. It is called **Metadata Driven Data Workflow**.
57
+
58
+ ---
59
+
60
+ **:pushpin: <u>Rules of This Workflow engine</u>**:
61
+
62
+ 1. The Minimum frequency unit of scheduling is **1 minute** :warning:
63
+ 2. Can not re-run only failed stage and its pending downstream :rotating_light:
64
+ 3. All parallel tasks inside workflow engine use Multi-Threading
65
+ (Python 3.13 unlock GIL :unlock:)
66
+
67
+ ---
68
+
69
+ **:memo: <u>Workflow Diagrams</u>**:
70
+
71
+ This diagram show where is this application run on the production infrastructure.
72
+ You will see that this application do only running code with stress-less which mean
73
+ you should to set the data layer separate this core program before run this application.
74
+
75
+ ```mermaid
76
+ flowchart LR
77
+ subgraph Interface
78
+ A((User))
79
+ subgraph Docker Container
80
+ G@{ shape: rounded, label: "Observe<br>Application" }
81
+ end
82
+ end
83
+
84
+ A --->|action| B(Workflow<br>Application)
85
+ B ---> |response| A
86
+ B -..-> |response| G
87
+ G -..-> |request| B
88
+
89
+ subgraph Docker Container
90
+ B
91
+ end
92
+
93
+ subgraph Data Context
94
+ D@{ shape: processes, label: "Logs" }
95
+ E@{ shape: lin-cyl, label: "Metadata" }
96
+ end
97
+
98
+ subgraph Git Context
99
+ F@{ shape: tag-rect, label: "YAML<br>files" }
100
+ end
101
+
102
+ B --->|disable| F
103
+ F --->|read| B
104
+
105
+ B --->|write| E
106
+ E --->|read| B
107
+ B --->|write| D
108
+
109
+ D -.->|read| G
110
+ E -.->|read| G
111
+ ```
112
+
113
+ > [!NOTE]
114
+ > _Disclaimer_: I inspire the dynamic statement from the [**GitHub Action**](https://github.com/features/actions)
115
+ > with `.yml` files and all configs file from several data orchestration framework
116
+ > tools from my experience on Data Engineer. :grimacing:
117
+ >
118
+ > Other workflow tools that I interest on them and pick some interested feature
119
+ > implement to this package:
120
+ >
121
+ > - [Google **Workflows**](https://cloud.google.com/workflows)
122
+ > - [AWS **Step Functions**](https://aws.amazon.com/step-functions/)
123
+
124
+ ## :round_pushpin: Installation
125
+
126
+ This project need `ddeutil` and `ddeutil-io` extension namespace packages.
127
+ If you want to install this package with application add-ons, you should add
128
+ `app` in installation;
129
+
130
+ | Use-case | Install Optional | Support |
131
+ |----------------|--------------------------|--------------------|
132
+ | Python | `ddeutil-workflow` | :heavy_check_mark: |
133
+ | FastAPI Server | `ddeutil-workflow[api]` | :heavy_check_mark: |
134
+
135
+ ## :beers: Usage
136
+
137
+ This is examples that use workflow file for running common Data Engineering
138
+ use-case.
139
+
140
+ > [!IMPORTANT]
141
+ > I recommend you to use the `hook` stage for all actions that you want to do
142
+ > with workflow activity that you want to orchestrate. Because it is able to
143
+ > dynamic an input argument with the same hook function that make you use less
144
+ > time to maintenance your data workflows.
145
+
146
+ ```yaml
147
+ run-py-local:
148
+
149
+ # Validate model that use to parsing exists for template file
150
+ type: Workflow
151
+ on:
152
+ # If workflow deploy to schedule, it will run every 5 minutes
153
+ # with Asia/Bangkok timezone.
154
+ - cronjob: '*/5 * * * *'
155
+ timezone: "Asia/Bangkok"
156
+ params:
157
+ # Incoming execution parameters will validate with this type. It allows
158
+ # to set default value or templating.
159
+ source-extract: str
160
+ run-date: datetime
161
+ jobs:
162
+ getting-api-data:
163
+ stages:
164
+ - name: "Retrieve API Data"
165
+ id: retrieve-api
166
+ uses: tasks/get-api-with-oauth-to-s3@requests
167
+ with:
168
+ # Arguments of source data that want to retrieve.
169
+ method: post
170
+ url: https://finances/open-data/currency-pairs/
171
+ body:
172
+ resource: ${{ params.source-extract }}
173
+
174
+ # You can use filtering like Jinja template but this
175
+ # package does not use it.
176
+ filter: ${{ params.run-date | fmt(fmt='%Y%m%d') }}
177
+ auth:
178
+ type: bearer
179
+ keys: ${API_ACCESS_REFRESH_TOKEN}
180
+
181
+ # Arguments of target data that want to land.
182
+ writing_mode: flatten
183
+ aws_s3_path: my-data/open-data/${{ params.source-extract }}
184
+
185
+ # This Authentication code should implement with your custom hook
186
+ # function. The template allow you to use environment variable.
187
+ aws_access_client_id: ${AWS_ACCESS_CLIENT_ID}
188
+ aws_access_client_secret: ${AWS_ACCESS_CLIENT_SECRET}
189
+ ```
190
+
191
+ The above workflow template is main executor pipeline that you want to do. If you
192
+ want to schedule this workflow, you want to dynamic its parameters change base on
193
+ execution time such as `run-date` should change base on that workflow running date.
194
+
195
+ So, this package provide the `Schedule` template for this action.
196
+
197
+ ```yaml
198
+ schedule-run-local-wf:
199
+
200
+ # Validate model that use to parsing exists for template file
201
+ type: Schedule
202
+ workflows:
203
+
204
+ # Map existing workflow that want to deploy with scheduler application.
205
+ # It allows you to pass release parameter that dynamic change depend on the
206
+ # current context of this scheduler application releasing that time.
207
+ - name: run-py-local
208
+ params:
209
+ source-extract: "USD-THB"
210
+ asat-dt: "${{ release.logical_date }}"
211
+ ```
212
+
213
+ ## :cookie: Configuration
214
+
215
+ The main configuration that use to dynamic changing with your objective of this
216
+ application. If any configuration values do not set yet, it will use default value
217
+ and do not raise any error to you.
218
+
219
+ > [!IMPORTANT]
220
+ > The config value that you will set on the environment should combine with
221
+ > prefix, component, and name which is `WORKFLOW_{component}_{name}` (Upper case).
222
+
223
+ | Name | Component | Default | Description |
224
+ |:-----------------------------|:---------:|:----------------------------------|:-------------------------------------------------------------------------------------------------------------------|
225
+ | **ROOT_PATH** | Core | `.` | The root path of the workflow application. |
226
+ | **REGISTRY** | Core | `.` | List of importable string for the hook stage. |
227
+ | **REGISTRY_FILTER** | Core | `ddeutil.workflow.templates` | List of importable string for the filter template. |
228
+ | **CONF_PATH** | Core | `conf` | The config path that keep all template `.yaml` files. |
229
+ | **TIMEZONE** | Core | `Asia/Bangkok` | A Timezone string value that will pass to `ZoneInfo` object. |
230
+ | **STAGE_DEFAULT_ID** | Core | `true` | A flag that enable default stage ID that use for catch an execution output. |
231
+ | **STAGE_RAISE_ERROR** | Core | `false` | A flag that all stage raise StageException from stage execution. |
232
+ | **JOB_DEFAULT_ID** | Core | `false` | A flag that enable default job ID that use for catch an execution output. The ID that use will be sequence number. |
233
+ | **JOB_RAISE_ERROR** | Core | `true` | A flag that all job raise JobException from job strategy execution. |
234
+ | **MAX_NUM_POKING** | Core | `4` | . |
235
+ | **MAX_JOB_PARALLEL** | Core | `2` | The maximum job number that able to run parallel in workflow executor. |
236
+ | **MAX_JOB_EXEC_TIMEOUT** | Core | `600` | |
237
+ | **MAX_CRON_PER_WORKFLOW** | Core | `5` | |
238
+ | **MAX_QUEUE_COMPLETE_HIST** | Core | `16` | |
239
+ | **GENERATE_ID_SIMPLE_MODE** | Core | `true` | A flag that enables generating ID with `md5` algorithm. |
240
+ | **PATH** | Log | `./logs` | The log path of the workflow saving log. |
241
+ | **DEBUG_MODE** | Log | `true` | A flag that enable logging with debug level mode. |
242
+ | **ENABLE_WRITE** | Log | `true` | A flag that enable logging object saving log to its destination. |
243
+ | **MAX_PROCESS** | App | `2` | The maximum process worker number that run in scheduler app module. |
244
+ | **MAX_SCHEDULE_PER_PROCESS** | App | `100` | A schedule per process that run parallel. |
245
+ | **STOP_BOUNDARY_DELTA** | App | `'{"minutes": 5, "seconds": 20}'` | A time delta value that use to stop scheduler app in json string format. |
246
+
247
+ **API Application**:
248
+
249
+ | Environment | Component | Default | Description |
250
+ |:---------------------------|:-----------:|---------|------------------------------------------------------------------------------------|
251
+ | **ENABLE_ROUTE_WORKFLOW** | API | `true` | A flag that enable workflow route to manage execute manually and workflow logging. |
252
+ | **ENABLE_ROUTE_SCHEDULE** | API | `true` | A flag that enable run scheduler. |
253
+
254
+ ## :rocket: Deployment
255
+
256
+ This package able to run as an application service for receive manual trigger
257
+ from the master node via RestAPI or use to be Scheduler background service
258
+ like crontab job but via Python API.
259
+
260
+ ### API Server
261
+
262
+ ```shell
263
+ (venv) $ uvicorn src.ddeutil.workflow.api:app \
264
+ --host 127.0.0.1 \
265
+ --port 80 \
266
+ --no-access-log
267
+ ```
268
+
269
+ > [!NOTE]
270
+ > If this package already deploy, it is able to use multiprocess;
271
+ > `uvicorn ddeutil.workflow.api:app --host 127.0.0.1 --port 80 --workers 4`
272
+
273
+ ### Docker Container
274
+
275
+ Create Docker image;
276
+
277
+ ```shell
278
+ $ docker build -t ddeutil-workflow:latest -f .container/Dockerfile .
279
+ ```
280
+
281
+ Run the above Docker image;
282
+
283
+ ```shell
284
+ $ docker run -i ddeutil-workflow:latest
285
+ ```
286
+
287
+ ## :speech_balloon: Contribute
288
+
289
+ I do not think this project will go around the world because it has specific propose,
290
+ and you can create by your coding without this project dependency for long term
291
+ solution. So, on this time, you can open [the GitHub issue on this project :raised_hands:](https://github.com/ddeutils/ddeutil-workflow/issues)
292
+ for fix bug or request new feature if you want it.
@@ -0,0 +1,260 @@
1
+ # Workflow Orchestration
2
+
3
+ [![test](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
4
+ [![codecov](https://codecov.io/gh/ddeutils/ddeutil-workflow/graph/badge.svg?token=3NDPN2I0H9)](https://codecov.io/gh/ddeutils/ddeutil-workflow)
5
+ [![pypi version](https://img.shields.io/pypi/v/ddeutil-workflow)](https://pypi.org/project/ddeutil-workflow/)
6
+ [![python support version](https://img.shields.io/pypi/pyversions/ddeutil-workflow)](https://pypi.org/project/ddeutil-workflow/)
7
+ [![size](https://img.shields.io/github/languages/code-size/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow)
8
+ [![gh license](https://img.shields.io/github/license/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
9
+ [![code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
10
+
11
+ The **Lightweight Workflow Orchestration** with fewer dependencies that was created
12
+ to make it easy to build a simple metadata-driven data workflow. It can be used for data operations defined
13
+ by a `.yaml` template.
14
+
15
+ > [!WARNING]
16
+ > This package provides only orchestration workloads. That means you should not
17
+ > use the workflow stage to process any large volume data which use a lot of compute
18
+ > resource :cold_sweat:.
19
+
20
+ In my opinion, I think it should not create duplicate workflow codes if I can
21
+ write with dynamic input parameters on the one template workflow that just change
22
+ the input parameters per use-case instead.
23
+ This way I can handle a lot of logical workflows in our orgs with only metadata
24
+ configuration. It is called **Metadata Driven Data Workflow**.
25
+
26
+ ---
27
+
28
+ **:pushpin: <u>Rules of This Workflow engine</u>**:
29
+
30
+ 1. The Minimum frequency unit of scheduling is **1 minute** :warning:
31
+ 2. Can not re-run only failed stage and its pending downstream :rotating_light:
32
+ 3. All parallel tasks inside workflow engine use Multi-Threading
33
+ (Python 3.13 unlock GIL :unlock:)
34
+
35
+ ---
36
+
37
+ **:memo: <u>Workflow Diagrams</u>**:
38
+
39
+ This diagram show where is this application run on the production infrastructure.
40
+ You will see that this application do only running code with stress-less which mean
41
+ you should to set the data layer separate this core program before run this application.
42
+
43
+ ```mermaid
44
+ flowchart LR
45
+ subgraph Interface
46
+ A((User))
47
+ subgraph Docker Container
48
+ G@{ shape: rounded, label: "Observe<br>Application" }
49
+ end
50
+ end
51
+
52
+ A --->|action| B(Workflow<br>Application)
53
+ B ---> |response| A
54
+ B -..-> |response| G
55
+ G -..-> |request| B
56
+
57
+ subgraph Docker Container
58
+ B
59
+ end
60
+
61
+ subgraph Data Context
62
+ D@{ shape: processes, label: "Logs" }
63
+ E@{ shape: lin-cyl, label: "Metadata" }
64
+ end
65
+
66
+ subgraph Git Context
67
+ F@{ shape: tag-rect, label: "YAML<br>files" }
68
+ end
69
+
70
+ B --->|disable| F
71
+ F --->|read| B
72
+
73
+ B --->|write| E
74
+ E --->|read| B
75
+ B --->|write| D
76
+
77
+ D -.->|read| G
78
+ E -.->|read| G
79
+ ```
80
+
81
+ > [!NOTE]
82
+ > _Disclaimer_: I inspire the dynamic statement from the [**GitHub Action**](https://github.com/features/actions)
83
+ > with `.yml` files and all configs file from several data orchestration framework
84
+ > tools from my experience on Data Engineer. :grimacing:
85
+ >
86
+ > Other workflow tools that I interest on them and pick some interested feature
87
+ > implement to this package:
88
+ >
89
+ > - [Google **Workflows**](https://cloud.google.com/workflows)
90
+ > - [AWS **Step Functions**](https://aws.amazon.com/step-functions/)
91
+
92
+ ## :round_pushpin: Installation
93
+
94
+ This project need `ddeutil` and `ddeutil-io` extension namespace packages.
95
+ If you want to install this package with application add-ons, you should add
96
+ `app` in installation;
97
+
98
+ | Use-case | Install Optional | Support |
99
+ |----------------|--------------------------|--------------------|
100
+ | Python | `ddeutil-workflow` | :heavy_check_mark: |
101
+ | FastAPI Server | `ddeutil-workflow[api]` | :heavy_check_mark: |
102
+
103
+ ## :beers: Usage
104
+
105
+ This is examples that use workflow file for running common Data Engineering
106
+ use-case.
107
+
108
+ > [!IMPORTANT]
109
+ > I recommend you to use the `hook` stage for all actions that you want to do
110
+ > with workflow activity that you want to orchestrate. Because it is able to
111
+ > dynamic an input argument with the same hook function that make you use less
112
+ > time to maintenance your data workflows.
113
+
114
+ ```yaml
115
+ run-py-local:
116
+
117
+ # Validate model that use to parsing exists for template file
118
+ type: Workflow
119
+ on:
120
+ # If workflow deploy to schedule, it will run every 5 minutes
121
+ # with Asia/Bangkok timezone.
122
+ - cronjob: '*/5 * * * *'
123
+ timezone: "Asia/Bangkok"
124
+ params:
125
+ # Incoming execution parameters will validate with this type. It allows
126
+ # to set default value or templating.
127
+ source-extract: str
128
+ run-date: datetime
129
+ jobs:
130
+ getting-api-data:
131
+ stages:
132
+ - name: "Retrieve API Data"
133
+ id: retrieve-api
134
+ uses: tasks/get-api-with-oauth-to-s3@requests
135
+ with:
136
+ # Arguments of source data that want to retrieve.
137
+ method: post
138
+ url: https://finances/open-data/currency-pairs/
139
+ body:
140
+ resource: ${{ params.source-extract }}
141
+
142
+ # You can use filtering like Jinja template but this
143
+ # package does not use it.
144
+ filter: ${{ params.run-date | fmt(fmt='%Y%m%d') }}
145
+ auth:
146
+ type: bearer
147
+ keys: ${API_ACCESS_REFRESH_TOKEN}
148
+
149
+ # Arguments of target data that want to land.
150
+ writing_mode: flatten
151
+ aws_s3_path: my-data/open-data/${{ params.source-extract }}
152
+
153
+ # This Authentication code should implement with your custom hook
154
+ # function. The template allow you to use environment variable.
155
+ aws_access_client_id: ${AWS_ACCESS_CLIENT_ID}
156
+ aws_access_client_secret: ${AWS_ACCESS_CLIENT_SECRET}
157
+ ```
158
+
159
+ The above workflow template is main executor pipeline that you want to do. If you
160
+ want to schedule this workflow, you want to dynamic its parameters change base on
161
+ execution time such as `run-date` should change base on that workflow running date.
162
+
163
+ So, this package provide the `Schedule` template for this action.
164
+
165
+ ```yaml
166
+ schedule-run-local-wf:
167
+
168
+ # Validate model that use to parsing exists for template file
169
+ type: Schedule
170
+ workflows:
171
+
172
+ # Map existing workflow that want to deploy with scheduler application.
173
+ # It allows you to pass release parameter that dynamic change depend on the
174
+ # current context of this scheduler application releasing that time.
175
+ - name: run-py-local
176
+ params:
177
+ source-extract: "USD-THB"
178
+ asat-dt: "${{ release.logical_date }}"
179
+ ```
180
+
181
+ ## :cookie: Configuration
182
+
183
+ The main configuration that use to dynamic changing with your objective of this
184
+ application. If any configuration values do not set yet, it will use default value
185
+ and do not raise any error to you.
186
+
187
+ > [!IMPORTANT]
188
+ > The config value that you will set on the environment should combine with
189
+ > prefix, component, and name which is `WORKFLOW_{component}_{name}` (Upper case).
190
+
191
+ | Name | Component | Default | Description |
192
+ |:-----------------------------|:---------:|:----------------------------------|:-------------------------------------------------------------------------------------------------------------------|
193
+ | **ROOT_PATH** | Core | `.` | The root path of the workflow application. |
194
+ | **REGISTRY** | Core | `.` | List of importable string for the hook stage. |
195
+ | **REGISTRY_FILTER** | Core | `ddeutil.workflow.templates` | List of importable string for the filter template. |
196
+ | **CONF_PATH** | Core | `conf` | The config path that keep all template `.yaml` files. |
197
+ | **TIMEZONE** | Core | `Asia/Bangkok` | A Timezone string value that will pass to `ZoneInfo` object. |
198
+ | **STAGE_DEFAULT_ID** | Core | `true` | A flag that enable default stage ID that use for catch an execution output. |
199
+ | **STAGE_RAISE_ERROR** | Core | `false` | A flag that all stage raise StageException from stage execution. |
200
+ | **JOB_DEFAULT_ID** | Core | `false` | A flag that enable default job ID that use for catch an execution output. The ID that use will be sequence number. |
201
+ | **JOB_RAISE_ERROR** | Core | `true` | A flag that all job raise JobException from job strategy execution. |
202
+ | **MAX_NUM_POKING** | Core | `4` | . |
203
+ | **MAX_JOB_PARALLEL** | Core | `2` | The maximum job number that able to run parallel in workflow executor. |
204
+ | **MAX_JOB_EXEC_TIMEOUT** | Core | `600` | |
205
+ | **MAX_CRON_PER_WORKFLOW** | Core | `5` | |
206
+ | **MAX_QUEUE_COMPLETE_HIST** | Core | `16` | |
207
+ | **GENERATE_ID_SIMPLE_MODE** | Core | `true` | A flog that enable generating ID with `md5` algorithm. |
208
+ | **PATH** | Log | `./logs` | The log path of the workflow saving log. |
209
+ | **DEBUG_MODE** | Log | `true` | A flag that enable logging with debug level mode. |
210
+ | **ENABLE_WRITE** | Log | `true` | A flag that enable logging object saving log to its destination. |
211
+ | **MAX_PROCESS** | App | `2` | The maximum process worker number that run in scheduler app module. |
212
+ | **MAX_SCHEDULE_PER_PROCESS** | App | `100` | A schedule per process that run parallel. |
213
+ | **STOP_BOUNDARY_DELTA** | App | `'{"minutes": 5, "seconds": 20}'` | A time delta value that use to stop scheduler app in json string format. |
214
+
215
+ **API Application**:
216
+
217
+ | Environment | Component | Default | Description |
218
+ |:---------------------------|:-----------:|---------|------------------------------------------------------------------------------------|
219
+ | **ENABLE_ROUTE_WORKFLOW** | API | `true` | A flag that enable workflow route to manage execute manually and workflow logging. |
220
+ | **ENABLE_ROUTE_SCHEDULE** | API | `true` | A flag that enable run scheduler. |
221
+
222
+ ## :rocket: Deployment
223
+
224
+ This package able to run as an application service for receive manual trigger
225
+ from the master node via RestAPI or use to be Scheduler background service
226
+ like crontab job but via Python API.
227
+
228
+ ### API Server
229
+
230
+ ```shell
231
+ (venv) $ uvicorn src.ddeutil.workflow.api:app \
232
+ --host 127.0.0.1 \
233
+ --port 80 \
234
+ --no-access-log
235
+ ```
236
+
237
+ > [!NOTE]
238
+ > If this package already deploy, it is able to use multiprocess;
239
+ > `uvicorn ddeutil.workflow.api:app --host 127.0.0.1 --port 80 --workers 4`
240
+
241
+ ### Docker Container
242
+
243
+ Create Docker image;
244
+
245
+ ```shell
246
+ $ docker build -t ddeutil-workflow:latest -f .container/Dockerfile .
247
+ ```
248
+
249
+ Run the above Docker image;
250
+
251
+ ```shell
252
+ $ docker run -i ddeutil-workflow:latest
253
+ ```
254
+
255
+ ## :speech_balloon: Contribute
256
+
257
+ I do not think this project will go around the world because it has specific propose,
258
+ and you can create by your coding without this project dependency for long term
259
+ solution. So, on this time, you can open [the GitHub issue on this project :raised_hands:](https://github.com/ddeutils/ddeutil-workflow/issues)
260
+ for fix bug or request new feature if you want it.
@@ -1,10 +1,10 @@
1
1
  [build-system]
2
- requires = ["setuptools>=68.0"]
2
+ requires = [ "setuptools>=68.0" ]
3
3
  build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ddeutil-workflow"
7
- description = "Lightweight workflow orchestration with less dependencies"
7
+ description = "Lightweight workflow orchestration"
8
8
  readme = {file = "README.md", content-type = "text/markdown"}
9
9
  license = {text = "MIT"}
10
10
  authors = [{ name = "ddeutils", email = "korawich.anu@gmail.com" }]
@@ -26,18 +26,16 @@ classifiers = [
26
26
  ]
27
27
  requires-python = ">=3.9.13"
28
28
  dependencies = [
29
- "ddeutil>=0.4.3",
29
+ "ddeutil>=0.4.6",
30
30
  "ddeutil-io[yaml,toml]>=0.2.3",
31
- "pydantic==2.10.4",
31
+ "pydantic==2.10.6",
32
32
  "python-dotenv==1.0.1",
33
33
  "schedule==1.2.2,<2.0.0",
34
34
  ]
35
35
  dynamic = ["version"]
36
36
 
37
37
  [project.optional-dependencies]
38
- api = [
39
- "fastapi>=0.115.0,<1.0.0",
40
- ]
38
+ api = [ "fastapi>=0.115.0,<1.0.0" ]
41
39
 
42
40
  [project.urls]
43
41
  Homepage = "https://github.com/ddeutils/ddeutil-workflow/"
@@ -52,6 +50,10 @@ where = ["src"]
52
50
  [tool.shelf.version]
53
51
  version = "./src/ddeutil/workflow/__about__.py"
54
52
  changelog = "CHANGELOG.md"
53
+ commit_msg_format = "- {subject}"
54
+
55
+ [tool.shelf.git]
56
+ commit_prefix_force_fix = true
55
57
 
56
58
  [tool.coverage.run]
57
59
  branch = true
@@ -75,6 +77,12 @@ exclude_lines = [
75
77
 
76
78
  [tool.pytest.ini_options]
77
79
  pythonpath = ["src"]
80
+ # NOTE: You can deselect multiple markers by '-m "not (poke or api)"'
81
+ markers = [
82
+ "poke: marks tests as slow by poking (deselect with '-m \"not poke\"')",
83
+ "schedule: marks tests as schedule (deselect with '-m \"not schedule\"')",
84
+ "api: marks tests as api (deselect with '-m \"not api\"')",
85
+ ]
78
86
  console_output_style = "count"
79
87
  addopts = [
80
88
  "--strict-config",
@@ -0,0 +1 @@
1
+ __version__: str = "0.0.29"
@@ -4,6 +4,7 @@
4
4
  # license information.
5
5
  # ------------------------------------------------------------------------------
6
6
  from .__cron import CronJob, CronRunner
7
+ from .__types import Re
7
8
  from .conf import (
8
9
  Config,
9
10
  Loader,
@@ -27,6 +27,8 @@ Matrix = dict[str, Union[list[str], list[int]]]
27
27
 
28
28
 
29
29
  class Context(TypedDict):
30
+ """TypeDict support the Context."""
31
+
30
32
  params: dict[str, Any]
31
33
  jobs: dict[str, Any]
32
34
 
@@ -71,14 +73,14 @@ class Re:
71
73
  # - ${{ params.source?.schema }}
72
74
  #
73
75
  __re_caller: str = r"""
74
- \$
75
- {{
76
- \s*
76
+ \$ # start with $
77
+ {{ # value open with {{
78
+ \s* # whitespace or not
77
79
  (?P<caller>
78
80
  (?P<caller_prefix>(?:[a-zA-Z_-]+\??\.)*)
79
81
  (?P<caller_last>[a-zA-Z0-9_\-.'\"(\)[\]{}]+\??)
80
82
  )
81
- \s*
83
+ \s* # whitespace or not
82
84
  (?P<post_filters>
83
85
  (?:
84
86
  \|\s*
@@ -88,7 +90,7 @@ class Re:
88
90
  )\s*
89
91
  )*
90
92
  )
91
- }}
93
+ }} # value close with }}
92
94
  """
93
95
  RE_CALLER: Pattern = re.compile(
94
96
  __re_caller, MULTILINE | IGNORECASE | UNICODE | VERBOSE
@@ -103,13 +105,13 @@ class Re:
103
105
  # - tasks/function@dummy
104
106
  #
105
107
  __re_task_fmt: str = r"""
106
- ^
108
+ ^ # start task format
107
109
  (?P<path>[^/@]+)
108
- /
110
+ / # start get function with /
109
111
  (?P<func>[^@]+)
110
- @
112
+ @ # start tag with @
111
113
  (?P<tag>.+)
112
- $
114
+ $ # end task format
113
115
  """
114
116
  RE_TASK_FMT: Pattern = re.compile(
115
117
  __re_task_fmt, MULTILINE | IGNORECASE | UNICODE | VERBOSE