ddeutil-workflow 0.0.8__tar.gz → 0.0.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. ddeutil_workflow-0.0.9/PKG-INFO +273 -0
  2. ddeutil_workflow-0.0.9/README.md +240 -0
  3. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/pyproject.toml +10 -7
  4. ddeutil_workflow-0.0.9/src/ddeutil/workflow/__about__.py +1 -0
  5. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/src/ddeutil/workflow/__init__.py +3 -14
  6. ddeutil_workflow-0.0.9/src/ddeutil/workflow/api.py +89 -0
  7. ddeutil_workflow-0.0.9/src/ddeutil/workflow/cli.py +51 -0
  8. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/src/ddeutil/workflow/loader.py +65 -13
  9. ddeutil_workflow-0.0.9/src/ddeutil/workflow/log.py +177 -0
  10. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/src/ddeutil/workflow/on.py +18 -15
  11. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/src/ddeutil/workflow/pipeline.py +389 -140
  12. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/src/ddeutil/workflow/repeat.py +9 -5
  13. ddeutil_workflow-0.0.9/src/ddeutil/workflow/route.py +71 -0
  14. ddeutil_workflow-0.0.9/src/ddeutil/workflow/scheduler.py +452 -0
  15. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/src/ddeutil/workflow/stage.py +145 -73
  16. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/src/ddeutil/workflow/utils.py +133 -42
  17. ddeutil_workflow-0.0.9/src/ddeutil_workflow.egg-info/PKG-INFO +273 -0
  18. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/src/ddeutil_workflow.egg-info/SOURCES.txt +14 -5
  19. ddeutil_workflow-0.0.9/src/ddeutil_workflow.egg-info/entry_points.txt +2 -0
  20. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/src/ddeutil_workflow.egg-info/requires.txt +3 -3
  21. ddeutil_workflow-0.0.9/tests/test__conf_exist.py +11 -0
  22. ddeutil_workflow-0.0.8/tests/test_scheduler.py → ddeutil_workflow-0.0.9/tests/test_cron.py +18 -18
  23. ddeutil_workflow-0.0.9/tests/test_job.py +18 -0
  24. ddeutil_workflow-0.0.9/tests/test_job_py.py +33 -0
  25. ddeutil_workflow-0.0.9/tests/test_log.py +34 -0
  26. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/tests/test_on.py +13 -0
  27. ddeutil_workflow-0.0.9/tests/test_params.py +13 -0
  28. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/tests/test_pipeline.py +8 -0
  29. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/tests/test_pipeline_desc.py +1 -1
  30. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/tests/test_pipeline_matrix.py +4 -4
  31. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/tests/test_pipeline_on.py +1 -1
  32. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/tests/test_pipeline_params.py +1 -1
  33. ddeutil_workflow-0.0.9/tests/test_pipeline_run.py +22 -0
  34. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/tests/test_pipeline_run_raise.py +1 -1
  35. ddeutil_workflow-0.0.9/tests/test_poke.py +19 -0
  36. ddeutil_workflow-0.0.9/tests/test_stage.py +31 -0
  37. ddeutil_workflow-0.0.9/tests/test_stage_bash.py +40 -0
  38. ddeutil_workflow-0.0.9/tests/test_stage_condition.py +14 -0
  39. ddeutil_workflow-0.0.9/tests/test_stage_hook.py +56 -0
  40. ddeutil_workflow-0.0.9/tests/test_stage_py.py +56 -0
  41. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/tests/test_stage_trigger.py +1 -1
  42. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/tests/test_utils_result.py +11 -22
  43. ddeutil_workflow-0.0.8/tests/test_utils_param2template.py → ddeutil_workflow-0.0.9/tests/test_utils_template.py +54 -0
  44. ddeutil_workflow-0.0.8/PKG-INFO +0 -266
  45. ddeutil_workflow-0.0.8/README.md +0 -233
  46. ddeutil_workflow-0.0.8/src/ddeutil/workflow/__about__.py +0 -1
  47. ddeutil_workflow-0.0.8/src/ddeutil/workflow/api.py +0 -120
  48. ddeutil_workflow-0.0.8/src/ddeutil/workflow/app.py +0 -45
  49. ddeutil_workflow-0.0.8/src/ddeutil/workflow/log.py +0 -79
  50. ddeutil_workflow-0.0.8/src/ddeutil/workflow/route.py +0 -78
  51. ddeutil_workflow-0.0.8/src/ddeutil_workflow.egg-info/PKG-INFO +0 -266
  52. ddeutil_workflow-0.0.8/tests/test__conf_exist.py +0 -11
  53. ddeutil_workflow-0.0.8/tests/test_job.py +0 -7
  54. ddeutil_workflow-0.0.8/tests/test_pipeline_on_ready.py +0 -26
  55. ddeutil_workflow-0.0.8/tests/test_pipeline_run.py +0 -107
  56. ddeutil_workflow-0.0.8/tests/test_stage.py +0 -15
  57. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/LICENSE +0 -0
  58. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/setup.cfg +0 -0
  59. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/src/ddeutil/workflow/__types.py +0 -0
  60. ddeutil_workflow-0.0.8/src/ddeutil/workflow/scheduler.py → ddeutil_workflow-0.0.9/src/ddeutil/workflow/cron.py +0 -0
  61. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/src/ddeutil/workflow/exceptions.py +0 -0
  62. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/src/ddeutil_workflow.egg-info/dependency_links.txt +0 -0
  63. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/src/ddeutil_workflow.egg-info/top_level.txt +0 -0
  64. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/tests/test__local_and_global.py +0 -0
  65. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/tests/test__regex.py +0 -0
  66. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/tests/test_pipeline_if.py +0 -0
  67. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/tests/test_pipeline_task.py +0 -0
  68. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/tests/test_utils.py +0 -0
ddeutil_workflow-0.0.9/PKG-INFO
@@ -0,0 +1,273 @@
+ Metadata-Version: 2.1
+ Name: ddeutil-workflow
+ Version: 0.0.9
+ Summary: Lightweight workflow orchestration with less dependencies
+ Author-email: ddeutils <korawich.anu@gmail.com>
+ License: MIT
+ Project-URL: Homepage, https://github.com/ddeutils/ddeutil-workflow/
+ Project-URL: Source Code, https://github.com/ddeutils/ddeutil-workflow/
+ Keywords: orchestration,workflow
+ Classifier: Topic :: Utilities
+ Classifier: Natural Language :: English
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Developers
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python
+ Classifier: Programming Language :: Python :: 3 :: Only
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Requires-Python: >=3.9.13
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: fmtutil
+ Requires-Dist: ddeutil-io
+ Requires-Dist: python-dotenv==1.0.1
+ Requires-Dist: typer==0.12.4
+ Provides-Extra: schedule
+ Requires-Dist: schedule<2.0.0,==1.2.2; extra == "schedule"
+ Provides-Extra: api
+ Requires-Dist: fastapi[standard]==0.112.1; extra == "api"
+ Requires-Dist: croniter==3.0.3; extra == "api"
+
+ # Workflow
+
+ [![test](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
+ [![python support version](https://img.shields.io/pypi/pyversions/ddeutil-workflow)](https://pypi.org/project/ddeutil-workflow/)
+ [![size](https://img.shields.io/github/languages/code-size/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow)
+ [![gh license](https://img.shields.io/github/license/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
+ [![code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+
+ **Table of Contents**:
+
+ - [Installation](#installation)
+ - [Getting Started](#getting-started)
+   - [On](#on)
+   - [Pipeline](#pipeline)
+ - [Usage](#usage)
+ - [Configuration](#configuration)
+ - [Future](#future)
+ - [Deployment](#deployment)
+
+ The **Lightweight workflow orchestration** package with few dependencies was
+ created to make simple, metadata-driven data pipeline orchestration easy.
+ Data operators can drive it with a `.yaml` template.
+
+ > [!WARNING]
+ > This package provides the orchestration workload only. That means you should
+ > not use a workflow stage to process large data or compute-heavy use cases.
+
+ In my opinion, there is no need to duplicate pipeline code if I can write one
+ template pipeline with dynamic input parameters and just change those
+ parameters per use case instead.
+ This way I can handle many logical pipelines in our organization with metadata
+ configuration alone. I call this a **Metadata Driven Data Pipeline**.
+
+ Next, we should add monitoring tools to manage the logs returned from pipeline
+ runs, because the logs alone do not show which use case a given data pipeline
+ run belongs to.
+
+ > [!NOTE]
+ > _Disclaimer_: The dynamic statements are inspired by GitHub Actions `.yml`
+ > files, and the config-file layout by several data orchestration framework
+ > tools I have used in my experience as a Data Engineer.
+
+ **Rules of This Workflow engine**:
+
+ 1. The minimum unit of scheduling is 1 minute
+ 2. Cannot re-run only a failed stage and its pending downstream
+ 3. All parallel tasks inside the workflow engine use threading
+    (because Python 3.13 unlocks the GIL)
+
+ ## Installation
+
+ This project needs the `ddeutil-io` namespace extension package. If you want to
+ install this package with its application add-ons, add the matching extra from
+ the table below:
+
+ | Usecase           | Install Optional                         | Support            |
+ |-------------------|------------------------------------------|--------------------|
+ | Python & CLI      | `pip install ddeutil-workflow`           | :heavy_check_mark: |
+ | Scheduler Service | `pip install ddeutil-workflow[schedule]` | :x:                |
+ | FastAPI Server    | `pip install ddeutil-workflow[api]`      | :x:                |
+
+ > I added this feature to the main milestone.
+ >
+ > **Docker Images** supported:
+ >
+ > | Docker Image                | Python Version | Support |
+ > |-----------------------------|----------------|---------|
+ > | ddeutil-workflow:latest     | `3.9`          | :x:     |
+ > | ddeutil-workflow:python3.10 | `3.10`         | :x:     |
+ > | ddeutil-workflow:python3.11 | `3.11`         | :x:     |
+ > | ddeutil-workflow:python3.12 | `3.12`         | :x:     |
+
+ ## Getting Started
+
+ The main feature of this project is the `Pipeline` object, which can call any
+ registered function. The pipeline can handle everything you want it to do: it
+ passes parameters along and catches the output for re-use in the next step.
+
+ ### On
+
+ **On** is a schedule object that receives a crontab value and can generate the
+ next or previous datetime from any start point of an input datetime.
+
+ ```yaml
+ # This file should be kept under this path: `./root-path/conf-path/*`
+ on_every_5_min:
+   type: on.On
+   cron: "*/5 * * * *"
+ ```
+
+ ```python
+ from ddeutil.workflow.on import On
+
+ # NOTE: Load the on data from the `.yaml` template file with this key.
+ schedule = On.from_loader(name='on_every_5_min', externals={})
+
+ assert '*/5 * * * *' == str(schedule.cronjob)
+
+ cron_iter = schedule.generate('2022-01-01 00:00:00')
+
+ assert "2022-01-01 00:05:00" == f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+ assert "2022-01-01 00:10:00" == f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+ assert "2022-01-01 00:15:00" == f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+ ```
+
+ ### Pipeline
+
+ The **Pipeline** object is the core feature of this project.
+
+ ```yaml
+ # This file should be kept under this path: `./root-path/conf-path/*`
+ pipeline-name:
+   type: ddeutil.workflow.pipeline.Pipeline
+   on: 'on_every_5_min'
+   params:
+     author-run:
+       type: str
+     run-date:
+       type: datetime
+   jobs:
+     first-job:
+       stages:
+         - name: "Empty stage do logging to console only!!"
+ ```
+
+ ```python
+ from ddeutil.workflow.pipeline import Pipeline
+
+ pipe = Pipeline.from_loader(name='pipeline-name', externals={})
+ pipe.execute(params={'author-run': 'Local Workflow', 'run-date': '2024-01-01'})
+ ```
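The `execute` call also returns an execution result object that you can keep and inspect. A minimal sketch, assuming the `Result` utility that this release ships in `ddeutil.workflow.utils` (its exact fields are not shown in this diff, so `status` and `context` are assumptions):

```python
from ddeutil.workflow.pipeline import Pipeline

pipe = Pipeline.from_loader(name='pipeline-name', externals={})
rs = pipe.execute(params={'author-run': 'Local Workflow', 'run-date': '2024-01-01'})

# Assumed fields: a status code and the captured execution context.
print(rs.status)
print(rs.context)
```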
+
+ > [!NOTE]
+ > The parameters above can use a short declarative form. You can assign a
+ > parameter type directly to the key of a parameter name, but this form does
+ > not handle a default value if you run this pipeline workflow on a schedule.
+ >
+ > ```yaml
+ > ...
+ > params:
+ >   author-run: str
+ >   run-date: datetime
+ > ...
+ > ```
+ >
+ > As for the type, you can drop the `ddeutil.workflow` prefix because the
+ > loader finds it by looping through the `WORKFLOW_CORE_REGISTRY` value.
+
+ ## Usage
+
+ These are examples that use a workflow file to run a common Data Engineering
+ use case.
+
+ > [!IMPORTANT]
+ > I recommend using the `hook` stage for all actions that you want a pipeline
+ > activity to orchestrate, because it can pass dynamic input arguments to the
+ > same hook function, which means you spend less time maintaining your data
+ > pipelines.
+
+ ```yaml
+ run_py_local:
+   type: pipeline.Pipeline
+   on:
+     - cronjob: '*/5 * * * *'
+       timezone: "Asia/Bangkok"
+   params:
+     author-run: str
+     run-date: datetime
+   jobs:
+     getting-api-data:
+       stages:
+         - name: "Retrieve API Data"
+           id: retrieve-api
+           uses: tasks/get-api-with-oauth-to-s3@requests
+           with:
+             url: https://open-data/
+             auth: ${API_ACCESS_REFRESH_TOKEN}
+             aws_s3_path: my-data/open-data/
+             # This authentication code should be implemented in your custom hook function.
+             aws_access_client_id: ${AWS_ACCESS_CLIENT_ID}
+             aws_access_client_secret: ${AWS_ACCESS_CLIENT_SECRET}
+ ```
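The `uses` value above resolves to a hook function that the loader discovers under the module paths listed in `WORKFLOW_CORE_REGISTRY`. A rough sketch of what registering such a hook could look like; the `tag` decorator import and the return shape are assumptions, not confirmed against this release:

```python
from ddeutil.workflow.utils import tag  # assumed registration decorator


@tag("requests", alias="get-api-with-oauth-to-s3")
def get_api_with_oauth_to_s3(
    url: str,
    auth: str,
    aws_s3_path: str,
    aws_access_client_id: str,
    aws_access_client_secret: str,
) -> dict:
    """Fetch data from an API with OAuth and land it on S3 (body elided)."""
    records: int = 0
    # ... call the API, refresh the OAuth token, upload to the S3 path ...
    return {"records": records}
```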
+
+ ## Configuration
+
+ | Environment                         | Component | Default                      | Description                                                                   |
+ |-------------------------------------|-----------|------------------------------|-------------------------------------------------------------------------------|
+ | `WORKFLOW_ROOT_PATH`                | Core      | .                            | The root path of the workflow application                                     |
+ | `WORKFLOW_CORE_REGISTRY`            | Core      | ddeutil.workflow,tests.utils | A list of importable strings for the hook stage                               |
+ | `WORKFLOW_CORE_REGISTRY_FILTER`     | Core      | ddeutil.workflow.utils       | A list of importable strings for the filter template                          |
+ | `WORKFLOW_CORE_PATH_CONF`           | Core      | conf                         | The config path that keeps all template `.yaml` files                         |
+ | `WORKFLOW_CORE_TIMEZONE`            | Core      | Asia/Bangkok                 | A timezone string that will be passed to the `ZoneInfo` object                |
+ | `WORKFLOW_CORE_STAGE_DEFAULT_ID`    | Core      | true                         | A flag that enables a default stage ID, used to catch execution output        |
+ | `WORKFLOW_CORE_STAGE_RAISE_ERROR`   | Core      | true                         | A flag that makes every stage raise `StageException` from stage execution     |
+ | `WORKFLOW_CORE_MAX_PIPELINE_POKING` | Core      | 4                            |                                                                               |
+ | `WORKFLOW_CORE_MAX_JOB_PARALLEL`    | Core      | 2                            | The maximum number of jobs that can run in parallel in the pipeline executor  |
+ | `WORKFLOW_LOG_ENABLE_WRITE`         | Log       | true                         | A flag that enables the logging object to save its logs to their destination  |
+
+ **Application**:
+
+ | Environment                         | Default | Description |
+ |-------------------------------------|---------|-------------|
+ | `WORKFLOW_APP_PROCESS_WORKER`       | 2       |             |
+ | `WORKFLOW_APP_PIPELINE_PER_PROCESS` | 100     |             |
+
+ **API server**:
+
+ | Environment           | Default                                                | Description                                                                    |
+ |-----------------------|--------------------------------------------------------|--------------------------------------------------------------------------------|
+ | `WORKFLOW_API_DB_URL` | postgresql+asyncpg://user:pass@localhost:5432/schedule | A database URL that will be passed to the SQLAlchemy `create_engine` function |
+
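These are plain environment variables (the API module loads a `.env` file via the pinned `python-dotenv` dependency), so one way to pin a few of them for a local run is to set them in-process before the workflow modules read them. A minimal sketch; the values simply mirror the documented defaults above:

```python
import os

# Assumed values: these mirror the defaults in the table above; adjust per environment.
os.environ["WORKFLOW_ROOT_PATH"] = "."
os.environ["WORKFLOW_CORE_PATH_CONF"] = "conf"
os.environ["WORKFLOW_CORE_TIMEZONE"] = "Asia/Bangkok"
os.environ["WORKFLOW_LOG_ENABLE_WRITE"] = "true"

# Import after the environment is set so the config picks these values up.
from ddeutil.workflow.pipeline import Pipeline

pipe = Pipeline.from_loader(name='pipeline-name', externals={})
```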
+ ## Future
+
+ The current milestone lists the features that will be developed next and that
+ this project should implement:
+
+ - ...
+
+ ## Deployment
+
+ This package is able to run as an application service that receives manual
+ triggers from the master node via REST API, or as a background scheduler
+ service like a crontab job but driven via the Python API.
+
+ ### Schedule Service
+
+ ```shell
+ (venv) $ python src.ddeutil.workflow.app
+ ```
+
+ ### API Server
+
+ ```shell
+ (venv) $ uvicorn src.ddeutil.workflow.api:app --host 0.0.0.0 --port 80 --reload
+ ```
+
+ > [!NOTE]
+ > If this package is already deployed, you are able to use
+ > `uvicorn ddeutil.workflow.api:app --host 0.0.0.0 --port 80`
ddeutil_workflow-0.0.9/README.md
@@ -0,0 +1,240 @@
+ # Workflow
+
+ [![test](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
+ [![python support version](https://img.shields.io/pypi/pyversions/ddeutil-workflow)](https://pypi.org/project/ddeutil-workflow/)
+ [![size](https://img.shields.io/github/languages/code-size/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow)
+ [![gh license](https://img.shields.io/github/license/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
+ [![code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+
+ **Table of Contents**:
+
+ - [Installation](#installation)
+ - [Getting Started](#getting-started)
+   - [On](#on)
+   - [Pipeline](#pipeline)
+ - [Usage](#usage)
+ - [Configuration](#configuration)
+ - [Future](#future)
+ - [Deployment](#deployment)
+
+ The **Lightweight workflow orchestration** package with few dependencies was
+ created to make simple, metadata-driven data pipeline orchestration easy.
+ Data operators can drive it with a `.yaml` template.
+
+ > [!WARNING]
+ > This package provides the orchestration workload only. That means you should
+ > not use a workflow stage to process large data or compute-heavy use cases.
+
+ In my opinion, there is no need to duplicate pipeline code if I can write one
+ template pipeline with dynamic input parameters and just change those
+ parameters per use case instead.
+ This way I can handle many logical pipelines in our organization with metadata
+ configuration alone. I call this a **Metadata Driven Data Pipeline**.
+
+ Next, we should add monitoring tools to manage the logs returned from pipeline
+ runs, because the logs alone do not show which use case a given data pipeline
+ run belongs to.
+
+ > [!NOTE]
+ > _Disclaimer_: The dynamic statements are inspired by GitHub Actions `.yml`
+ > files, and the config-file layout by several data orchestration framework
+ > tools I have used in my experience as a Data Engineer.
+
+ **Rules of This Workflow engine**:
+
+ 1. The minimum unit of scheduling is 1 minute
+ 2. Cannot re-run only a failed stage and its pending downstream
+ 3. All parallel tasks inside the workflow engine use threading
+    (because Python 3.13 unlocks the GIL)
+
+ ## Installation
+
+ This project needs the `ddeutil-io` namespace extension package. If you want to
+ install this package with its application add-ons, add the matching extra from
+ the table below:
+
+ | Usecase           | Install Optional                         | Support            |
+ |-------------------|------------------------------------------|--------------------|
+ | Python & CLI      | `pip install ddeutil-workflow`           | :heavy_check_mark: |
+ | Scheduler Service | `pip install ddeutil-workflow[schedule]` | :x:                |
+ | FastAPI Server    | `pip install ddeutil-workflow[api]`      | :x:                |
+
+ > I added this feature to the main milestone.
+ >
+ > **Docker Images** supported:
+ >
+ > | Docker Image                | Python Version | Support |
+ > |-----------------------------|----------------|---------|
+ > | ddeutil-workflow:latest     | `3.9`          | :x:     |
+ > | ddeutil-workflow:python3.10 | `3.10`         | :x:     |
+ > | ddeutil-workflow:python3.11 | `3.11`         | :x:     |
+ > | ddeutil-workflow:python3.12 | `3.12`         | :x:     |
+
+ ## Getting Started
+
+ The main feature of this project is the `Pipeline` object, which can call any
+ registered function. The pipeline can handle everything you want it to do: it
+ passes parameters along and catches the output for re-use in the next step.
+
+ ### On
+
+ **On** is a schedule object that receives a crontab value and can generate the
+ next or previous datetime from any start point of an input datetime.
+
+ ```yaml
+ # This file should be kept under this path: `./root-path/conf-path/*`
+ on_every_5_min:
+   type: on.On
+   cron: "*/5 * * * *"
+ ```
+
+ ```python
+ from ddeutil.workflow.on import On
+
+ # NOTE: Load the on data from the `.yaml` template file with this key.
+ schedule = On.from_loader(name='on_every_5_min', externals={})
+
+ assert '*/5 * * * *' == str(schedule.cronjob)
+
+ cron_iter = schedule.generate('2022-01-01 00:00:00')
+
+ assert "2022-01-01 00:05:00" == f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+ assert "2022-01-01 00:10:00" == f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+ assert "2022-01-01 00:15:00" == f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+ ```
+
+ ### Pipeline
+
+ The **Pipeline** object is the core feature of this project.
+
+ ```yaml
+ # This file should be kept under this path: `./root-path/conf-path/*`
+ pipeline-name:
+   type: ddeutil.workflow.pipeline.Pipeline
+   on: 'on_every_5_min'
+   params:
+     author-run:
+       type: str
+     run-date:
+       type: datetime
+   jobs:
+     first-job:
+       stages:
+         - name: "Empty stage do logging to console only!!"
+ ```
+
+ ```python
+ from ddeutil.workflow.pipeline import Pipeline
+
+ pipe = Pipeline.from_loader(name='pipeline-name', externals={})
+ pipe.execute(params={'author-run': 'Local Workflow', 'run-date': '2024-01-01'})
+ ```
+
+ > [!NOTE]
+ > The parameters above can use a short declarative form. You can assign a
+ > parameter type directly to the key of a parameter name, but this form does
+ > not handle a default value if you run this pipeline workflow on a schedule.
+ >
+ > ```yaml
+ > ...
+ > params:
+ >   author-run: str
+ >   run-date: datetime
+ > ...
+ > ```
+ >
+ > As for the type, you can drop the `ddeutil.workflow` prefix because the
+ > loader finds it by looping through the `WORKFLOW_CORE_REGISTRY` value.
+
+ ## Usage
+
+ These are examples that use a workflow file to run a common Data Engineering
+ use case.
+
+ > [!IMPORTANT]
+ > I recommend using the `hook` stage for all actions that you want a pipeline
+ > activity to orchestrate, because it can pass dynamic input arguments to the
+ > same hook function, which means you spend less time maintaining your data
+ > pipelines.
+
+ ```yaml
+ run_py_local:
+   type: pipeline.Pipeline
+   on:
+     - cronjob: '*/5 * * * *'
+       timezone: "Asia/Bangkok"
+   params:
+     author-run: str
+     run-date: datetime
+   jobs:
+     getting-api-data:
+       stages:
+         - name: "Retrieve API Data"
+           id: retrieve-api
+           uses: tasks/get-api-with-oauth-to-s3@requests
+           with:
+             url: https://open-data/
+             auth: ${API_ACCESS_REFRESH_TOKEN}
+             aws_s3_path: my-data/open-data/
+             # This authentication code should be implemented in your custom hook function.
+             aws_access_client_id: ${AWS_ACCESS_CLIENT_ID}
+             aws_access_client_secret: ${AWS_ACCESS_CLIENT_SECRET}
+ ```
+
+ ## Configuration
+
+ | Environment                         | Component | Default                      | Description                                                                   |
+ |-------------------------------------|-----------|------------------------------|-------------------------------------------------------------------------------|
+ | `WORKFLOW_ROOT_PATH`                | Core      | .                            | The root path of the workflow application                                     |
+ | `WORKFLOW_CORE_REGISTRY`            | Core      | ddeutil.workflow,tests.utils | A list of importable strings for the hook stage                               |
+ | `WORKFLOW_CORE_REGISTRY_FILTER`     | Core      | ddeutil.workflow.utils       | A list of importable strings for the filter template                          |
+ | `WORKFLOW_CORE_PATH_CONF`           | Core      | conf                         | The config path that keeps all template `.yaml` files                         |
+ | `WORKFLOW_CORE_TIMEZONE`            | Core      | Asia/Bangkok                 | A timezone string that will be passed to the `ZoneInfo` object                |
+ | `WORKFLOW_CORE_STAGE_DEFAULT_ID`    | Core      | true                         | A flag that enables a default stage ID, used to catch execution output        |
+ | `WORKFLOW_CORE_STAGE_RAISE_ERROR`   | Core      | true                         | A flag that makes every stage raise `StageException` from stage execution     |
+ | `WORKFLOW_CORE_MAX_PIPELINE_POKING` | Core      | 4                            |                                                                               |
+ | `WORKFLOW_CORE_MAX_JOB_PARALLEL`    | Core      | 2                            | The maximum number of jobs that can run in parallel in the pipeline executor  |
+ | `WORKFLOW_LOG_ENABLE_WRITE`         | Log       | true                         | A flag that enables the logging object to save its logs to their destination  |
+
+ **Application**:
+
+ | Environment                         | Default | Description |
+ |-------------------------------------|---------|-------------|
+ | `WORKFLOW_APP_PROCESS_WORKER`       | 2       |             |
+ | `WORKFLOW_APP_PIPELINE_PER_PROCESS` | 100     |             |
+
+ **API server**:
+
+ | Environment           | Default                                                | Description                                                                    |
+ |-----------------------|--------------------------------------------------------|--------------------------------------------------------------------------------|
+ | `WORKFLOW_API_DB_URL` | postgresql+asyncpg://user:pass@localhost:5432/schedule | A database URL that will be passed to the SQLAlchemy `create_engine` function |
+
+ ## Future
+
+ The current milestone lists the features that will be developed next and that
+ this project should implement:
+
+ - ...
+
+ ## Deployment
+
+ This package is able to run as an application service that receives manual
+ triggers from the master node via REST API, or as a background scheduler
+ service like a crontab job but driven via the Python API.
+
+ ### Schedule Service
+
+ ```shell
+ (venv) $ python src.ddeutil.workflow.app
+ ```
+
+ ### API Server
+
+ ```shell
+ (venv) $ uvicorn src.ddeutil.workflow.api:app --host 0.0.0.0 --port 80 --reload
+ ```
+
+ > [!NOTE]
+ > If this package is already deployed, you are able to use
+ > `uvicorn ddeutil.workflow.api:app --host 0.0.0.0 --port 80`
{ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/pyproject.toml
@@ -4,11 +4,11 @@ build-backend = "setuptools.build_meta"
 
  [project]
  name = "ddeutil-workflow"
- description = "Data Developer & Engineer Workflow Utility Objects"
+ description = "Lightweight workflow orchestration with less dependencies"
  readme = {file = "README.md", content-type = "text/markdown"}
  license = {text = "MIT"}
  authors = [{ name = "ddeutils", email = "korawich.anu@gmail.com" }]
- keywords = ['data', 'workflow', 'utility', 'pipeline']
+ keywords = ['orchestration', 'workflow']
  classifiers = [
      "Topic :: Utilities",
      "Natural Language :: English",
@@ -28,16 +28,17 @@ dependencies = [
      "fmtutil",
      "ddeutil-io",
      "python-dotenv==1.0.1",
+     "typer==0.12.4",
  ]
  dynamic = ["version"]
 
  [project.optional-dependencies]
- app = [
+ schedule = [
      "schedule==1.2.2,<2.0.0",
  ]
  api = [
-     "fastapi[standard]==0.112.0",
-     "apscheduler[sqlalchemy]==3.10.4,<4.0.0",
+     "fastapi[standard]==0.112.1",
+     # TODO: This package can migrate to use /cron/
      "croniter==3.0.3",
  ]
 
@@ -45,6 +46,9 @@ api = [
  Homepage = "https://github.com/ddeutils/ddeutil-workflow/"
  "Source Code" = "https://github.com/ddeutils/ddeutil-workflow/"
 
+ [project.scripts]
+ workflow = "ddeutil.workflow.cli:app"
+
  [tool.setuptools.dynamic]
  version = {attr = "ddeutil.workflow.__about__.__version__"}
 
@@ -62,8 +66,7 @@ concurrency = ["thread", "multiprocessing"]
  source = ["ddeutil.workflow", "tests"]
  omit = [
      "scripts/",
-     "src/ddeutil/workflow/api.py",
-     "src/ddeutil/workflow/app.py",
+     # Omit these files because they are not ready for production.
      "src/ddeutil/workflow/repeat.py",
      "src/ddeutil/workflow/route.py",
      "tests/utils.py",
ddeutil_workflow-0.0.9/src/ddeutil/workflow/__about__.py
@@ -0,0 +1 @@
+ __version__: str = "0.0.9"
{ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.9}/src/ddeutil/workflow/__init__.py
@@ -10,22 +10,11 @@ from .exceptions import (
      StageException,
      UtilException,
  )
- from .on import AwsOn, On
- from .pipeline import Job, Pipeline
- from .stage import (
-     BashStage,
-     EmptyStage,
-     HookStage,
-     PyStage,
-     Stage,
-     TriggerStage,
- )
+ from .on import On, interval2crontab
+ from .pipeline import Job, Pipeline, Strategy
+ from .stage import Stage, handler_result
  from .utils import (
-     ChoiceParam,
-     DatetimeParam,
-     IntParam,
      Param,
-     StrParam,
      dash2underscore,
      param2template,
  )
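After this consolidation, only the names re-exported above remain importable from the package root. A sketch of what still imports cleanly in 0.0.9, based solely on the re-exports shown in this hunk:

```python
from ddeutil.workflow import (
    Job,
    On,
    Param,
    Pipeline,
    Stage,
    Strategy,
    interval2crontab,
    param2template,
)
```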
ddeutil_workflow-0.0.9/src/ddeutil/workflow/api.py
@@ -0,0 +1,89 @@
+ # ------------------------------------------------------------------------------
+ # Copyright (c) 2022 Korawich Anuttra. All rights reserved.
+ # Licensed under the MIT License. See LICENSE in the project root for
+ # license information.
+ # ------------------------------------------------------------------------------
+ from __future__ import annotations
+
+ import asyncio
+ import logging
+ import os
+ import uuid
+ from queue import Empty, Queue
+
+ from ddeutil.core import str2bool
+ from dotenv import load_dotenv
+ from fastapi import FastAPI
+ from fastapi.middleware.gzip import GZipMiddleware
+ from fastapi.responses import UJSONResponse
+ from pydantic import BaseModel
+
+ from .repeat import repeat_every
+
+ load_dotenv()
+ logger = logging.getLogger(__name__)
+ logging.basicConfig(
+     level=logging.DEBUG,
+     format=(
+         "%(asctime)s.%(msecs)03d (%(name)-10s, %(process)-5d, %(thread)-5d) "
+         "[%(levelname)-7s] %(message)-120s (%(filename)s:%(lineno)s)"
+     ),
+     handlers=[logging.StreamHandler()],
+     datefmt="%Y-%m-%d %H:%M:%S",
+ )
+
+
+ app = FastAPI()
+ app.add_middleware(GZipMiddleware, minimum_size=1000)
+ app.queue = Queue()
+ app.output_dict = {}
+ app.queue_limit = 2
+
+
+ @app.on_event("startup")
+ @repeat_every(seconds=10, logger=logger)
+ def broker_upper_messages():
+     """Broker that receives messages from the `/upper` path and changes them
+     to upper case.
+     """
+     for _ in range(app.queue_limit):
+         try:
+             obj = app.queue.get_nowait()
+             app.output_dict[obj["request_id"]] = obj["text"].upper()
+             logger.info(f"Upper message: {app.output_dict}")
+         except Empty:
+             pass
+
+
+ class Payload(BaseModel):
+     text: str
+
+
+ async def get_result(request_id):
+     """Get data from the global output dict."""
+     while True:
+         if request_id in app.output_dict:
+             result = app.output_dict[request_id]
+             del app.output_dict[request_id]
+             return {"message": result}
+         await asyncio.sleep(0.001)
+
+
+ @app.post("/upper", response_class=UJSONResponse)
+ async def message_upper(payload: Payload):
+     request_id: str = str(uuid.uuid4())
+     app.queue.put(
+         {"text": payload.text, "request_id": request_id},
+     )
+     return await get_result(request_id)
+
+
+ if str2bool(os.getenv("WORKFLOW_API_ENABLE_ROUTE_WORKFLOW", "true")):
+     from .route import workflow
+
+     app.include_router(workflow)
+
+ if str2bool(os.getenv("WORKFLOW_API_ENABLE_ROUTE_SCHEDULE", "true")):
+     from .route import schedule
+
+     app.include_router(schedule)
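Once the API server is running, the `/upper` round trip defined above can be exercised with the standard library alone. A small sketch; the host and port are assumptions that must match however uvicorn was started:

```python
import json
from urllib import request

# Assumed address: match the host/port passed to uvicorn.
req = request.Request(
    "http://127.0.0.1:80/upper",
    data=json.dumps({"text": "hello workflow"}).encode("utf-8"),
    headers={"Content-Type": "application/json"},
    method="POST",
)
with request.urlopen(req) as resp:
    print(json.loads(resp.read()))  # {'message': 'HELLO WORKFLOW'}
```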