ddeutil-workflow 0.0.8.tar.gz → 0.0.10.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. ddeutil_workflow-0.0.10/PKG-INFO +182 -0
  2. ddeutil_workflow-0.0.10/README.md +151 -0
  3. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/pyproject.toml +10 -10
  4. ddeutil_workflow-0.0.10/src/ddeutil/workflow/__about__.py +1 -0
  5. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/src/ddeutil/workflow/__init__.py +3 -14
  6. ddeutil_workflow-0.0.10/src/ddeutil/workflow/api.py +89 -0
  7. ddeutil_workflow-0.0.10/src/ddeutil/workflow/cli.py +134 -0
  8. ddeutil_workflow-0.0.8/src/ddeutil/workflow/scheduler.py → ddeutil_workflow-0.0.10/src/ddeutil/workflow/cron.py +116 -26
  9. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/src/ddeutil/workflow/exceptions.py +3 -0
  10. ddeutil_workflow-0.0.10/src/ddeutil/workflow/log.py +184 -0
  11. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/src/ddeutil/workflow/on.py +27 -18
  12. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/src/ddeutil/workflow/pipeline.py +527 -234
  13. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/src/ddeutil/workflow/repeat.py +71 -40
  14. ddeutil_workflow-0.0.10/src/ddeutil/workflow/route.py +92 -0
  15. ddeutil_workflow-0.0.10/src/ddeutil/workflow/scheduler.py +620 -0
  16. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/src/ddeutil/workflow/stage.py +158 -82
  17. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/src/ddeutil/workflow/utils.py +273 -46
  18. ddeutil_workflow-0.0.10/src/ddeutil_workflow.egg-info/PKG-INFO +182 -0
  19. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/src/ddeutil_workflow.egg-info/SOURCES.txt +15 -5
  20. ddeutil_workflow-0.0.10/src/ddeutil_workflow.egg-info/entry_points.txt +2 -0
  21. ddeutil_workflow-0.0.10/src/ddeutil_workflow.egg-info/requires.txt +9 -0
  22. ddeutil_workflow-0.0.10/tests/test__conf_exist.py +11 -0
  23. ddeutil_workflow-0.0.10/tests/test_conf.py +8 -0
  24. ddeutil_workflow-0.0.8/tests/test_scheduler.py → ddeutil_workflow-0.0.10/tests/test_cron.py +74 -18
  25. ddeutil_workflow-0.0.10/tests/test_job.py +18 -0
  26. ddeutil_workflow-0.0.10/tests/test_job_py.py +33 -0
  27. ddeutil_workflow-0.0.10/tests/test_log.py +24 -0
  28. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_on.py +21 -0
  29. ddeutil_workflow-0.0.10/tests/test_params.py +13 -0
  30. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_pipeline.py +8 -0
  31. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_pipeline_desc.py +1 -1
  32. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_pipeline_matrix.py +4 -4
  33. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_pipeline_on.py +1 -1
  34. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_pipeline_params.py +1 -1
  35. ddeutil_workflow-0.0.10/tests/test_pipeline_run.py +22 -0
  36. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_pipeline_run_raise.py +1 -1
  37. ddeutil_workflow-0.0.10/tests/test_poke.py +13 -0
  38. ddeutil_workflow-0.0.10/tests/test_scheduler.py +12 -0
  39. ddeutil_workflow-0.0.10/tests/test_stage.py +31 -0
  40. ddeutil_workflow-0.0.10/tests/test_stage_bash.py +40 -0
  41. ddeutil_workflow-0.0.10/tests/test_stage_condition.py +14 -0
  42. ddeutil_workflow-0.0.10/tests/test_stage_hook.py +56 -0
  43. ddeutil_workflow-0.0.10/tests/test_stage_py.py +56 -0
  44. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_stage_trigger.py +3 -3
  45. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_utils_result.py +11 -22
  46. ddeutil_workflow-0.0.8/tests/test_utils_param2template.py → ddeutil_workflow-0.0.10/tests/test_utils_template.py +54 -0
  47. ddeutil_workflow-0.0.8/PKG-INFO +0 -266
  48. ddeutil_workflow-0.0.8/README.md +0 -233
  49. ddeutil_workflow-0.0.8/src/ddeutil/workflow/__about__.py +0 -1
  50. ddeutil_workflow-0.0.8/src/ddeutil/workflow/api.py +0 -120
  51. ddeutil_workflow-0.0.8/src/ddeutil/workflow/app.py +0 -45
  52. ddeutil_workflow-0.0.8/src/ddeutil/workflow/loader.py +0 -80
  53. ddeutil_workflow-0.0.8/src/ddeutil/workflow/log.py +0 -79
  54. ddeutil_workflow-0.0.8/src/ddeutil/workflow/route.py +0 -78
  55. ddeutil_workflow-0.0.8/src/ddeutil_workflow.egg-info/PKG-INFO +0 -266
  56. ddeutil_workflow-0.0.8/src/ddeutil_workflow.egg-info/requires.txt +0 -11
  57. ddeutil_workflow-0.0.8/tests/test__conf_exist.py +0 -11
  58. ddeutil_workflow-0.0.8/tests/test_job.py +0 -7
  59. ddeutil_workflow-0.0.8/tests/test_pipeline_on_ready.py +0 -26
  60. ddeutil_workflow-0.0.8/tests/test_pipeline_run.py +0 -107
  61. ddeutil_workflow-0.0.8/tests/test_stage.py +0 -15
  62. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/LICENSE +0 -0
  63. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/setup.cfg +0 -0
  64. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/src/ddeutil/workflow/__types.py +0 -0
  65. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/src/ddeutil_workflow.egg-info/dependency_links.txt +0 -0
  66. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/src/ddeutil_workflow.egg-info/top_level.txt +0 -0
  67. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test__local_and_global.py +0 -0
  68. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test__regex.py +0 -0
  69. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_pipeline_if.py +0 -0
  70. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_pipeline_task.py +0 -0
  71. {ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/tests/test_utils.py +0 -0
ddeutil_workflow-0.0.10/PKG-INFO
@@ -0,0 +1,182 @@
+ Metadata-Version: 2.1
+ Name: ddeutil-workflow
+ Version: 0.0.10
+ Summary: Lightweight workflow orchestration with less dependencies
+ Author-email: ddeutils <korawich.anu@gmail.com>
+ License: MIT
+ Project-URL: Homepage, https://github.com/ddeutils/ddeutil-workflow/
+ Project-URL: Source Code, https://github.com/ddeutils/ddeutil-workflow/
+ Keywords: orchestration,workflow
+ Classifier: Topic :: Utilities
+ Classifier: Natural Language :: English
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Developers
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python
+ Classifier: Programming Language :: Python :: 3 :: Only
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Requires-Python: >=3.9.13
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: ddeutil-io
+ Requires-Dist: python-dotenv==1.0.1
+ Requires-Dist: typer<1.0.0,==0.12.5
+ Provides-Extra: schedule
+ Requires-Dist: schedule<2.0.0,==1.2.2; extra == "schedule"
+ Provides-Extra: api
+ Requires-Dist: fastapi[standard]<1.0.0,==0.112.2; extra == "api"
+
+ # Workflow
+
+ [![test](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
+ [![python support version](https://img.shields.io/pypi/pyversions/ddeutil-workflow)](https://pypi.org/project/ddeutil-workflow/)
+ [![size](https://img.shields.io/github/languages/code-size/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow)
+ [![gh license](https://img.shields.io/github/license/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
+ [![code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+
+ A **lightweight workflow orchestration** tool with few dependencies, created
+ to make simple, metadata-driven data pipeline orchestration easy. Data
+ operators can be defined with a `.yaml` template.
+
+ > [!WARNING]
+ > This package provides orchestration workloads only. That means you should
+ > not use a workflow stage to process large data or compute-heavy use cases.
+
+ In my opinion, there is no need to duplicate pipeline code when I can write
+ one template pipeline with dynamic input parameters and simply change those
+ parameters per use case. This way I can manage many logical pipelines in our
+ organization with metadata configuration alone. I call this a
+ **Metadata Driven Data Pipeline**.
+
+ Next, we should add monitoring tools to manage the logs returned from
+ pipeline runs, because the logs alone do not show which use case a running
+ data pipeline belongs to.
+
+ > [!NOTE]
+ > _Disclaimer_: The dynamic statements are inspired by GitHub Actions `.yml`
+ > files, and the config file layout by several data orchestration frameworks
+ > from my experience as a data engineer.
+
+ **Rules of this workflow engine**:
+
+ 1. The minimum scheduling unit is 1 minute.
+ 2. You cannot re-run only a failed stage and its pending downstream stages.
+ 3. All parallel tasks inside the workflow engine use threading
+    (because Python 3.13 unlocks the GIL).
+
+ ## Installation
+
+ This project needs the `ddeutil-io` namespace extension package. If you want
+ to install this package with application add-ons, add the matching extra
+ from the table below:
+
+ | Usecase           | Install Optional                         | Support            |
+ |-------------------|------------------------------------------|--------------------|
+ | Python & CLI      | `pip install ddeutil-workflow`           | :heavy_check_mark: |
+ | Scheduler Service | `pip install ddeutil-workflow[schedule]` | :x:                |
+ | FastAPI Server    | `pip install ddeutil-workflow[api]`      | :x:                |
+
+ > I added Docker image support to the main milestone.
+ >
+ > **Docker Images** supported:
+ >
+ > | Docker Image                | Python Version | Support |
+ > |-----------------------------|----------------|---------|
+ > | ddeutil-workflow:latest     | `3.9`          | :x:     |
+ > | ddeutil-workflow:python3.10 | `3.10`         | :x:     |
+ > | ddeutil-workflow:python3.11 | `3.11`         | :x:     |
+ > | ddeutil-workflow:python3.12 | `3.12`         | :x:     |
+
+ ## Usage
+
+ These examples use a workflow file to run a common data engineering
+ use case.
+
+ > [!IMPORTANT]
+ > I recommend using the `hook` stage for any action you want to orchestrate
+ > in a pipeline, because it lets you pass dynamic input arguments to the
+ > same hook function, which reduces the time you spend maintaining your
+ > data pipelines.
+
+ ```yaml
+ run_py_local:
+   type: pipeline.Pipeline
+   on:
+     - cronjob: '*/5 * * * *'
+       timezone: "Asia/Bangkok"
+   params:
+     author-run: str
+     run-date: datetime
+   jobs:
+     getting-api-data:
+       stages:
+         - name: "Retrieve API Data"
+           id: retrieve-api
+           uses: tasks/get-api-with-oauth-to-s3@requests
+           with:
+             url: https://open-data/
+             auth: ${API_ACCESS_REFRESH_TOKEN}
+             aws_s3_path: my-data/open-data/
+
+             # This authentication code should be implemented in your custom
+             # hook function. The template allows you to use environment
+             # variables.
+             aws_access_client_id: ${AWS_ACCESS_CLIENT_ID}
+             aws_access_client_secret: ${AWS_ACCESS_CLIENT_SECRET}
+ ```
+
+ ## Configuration
+
+ | Environment                         | Component | Default                      | Description                                                                   |
+ |-------------------------------------|-----------|------------------------------|-------------------------------------------------------------------------------|
+ | `WORKFLOW_ROOT_PATH`                | Core      | .                            | The root path of the workflow application                                     |
+ | `WORKFLOW_CORE_REGISTRY`            | Core      | ddeutil.workflow,tests.utils | A list of import strings for the hook stage                                   |
+ | `WORKFLOW_CORE_REGISTRY_FILTER`     | Core      | ddeutil.workflow.utils       | A list of import strings for the filter template                              |
+ | `WORKFLOW_CORE_PATH_CONF`           | Core      | conf                         | The config path that keeps all template `.yaml` files                         |
+ | `WORKFLOW_CORE_TIMEZONE`            | Core      | Asia/Bangkok                 | A timezone string passed to the `ZoneInfo` object                             |
+ | `WORKFLOW_CORE_STAGE_DEFAULT_ID`    | Core      | true                         | A flag that enables a default stage ID, used to catch execution output        |
+ | `WORKFLOW_CORE_STAGE_RAISE_ERROR`   | Core      | true                         | A flag that makes all stages raise `StageException` on execution error       |
+ | `WORKFLOW_CORE_MAX_PIPELINE_POKING` | Core      | 4                            |                                                                               |
+ | `WORKFLOW_CORE_MAX_JOB_PARALLEL`    | Core      | 2                            | The maximum number of jobs that can run in parallel in the pipeline executor  |
+ | `WORKFLOW_LOG_DEBUG_MODE`           | Log       | true                         | A flag that enables debug-level logging                                       |
+ | `WORKFLOW_LOG_ENABLE_WRITE`         | Log       | true                         | A flag that enables the logging object to save logs to its destination        |
+
+ **Application**:
+
+ | Environment                         | Default                         | Description                                                                 |
+ |-------------------------------------|---------------------------------|-----------------------------------------------------------------------------|
+ | `WORKFLOW_APP_PROCESS_WORKER`       | 2                               | The maximum number of process workers that run in the scheduler app module   |
+ | `WORKFLOW_APP_SCHEDULE_PER_PROCESS` | 100                             | The number of schedules per process that run in parallel                     |
+ | `WORKFLOW_APP_STOP_BOUNDARY_DELTA`  | '{"minutes": 5, "seconds": 20}' | A time delta, as a JSON string, used to stop the scheduler app               |
+
+ **API server**:
+
+ | Environment                          | Default | Description                                                                       |
+ |--------------------------------------|---------|-----------------------------------------------------------------------------------|
+ | `WORKFLOW_API_ENABLE_ROUTE_WORKFLOW` | true    | A flag that enables the workflow route for manual execution and workflow logging   |
+ | `WORKFLOW_API_ENABLE_ROUTE_SCHEDULE` | true    | A flag that enables the schedule route that runs the scheduler                     |
+
+ ## Deployment
+
+ This package can run as an application service that receives manual triggers
+ from a master node via REST API, or as a background scheduler service, like
+ a crontab job, driven through the Python API.
+
+ ### Schedule Service
+
+ ```shell
+ (venv) $ ddeutil-workflow schedule
+ ```
+
+ ### API Server
+
+ ```shell
+ (venv) $ uvicorn src.ddeutil.workflow.api:app --host 0.0.0.0 --port 80 --reload
+ ```
+
+ > [!NOTE]
+ > If this package is already installed, you can run
+ > `uvicorn ddeutil.workflow.api:app --host 0.0.0.0 --port 80 --workers 4`
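The configuration tables in the README above map directly to environment variables. As a minimal `.env` sketch (an editor's illustration, not shipped content; every key and default is taken from those tables):

```shell
# Minimal .env sketch; keys and defaults mirror the configuration tables above.
WORKFLOW_ROOT_PATH=.
WORKFLOW_CORE_PATH_CONF=conf
WORKFLOW_CORE_TIMEZONE=Asia/Bangkok
WORKFLOW_CORE_STAGE_DEFAULT_ID=true
WORKFLOW_CORE_STAGE_RAISE_ERROR=true
WORKFLOW_CORE_MAX_JOB_PARALLEL=2
WORKFLOW_LOG_ENABLE_WRITE=true
WORKFLOW_API_ENABLE_ROUTE_WORKFLOW=true
WORKFLOW_API_ENABLE_ROUTE_SCHEDULE=true
```

The api.py module shown later in this diff loads such a file via `load_dotenv()` at import time.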
ddeutil_workflow-0.0.10/README.md
@@ -0,0 +1,151 @@
*(The 151 lines added to README.md are identical to the README portion of the PKG-INFO diff above, so they are not repeated here.)*
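The `uses: tasks/get-api-with-oauth-to-s3@requests` stage in the README example refers to a registered hook function. A hedged sketch of how such a hook might look — assuming the `tag` decorator lives in `ddeutil.workflow.utils` and takes a tag name plus an alias (verify both against the version you install), and that `WORKFLOW_CORE_REGISTRY` points at the module defining the hook:

```python
# Editor's sketch, not shipped code: one plausible shape for the hook behind
# `uses: tasks/get-api-with-oauth-to-s3@requests`.
from ddeutil.workflow.utils import tag  # assumed location of the decorator


@tag("requests", alias="get-api-with-oauth-to-s3")
def get_api_with_oauth_to_s3(
    url: str,
    auth: str,
    aws_s3_path: str,
    aws_access_client_id: str,
    aws_access_client_secret: str,
) -> dict:
    """Pull data from an OAuth-protected API and land it on S3 (stubbed)."""
    # The real extract-and-upload logic is elided; a hook returns a dict
    # that becomes the stage's execution output.
    return {"records": 0, "destination": aws_s3_path}
```

The keyword parameters line up with the keys under `with:` in the YAML stage, which is what makes the same hook reusable across pipelines with different arguments.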
{ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/pyproject.toml
@@ -4,11 +4,11 @@ build-backend = "setuptools.build_meta"
  
  [project]
  name = "ddeutil-workflow"
- description = "Data Developer & Engineer Workflow Utility Objects"
+ description = "Lightweight workflow orchestration with less dependencies"
  readme = {file = "README.md", content-type = "text/markdown"}
  license = {text = "MIT"}
  authors = [{ name = "ddeutils", email = "korawich.anu@gmail.com" }]
- keywords = ['data', 'workflow', 'utility', 'pipeline']
+ keywords = ['orchestration', 'workflow']
  classifiers = [
      "Topic :: Utilities",
      "Natural Language :: English",
@@ -25,26 +25,27 @@ classifiers = [
  ]
  requires-python = ">=3.9.13"
  dependencies = [
-     "fmtutil",
      "ddeutil-io",
      "python-dotenv==1.0.1",
+     "typer==0.12.5,<1.0.0",
  ]
  dynamic = ["version"]
  
  [project.optional-dependencies]
- app = [
+ schedule = [
      "schedule==1.2.2,<2.0.0",
  ]
  api = [
-     "fastapi[standard]==0.112.0",
-     "apscheduler[sqlalchemy]==3.10.4,<4.0.0",
-     "croniter==3.0.3",
+     "fastapi[standard]==0.112.2,<1.0.0",
  ]
  
  [project.urls]
  Homepage = "https://github.com/ddeutils/ddeutil-workflow/"
  "Source Code" = "https://github.com/ddeutils/ddeutil-workflow/"
  
+ [project.scripts]
+ ddeutil-workflow = "ddeutil.workflow.cli:cli"
+
  [tool.setuptools.dynamic]
  version = {attr = "ddeutil.workflow.__about__.__version__"}
  
@@ -62,8 +63,7 @@ concurrency = ["thread", "multiprocessing"]
  source = ["ddeutil.workflow", "tests"]
  omit = [
      "scripts/",
-     "src/ddeutil/workflow/api.py",
-     "src/ddeutil/workflow/app.py",
+     # Omit these files because they are not ready for production.
      "src/ddeutil/workflow/repeat.py",
      "src/ddeutil/workflow/route.py",
      "tests/utils.py",
@@ -83,7 +83,7 @@ addopts = [
  filterwarnings = ["error"]
  log_cli = true
  log_cli_level = "DEBUG"
- log_cli_format = "%(asctime)s [%(levelname)-7s] %(message)-100s (%(filename)s:%(lineno)s)"
+ log_cli_format = "%(asctime)s [%(levelname)-7s] %(message)-120s (%(filename)s:%(lineno)s)"
  log_cli_date_format = "%Y%m%d %H:%M:%S"
  
  [tool.black]
ddeutil_workflow-0.0.10/src/ddeutil/workflow/__about__.py
@@ -0,0 +1 @@
+ __version__: str = "0.0.10"
{ddeutil_workflow-0.0.8 → ddeutil_workflow-0.0.10}/src/ddeutil/workflow/__init__.py
@@ -10,22 +10,11 @@ from .exceptions import (
      StageException,
      UtilException,
  )
- from .on import AwsOn, On
- from .pipeline import Job, Pipeline
- from .stage import (
-     BashStage,
-     EmptyStage,
-     HookStage,
-     PyStage,
-     Stage,
-     TriggerStage,
- )
+ from .on import On, interval2crontab
+ from .pipeline import Job, Pipeline, Strategy
+ from .stage import Stage, handler_result
  from .utils import (
-     ChoiceParam,
-     DatetimeParam,
-     IntParam,
      Param,
-     StrParam,
      dash2underscore,
      param2template,
  )
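This `__init__.py` change narrows the public surface: the per-type stage and param subclasses are no longer re-exported. A quick import sketch (editor's illustration; every name is taken from the diff above):

```python
# Names re-exported by ddeutil.workflow 0.0.10, per the __init__.py diff above.
from ddeutil.workflow import (
    Job,
    On,
    Param,
    Pipeline,
    Stage,
    Strategy,
    param2template,
)
```

Code that previously imported `PyStage`, `BashStage`, `IntParam`, and so on from the package root must now import them from their submodules.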
ddeutil_workflow-0.0.10/src/ddeutil/workflow/api.py
@@ -0,0 +1,89 @@
+ # ------------------------------------------------------------------------------
+ # Copyright (c) 2022 Korawich Anuttra. All rights reserved.
+ # Licensed under the MIT License. See LICENSE in the project root for
+ # license information.
+ # ------------------------------------------------------------------------------
+ from __future__ import annotations
+
+ import asyncio
+ import os
+ import uuid
+ from queue import Empty, Queue
+
+ from ddeutil.core import str2bool
+ from dotenv import load_dotenv
+ from fastapi import FastAPI
+ from fastapi.middleware.gzip import GZipMiddleware
+ from fastapi.responses import UJSONResponse
+ from pydantic import BaseModel
+
+ from .__about__ import __version__
+ from .log import get_logger
+ from .repeat import repeat_every
+
+ load_dotenv()
+ logger = get_logger("ddeutil.workflow")
+
+
+ app = FastAPI(
+     title="Workflow API",
+     description=(
+         "This is the workflow FastAPI web application, used to manage "
+         "manual execution or scheduling of workflows via REST API."
+     ),
+     version=__version__,
+ )
+ app.add_middleware(GZipMiddleware, minimum_size=1000)
+ app.queue = Queue()
+ app.output_dict = {}
+ app.queue_limit = 5
+
+
+ @app.on_event("startup")
+ @repeat_every(seconds=10)
+ def broker_upper_messages():
+     """Broker that receives messages from the `/upper` path and converts
+     them to upper case. It runs in the background every 10 seconds.
+     """
+     for _ in range(app.queue_limit):
+         try:
+             obj = app.queue.get_nowait()
+             app.output_dict[obj["request_id"]] = obj["text"].upper()
+             logger.info(f"Upper message: {app.output_dict}")
+         except Empty:
+             pass
+
+
+ class Payload(BaseModel):
+     text: str
+
+
+ async def get_result(request_id):
+     """Poll the global output dict until the result for this request ID
+     arrives."""
+     while True:
+         if request_id in app.output_dict:
+             result = app.output_dict[request_id]
+             del app.output_dict[request_id]
+             return {"message": result}
+         await asyncio.sleep(0.0025)
+
+
+ @app.post("/upper", response_class=UJSONResponse)
+ async def message_upper(payload: Payload):
+     """Convert a message from any case to upper case."""
+     request_id: str = str(uuid.uuid4())
+     app.queue.put(
+         {"text": payload.text, "request_id": request_id},
+     )
+     return await get_result(request_id)
+
+
+ if str2bool(os.getenv("WORKFLOW_API_ENABLE_ROUTE_WORKFLOW", "true")):
+     from .route import workflow
+
+     app.include_router(workflow)
+
+ if str2bool(os.getenv("WORKFLOW_API_ENABLE_ROUTE_SCHEDULE", "true")):
+     from .route import schedule
+
+     app.include_router(schedule)
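To see the queue/broker round trip above in action, here is a test-client sketch (an editor's illustration, not shipped code; it assumes the `api` extra is installed, and the response can block for up to the broker's 10-second interval):

```python
# Editor's sketch: drive the /upper endpoint with FastAPI's TestClient.
from fastapi.testclient import TestClient

from ddeutil.workflow.api import app

# Using the client as a context manager fires the startup event, which
# starts the repeat_every broker that drains the queue.
with TestClient(app) as client:
    resp = client.post("/upper", json={"text": "hello workflow"})
    print(resp.json())  # expected: {"message": "HELLO WORKFLOW"}
```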
ddeutil_workflow-0.0.10/src/ddeutil/workflow/cli.py
@@ -0,0 +1,134 @@
+ # ------------------------------------------------------------------------------
+ # Copyright (c) 2022 Korawich Anuttra. All rights reserved.
+ # Licensed under the MIT License. See LICENSE in the project root for
+ # license information.
+ # ------------------------------------------------------------------------------
+ from __future__ import annotations
+
+ import json
+ import os
+ from datetime import datetime
+ from enum import Enum
+ from typing import Annotated, Optional
+ from zoneinfo import ZoneInfo
+
+ from ddeutil.core import str2list
+ from typer import Argument, Option, Typer
+
+ from .log import get_logger
+
+ logger = get_logger("ddeutil.workflow")
+ cli: Typer = Typer()
+ cli_log: Typer = Typer()
+ cli.add_typer(
+     cli_log,
+     name="log",
+     help="Logging commands of the workflow CLI",
+ )
+
+
+ @cli.command()
+ def run(
+     pipeline: Annotated[
+         str,
+         Argument(help="A pipeline name to run manually"),
+     ],
+     params: Annotated[
+         str,
+         Argument(
+             help="A JSON string of parameters for this pipeline execution."
+         ),
+     ],
+ ):
+     """Run a pipeline workflow manually with custom input parameters that
+     match the pipeline's params config.
+     """
+     logger.info(f"Running pipeline name: {pipeline}")
+     logger.info(f"... with Parameters: {json.dumps(json.loads(params))}")
+
+
+ @cli.command()
+ def schedule(
+     stop: Annotated[
+         Optional[datetime],
+         Argument(
+             formats=["%Y-%m-%d", "%Y-%m-%d %H:%M:%S"],
+             help="A datetime at which to stop the schedule app.",
+         ),
+     ] = None,
+     excluded: Annotated[
+         Optional[str],
+         Argument(help="A stringified list of workflow names to exclude."),
+     ] = None,
+     externals: Annotated[
+         Optional[str],
+         Argument(
+             help="A JSON string of external parameters for this execution."
+         ),
+     ] = None,
+ ):
+     """Start the workflow scheduler, which calls the workflow function from
+     the scheduler module.
+     """
+     excluded: list[str] = str2list(excluded) if excluded else []
+     externals: str = externals or "{}"
+     if stop:
+         stop: datetime = stop.astimezone(
+             tz=ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
+         )
+
+     from .scheduler import workflow
+
+     # NOTE: Start running the workflow scheduler application.
+     workflow_rs: list[str] = workflow(
+         stop=stop, excluded=excluded, externals=json.loads(externals)
+     )
+     logger.info(f"Application run success: {workflow_rs}")
+
+
+ @cli_log.command("pipeline-get")
+ def pipeline_log_get(
+     name: Annotated[
+         str,
+         Argument(help="A pipeline name to get logs for"),
+     ],
+     limit: Annotated[
+         int,
+         Argument(help="The maximum number of log records to return"),
+     ] = 100,
+     desc: Annotated[
+         bool,
+         Option(
+             "--desc",
+             help="A flag to order results by log release datetime, descending.",
+         ),
+     ] = True,
+ ):
+     logger.info(f"{name} : limit {limit} : desc: {desc}")
+     return [""]
+
+
+ class LogMode(str, Enum):
+     get = "get"
+     delete = "delete"
+
+
+ @cli_log.command("pipeline-delete")
+ def pipeline_log_delete(
+     mode: Annotated[
+         LogMode,
+         Argument(case_sensitive=True),
+     ],
+ ):
+     logger.info(mode)
+
+
+ @cli.callback()
+ def main():
+     """Manage workflows with the CLI."""
+
+
+ if __name__ == "__main__":
+     cli()
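Together with the `[project.scripts]` entry added in pyproject.toml, this module becomes the `ddeutil-workflow` console script. A usage sketch (editor's illustration; the pipeline name and parameter values are placeholders):

```shell
# Run a pipeline manually with JSON parameters (placeholders).
(venv) $ ddeutil-workflow run run_py_local '{"author-run": "me", "run-date": "2024-08-01"}'

# Start the scheduler and stop it at a given datetime.
(venv) $ ddeutil-workflow schedule "2024-08-01 18:00:00"

# Fetch up to 50 log records for a pipeline, newest first.
(venv) $ ddeutil-workflow log pipeline-get run_py_local 50 --desc
```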