ddeutil-workflow 0.0.10__tar.gz → 0.0.11__tar.gz

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (68)
  1. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/PKG-INFO +66 -70
  2. ddeutil_workflow-0.0.11/README.md +148 -0
  3. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/pyproject.toml +2 -4
  4. ddeutil_workflow-0.0.11/src/ddeutil/workflow/__about__.py +1 -0
  5. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil/workflow/__init__.py +3 -2
  6. ddeutil_workflow-0.0.11/src/ddeutil/workflow/api.py +157 -0
  7. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil/workflow/cli.py +14 -14
  8. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil/workflow/exceptions.py +6 -6
  9. ddeutil_workflow-0.0.11/src/ddeutil/workflow/job.py +572 -0
  10. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil/workflow/log.py +10 -10
  11. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil/workflow/repeat.py +4 -2
  12. ddeutil_workflow-0.0.11/src/ddeutil/workflow/route.py +221 -0
  13. ddeutil_workflow-0.0.11/src/ddeutil/workflow/scheduler.py +1243 -0
  14. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil/workflow/stage.py +12 -12
  15. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil/workflow/utils.py +4 -4
  16. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil_workflow.egg-info/PKG-INFO +66 -70
  17. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil_workflow.egg-info/SOURCES.txt +1 -1
  18. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil_workflow.egg-info/requires.txt +2 -4
  19. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_job.py +1 -1
  20. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_job_py.py +7 -7
  21. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_log.py +2 -2
  22. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_pipeline.py +3 -3
  23. ddeutil_workflow-0.0.11/tests/test_pipeline_desc.py +11 -0
  24. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_pipeline_if.py +7 -7
  25. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_pipeline_matrix.py +11 -11
  26. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_pipeline_on.py +4 -4
  27. ddeutil_workflow-0.0.11/tests/test_pipeline_params.py +12 -0
  28. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_pipeline_run.py +4 -4
  29. ddeutil_workflow-0.0.11/tests/test_pipeline_run_raise.py +12 -0
  30. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_pipeline_task.py +9 -9
  31. ddeutil_workflow-0.0.11/tests/test_poke.py +13 -0
  32. ddeutil_workflow-0.0.11/tests/test_scheduler.py +68 -0
  33. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_stage_bash.py +10 -10
  34. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_stage_condition.py +4 -4
  35. ddeutil_workflow-0.0.11/tests/test_stage_hook.py +56 -0
  36. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_stage_py.py +11 -11
  37. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_stage_trigger.py +5 -5
  38. ddeutil_workflow-0.0.10/README.md +0 -151
  39. ddeutil_workflow-0.0.10/src/ddeutil/workflow/__about__.py +0 -1
  40. ddeutil_workflow-0.0.10/src/ddeutil/workflow/api.py +0 -89
  41. ddeutil_workflow-0.0.10/src/ddeutil/workflow/pipeline.py +0 -1186
  42. ddeutil_workflow-0.0.10/src/ddeutil/workflow/route.py +0 -92
  43. ddeutil_workflow-0.0.10/src/ddeutil/workflow/scheduler.py +0 -620
  44. ddeutil_workflow-0.0.10/tests/test_pipeline_desc.py +0 -11
  45. ddeutil_workflow-0.0.10/tests/test_pipeline_params.py +0 -12
  46. ddeutil_workflow-0.0.10/tests/test_pipeline_run_raise.py +0 -12
  47. ddeutil_workflow-0.0.10/tests/test_poke.py +0 -13
  48. ddeutil_workflow-0.0.10/tests/test_scheduler.py +0 -12
  49. ddeutil_workflow-0.0.10/tests/test_stage_hook.py +0 -56
  50. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/LICENSE +0 -0
  51. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/setup.cfg +0 -0
  52. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil/workflow/__types.py +0 -0
  53. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil/workflow/cron.py +0 -0
  54. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil/workflow/on.py +0 -0
  55. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil_workflow.egg-info/dependency_links.txt +0 -0
  56. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil_workflow.egg-info/entry_points.txt +0 -0
  57. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/src/ddeutil_workflow.egg-info/top_level.txt +0 -0
  58. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test__conf_exist.py +0 -0
  59. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test__local_and_global.py +0 -0
  60. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test__regex.py +0 -0
  61. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_conf.py +0 -0
  62. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_cron.py +0 -0
  63. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_on.py +0 -0
  64. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_params.py +0 -0
  65. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_stage.py +0 -0
  66. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_utils.py +0 -0
  67. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_utils_result.py +0 -0
  68. {ddeutil_workflow-0.0.10 → ddeutil_workflow-0.0.11}/tests/test_utils_template.py +0 -0
--- ddeutil_workflow-0.0.10/PKG-INFO
+++ ddeutil_workflow-0.0.11/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ddeutil-workflow
-Version: 0.0.10
+Version: 0.0.11
 Summary: Lightweight workflow orchestration with less dependencies
 Author-email: ddeutils <korawich.anu@gmail.com>
 License: MIT
@@ -24,10 +24,9 @@ License-File: LICENSE
 Requires-Dist: ddeutil-io
 Requires-Dist: python-dotenv==1.0.1
 Requires-Dist: typer<1.0.0,==0.12.5
-Provides-Extra: schedule
-Requires-Dist: schedule<2.0.0,==1.2.2; extra == "schedule"
+Requires-Dist: schedule<2.0.0,==1.2.2
 Provides-Extra: api
-Requires-Dist: fastapi[standard]<1.0.0,==0.112.2; extra == "api"
+Requires-Dist: fastapi<1.0.0,==0.112.2; extra == "api"
 
 # Workflow
 
@@ -38,22 +37,22 @@ Requires-Dist: fastapi[standard]<1.0.0,==0.112.2; extra == "api"
 [![code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 
 The **Lightweight workflow orchestration** with less dependencies the was created
-for easy to make a simple metadata driven for data pipeline orchestration.
+for easy to make a simple metadata driven for data workflow orchestration.
 It can to use for data operator by a `.yaml` template.
 
 > [!WARNING]
 > This package provide only orchestration workload. That mean you should not use
 > workflow stage to process any large data which use lot of compute usecase.
 
-In my opinion, I think it should not create duplicate pipeline codes if I can
-write with dynamic input parameters on the one template pipeline that just change
+In my opinion, I think it should not create duplicate workflow codes if I can
+write with dynamic input parameters on the one template workflow that just change
 the input parameters per use-case instead.
-This way I can handle a lot of logical pipelines in our orgs with only metadata
-configuration. It called **Metadata Driven Data Pipeline**.
+This way I can handle a lot of logical workflows in our orgs with only metadata
+configuration. It called **Metadata Driven Data Workflow**.
 
 Next, we should get some monitoring tools for manage logging that return from
-pipeline running. Because it not show us what is a use-case that running data
-pipeline.
+workflow running. Because it not show us what is a use-case that running data
+workflow.
 
 > [!NOTE]
 > _Disclaimer_: I inspire the dynamic statement from the GitHub Action `.yml` files
@@ -75,8 +74,7 @@ this package with application add-ons, you should add `app` in installation;
 | Usecase | Install Optional | Support |
 |---|---|---|
 | Python & CLI | `pip install ddeutil-workflow` | :heavy_check_mark: |
-| Scheduler Service | `pip install ddeutil-workflow[schedule]` | :x: |
-| FastAPI Server | `pip install ddeutil-workflow[api]` | :x: |
+| FastAPI Server | `pip install ddeutil-workflow[api]` | :heavy_check_mark: |
 
 
 > I added this feature to the main milestone.
@@ -97,67 +95,65 @@ use-case.
 
 > [!IMPORTANT]
 > I recommend you to use the `hook` stage for all actions that you want to do
-> with pipeline activity that you want to orchestrate. Because it able to dynamic
+> with workflow activity that you want to orchestrate. Because it able to dynamic
 > an input argument with the same hook function that make you use less time to
-> maintenance your data pipelines.
+> maintenance your data workflows.
 
 ```yaml
 run_py_local:
-  type: pipeline.Pipeline
-  on:
-    - cronjob: '*/5 * * * *'
-      timezone: "Asia/Bangkok"
-  params:
-    author-run: str
-    run-date: datetime
-  jobs:
-    getting-api-data:
-      stages:
-        - name: "Retrieve API Data"
-          id: retrieve-api
-          uses: tasks/get-api-with-oauth-to-s3@requests
-          with:
-            url: https://open-data/
-            auth: ${API_ACCESS_REFRESH_TOKEN}
-            aws_s3_path: my-data/open-data/
-
-            # This Authentication code should implement with your custom hook function.
-            # The template allow you to use environment variable.
-            aws_access_client_id: ${AWS_ACCESS_CLIENT_ID}
-            aws_access_client_secret: ${AWS_ACCESS_CLIENT_SECRET}
+  type: Workflow
+  on:
+    # If workflow deploy to schedule, it will running every 5 minutes
+    # with Asia/Bangkok timezone.
+    - cronjob: '*/5 * * * *'
+      timezone: "Asia/Bangkok"
+  params:
+    # Incoming execution parameters will validate with this type. It allow
+    # to set default value or templating.
+    author-run: str
+    run-date: datetime
+  jobs:
+    getting-api-data:
+      stages:
+        - name: "Retrieve API Data"
+          id: retrieve-api
+          uses: tasks/get-api-with-oauth-to-s3@requests
+          with:
+            url: https://open-data/
+            auth: ${API_ACCESS_REFRESH_TOKEN}
+            aws_s3_path: my-data/open-data/
+
+            # This Authentication code should implement with your custom hook function.
+            # The template allow you to use environment variable.
+            aws_access_client_id: ${AWS_ACCESS_CLIENT_ID}
+            aws_access_client_secret: ${AWS_ACCESS_CLIENT_SECRET}
 ```
 
 ## Configuration
 
-| Environment | Component | Default | Description |
-|---|---|---|---|
-| `WORKFLOW_ROOT_PATH` | Core | . | The root path of the workflow application |
-| `WORKFLOW_CORE_REGISTRY` | Core | ddeutil.workflow,tests.utils | List of importable string for the hook stage |
-| `WORKFLOW_CORE_REGISTRY_FILTER` | Core | ddeutil.workflow.utils | List of importable string for the filter template |
-| `WORKFLOW_CORE_PATH_CONF` | Core | conf | The config path that keep all template `.yaml` files |
-| `WORKFLOW_CORE_TIMEZONE` | Core | Asia/Bangkok | A Timezone string value that will pass to `ZoneInfo` object |
-| `WORKFLOW_CORE_STAGE_DEFAULT_ID` | Core | true | A flag that enable default stage ID that use for catch an execution output |
-| `WORKFLOW_CORE_STAGE_RAISE_ERROR` | Core | true | A flag that all stage raise StageException from stage execution |
-| `WORKFLOW_CORE_MAX_PIPELINE_POKING` | Core | 4 | |
-| `WORKFLOW_CORE_MAX_JOB_PARALLEL` | Core | 2 | The maximum job number that able to run parallel in pipeline executor |
-| `WORKFLOW_LOG_DEBUG_MODE` | Log | true | A flag that enable logging with debug level mode |
-| `WORKFLOW_LOG_ENABLE_WRITE` | Log | true | A flag that enable logging object saving log to its destination |
-
-
-**Application**:
-
-| Environment | Default | Description |
-|---|---|---|
-| `WORKFLOW_APP_PROCESS_WORKER` | 2 | The maximum process worker number that run in scheduler app module |
-| `WORKFLOW_APP_SCHEDULE_PER_PROCESS` | 100 | A schedule per process that run parallel |
-| `WORKFLOW_APP_STOP_BOUNDARY_DELTA` | '{"minutes": 5, "seconds": 20}' | A time delta value that use to stop scheduler app in json string format |
-
-**API server**:
-
-| Environment | Default | Description |
-|---|---|---|
-| `WORKFLOW_API_ENABLE_ROUTE_WORKFLOW` | true | A flag that enable workflow route to manage execute manually and workflow logging |
-| `WORKFLOW_API_ENABLE_ROUTE_SCHEDULE` | true | A flag that enable run scheduler |
+| Environment | Component | Default | Description |
+|---|---|---|---|
+| `WORKFLOW_ROOT_PATH` | Core | . | The root path of the workflow application |
+| `WORKFLOW_CORE_REGISTRY` | Core | src.ddeutil.workflow,tests.utils | List of importable string for the hook stage |
+| `WORKFLOW_CORE_REGISTRY_FILTER` | Core | ddeutil.workflow.utils | List of importable string for the filter template |
+| `WORKFLOW_CORE_PATH_CONF` | Core | conf | The config path that keep all template `.yaml` files |
+| `WORKFLOW_CORE_TIMEZONE` | Core | Asia/Bangkok | A Timezone string value that will pass to `ZoneInfo` object |
+| `WORKFLOW_CORE_STAGE_DEFAULT_ID` | Core | true | A flag that enable default stage ID that use for catch an execution output |
+| `WORKFLOW_CORE_STAGE_RAISE_ERROR` | Core | true | A flag that all stage raise StageException from stage execution |
+| `WORKFLOW_CORE_MAX_NUM_POKING` | Core | 4 | |
+| `WORKFLOW_CORE_MAX_JOB_PARALLEL` | Core | 2 | The maximum job number that able to run parallel in workflow executor |
+| `WORKFLOW_LOG_DEBUG_MODE` | Log | true | A flag that enable logging with debug level mode |
+| `WORKFLOW_LOG_ENABLE_WRITE` | Log | true | A flag that enable logging object saving log to its destination |
+| `WORKFLOW_APP_PROCESS_WORKER` | Schedule | 2 | The maximum process worker number that run in scheduler app module |
+| `WORKFLOW_APP_SCHEDULE_PER_PROCESS` | Schedule | 100 | A schedule per process that run parallel |
+| `WORKFLOW_APP_STOP_BOUNDARY_DELTA` | Schedule | '{"minutes": 5, "seconds": 20}' | A time delta value that use to stop scheduler app in json string format |
+
+**API Application**:
+
+| Environment | Component | Default | Description |
+|---|---|---|---|
+| `WORKFLOW_API_ENABLE_ROUTE_WORKFLOW` | API | true | A flag that enable workflow route to manage execute manually and workflow logging |
+| `WORKFLOW_API_ENABLE_ROUTE_SCHEDULE` | API | true | A flag that enable run scheduler |
 
 ## Deployment
 
@@ -165,18 +161,18 @@ This package able to run as a application service for receive manual trigger
 from the master node via RestAPI or use to be Scheduler background service
 like crontab job but via Python API.
 
-### Schedule Service
+### Schedule App
 
 ```shell
-(venv) $ python src.ddeutil.workflow.app
+(venv) $ ddeutil-workflow schedule
 ```
 
 ### API Server
 
 ```shell
-(venv) $ uvicorn src.ddeutil.workflow.api:app --host 0.0.0.0 --port 80 --reload
+(venv) $ uvicorn src.ddeutil.workflow.api:app --host 127.0.0.1 --port 80
 ```
 
 > [!NOTE]
 > If this package already deploy, it able to use
-> `uvicorn ddeutil.workflow.api:app --host 0.0.0.0 --port 80 --workers 4`
+> `uvicorn ddeutil.workflow.api:app --host 127.0.0.1 --port 80 --workers 4`
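The configuration tables in this README boil down to plain environment variables. For orientation, the new `api.py` further down in this diff gates its optional routes by reading those flags with `ddeutil.core.str2bool`; a minimal, self-contained reproduction of that pattern (the printed messages are illustrative, not package output):

```python
import os

from ddeutil.core import str2bool  # same helper the package itself uses

# Mirrors the flag handling at the bottom of src/ddeutil/workflow/api.py.
if str2bool(os.getenv("WORKFLOW_API_ENABLE_ROUTE_WORKFLOW", "true")):
    print("workflow route would be mounted")
if str2bool(os.getenv("WORKFLOW_API_ENABLE_ROUTE_SCHEDULE", "true")):
    print("schedule route would be mounted")
```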
--- /dev/null
+++ ddeutil_workflow-0.0.11/README.md
@@ -0,0 +1,148 @@
+# Workflow
+
+[![test](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
+[![python support version](https://img.shields.io/pypi/pyversions/ddeutil-workflow)](https://pypi.org/project/ddeutil-workflow/)
+[![size](https://img.shields.io/github/languages/code-size/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow)
+[![gh license](https://img.shields.io/github/license/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
+[![code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+
+The **Lightweight workflow orchestration** with less dependencies the was created
+for easy to make a simple metadata driven for data workflow orchestration.
+It can to use for data operator by a `.yaml` template.
+
+> [!WARNING]
+> This package provide only orchestration workload. That mean you should not use
+> workflow stage to process any large data which use lot of compute usecase.
+
+In my opinion, I think it should not create duplicate workflow codes if I can
+write with dynamic input parameters on the one template workflow that just change
+the input parameters per use-case instead.
+This way I can handle a lot of logical workflows in our orgs with only metadata
+configuration. It called **Metadata Driven Data Workflow**.
+
+Next, we should get some monitoring tools for manage logging that return from
+workflow running. Because it not show us what is a use-case that running data
+workflow.
+
+> [!NOTE]
+> _Disclaimer_: I inspire the dynamic statement from the GitHub Action `.yml` files
+> and all of config file from several data orchestration framework tools from my
+> experience on Data Engineer.
+
+**Rules of This Workflow engine**:
+
+1. Minimum unit of scheduling is 1 minute
+2. Cannot re-run only failed stage and its pending downstream
+3. All parallel tasks inside workflow engine use Threading
+   (Because Python 3.13 unlock GIL)
+
+## Installation
+
+This project need `ddeutil-io` extension namespace packages. If you want to install
+this package with application add-ons, you should add `app` in installation;
+
+| Usecase | Install Optional | Support |
+|---|---|---|
+| Python & CLI | `pip install ddeutil-workflow` | :heavy_check_mark: |
+| FastAPI Server | `pip install ddeutil-workflow[api]` | :heavy_check_mark: |
+
+
+> I added this feature to the main milestone.
+>
+> **Docker Images** supported:
+>
+> | Docker Image | Python Version | Support |
+> |---|---|---|
+> | ddeutil-workflow:latest | `3.9` | :x: |
+> | ddeutil-workflow:python3.10 | `3.10` | :x: |
+> | ddeutil-workflow:python3.11 | `3.11` | :x: |
+> | ddeutil-workflow:python3.12 | `3.12` | :x: |
+
+## Usage
+
+This is examples that use workflow file for running common Data Engineering
+use-case.
+
+> [!IMPORTANT]
+> I recommend you to use the `hook` stage for all actions that you want to do
+> with workflow activity that you want to orchestrate. Because it able to dynamic
+> an input argument with the same hook function that make you use less time to
+> maintenance your data workflows.
+
+```yaml
+run_py_local:
+  type: Workflow
+  on:
+    # If workflow deploy to schedule, it will running every 5 minutes
+    # with Asia/Bangkok timezone.
+    - cronjob: '*/5 * * * *'
+      timezone: "Asia/Bangkok"
+  params:
+    # Incoming execution parameters will validate with this type. It allow
+    # to set default value or templating.
+    author-run: str
+    run-date: datetime
+  jobs:
+    getting-api-data:
+      stages:
+        - name: "Retrieve API Data"
+          id: retrieve-api
+          uses: tasks/get-api-with-oauth-to-s3@requests
+          with:
+            url: https://open-data/
+            auth: ${API_ACCESS_REFRESH_TOKEN}
+            aws_s3_path: my-data/open-data/
+
+            # This Authentication code should implement with your custom hook function.
+            # The template allow you to use environment variable.
+            aws_access_client_id: ${AWS_ACCESS_CLIENT_ID}
+            aws_access_client_secret: ${AWS_ACCESS_CLIENT_SECRET}
+```
+
+## Configuration
+
+| Environment | Component | Default | Description |
+|---|---|---|---|
+| `WORKFLOW_ROOT_PATH` | Core | . | The root path of the workflow application |
+| `WORKFLOW_CORE_REGISTRY` | Core | src.ddeutil.workflow,tests.utils | List of importable string for the hook stage |
+| `WORKFLOW_CORE_REGISTRY_FILTER` | Core | ddeutil.workflow.utils | List of importable string for the filter template |
+| `WORKFLOW_CORE_PATH_CONF` | Core | conf | The config path that keep all template `.yaml` files |
+| `WORKFLOW_CORE_TIMEZONE` | Core | Asia/Bangkok | A Timezone string value that will pass to `ZoneInfo` object |
+| `WORKFLOW_CORE_STAGE_DEFAULT_ID` | Core | true | A flag that enable default stage ID that use for catch an execution output |
+| `WORKFLOW_CORE_STAGE_RAISE_ERROR` | Core | true | A flag that all stage raise StageException from stage execution |
+| `WORKFLOW_CORE_MAX_NUM_POKING` | Core | 4 | |
+| `WORKFLOW_CORE_MAX_JOB_PARALLEL` | Core | 2 | The maximum job number that able to run parallel in workflow executor |
+| `WORKFLOW_LOG_DEBUG_MODE` | Log | true | A flag that enable logging with debug level mode |
+| `WORKFLOW_LOG_ENABLE_WRITE` | Log | true | A flag that enable logging object saving log to its destination |
+| `WORKFLOW_APP_PROCESS_WORKER` | Schedule | 2 | The maximum process worker number that run in scheduler app module |
+| `WORKFLOW_APP_SCHEDULE_PER_PROCESS` | Schedule | 100 | A schedule per process that run parallel |
+| `WORKFLOW_APP_STOP_BOUNDARY_DELTA` | Schedule | '{"minutes": 5, "seconds": 20}' | A time delta value that use to stop scheduler app in json string format |
+
+**API Application**:
+
+| Environment | Component | Default | Description |
+|---|---|---|---|
+| `WORKFLOW_API_ENABLE_ROUTE_WORKFLOW` | API | true | A flag that enable workflow route to manage execute manually and workflow logging |
+| `WORKFLOW_API_ENABLE_ROUTE_SCHEDULE` | API | true | A flag that enable run scheduler |
+
+## Deployment
+
+This package able to run as a application service for receive manual trigger
+from the master node via RestAPI or use to be Scheduler background service
+like crontab job but via Python API.
+
+### Schedule App
+
+```shell
+(venv) $ ddeutil-workflow schedule
+```
+
+### API Server
+
+```shell
+(venv) $ uvicorn src.ddeutil.workflow.api:app --host 127.0.0.1 --port 80
+```
+
+> [!NOTE]
+> If this package already deploy, it able to use
+> `uvicorn ddeutil.workflow.api:app --host 127.0.0.1 --port 80 --workers 4`
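The `hook` stage in this README resolves `uses: tasks/get-api-with-oauth-to-s3@requests` against the modules listed in `WORKFLOW_CORE_REGISTRY`. A rough sketch of what such a registered hook module might look like; the `tag` decorator import and its `(tag, alias=...)` signature are assumptions inferred from the `tasks/<alias>@<tag>` convention, not confirmed against this release:

```python
# tasks.py -- hypothetical hook module on the WORKFLOW_CORE_REGISTRY path.
from typing import Any

from ddeutil.workflow.utils import tag  # assumed location of the decorator


@tag("requests", alias="get-api-with-oauth-to-s3")
def fetch_api_to_s3(
    url: str,
    auth: str,
    aws_s3_path: str,
    aws_access_client_id: str,
    aws_access_client_secret: str,
) -> dict[str, Any]:
    """Pull data from an OAuth-protected API and land it on S3 (body elided)."""
    # ... refresh the token, call the API, upload the payload to S3 ...
    return {"s3_path": aws_s3_path, "records": 0}
```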
--- ddeutil_workflow-0.0.10/pyproject.toml
+++ ddeutil_workflow-0.0.11/pyproject.toml
@@ -28,15 +28,13 @@ dependencies = [
     "ddeutil-io",
     "python-dotenv==1.0.1",
     "typer==0.12.5,<1.0.0",
+    "schedule==1.2.2,<2.0.0",
 ]
 dynamic = ["version"]
 
 [project.optional-dependencies]
-schedule = [
-    "schedule==1.2.2,<2.0.0",
-]
 api = [
-    "fastapi[standard]==0.112.2,<1.0.0",
+    "fastapi==0.112.2,<1.0.0",
 ]
 
 [project.urls]
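This hunk moves `schedule` from an optional extra into the core dependency list and trims `fastapi[standard]` down to plain `fastapi`, matching the README's claim that the scheduler now ships by default while the API server stays behind the `[api]` extra. A quick import-level sanity check under that assumption:

```python
# After a bare `pip install ddeutil-workflow` (no extras):
import schedule  # noqa: F401  -- core dependency as of 0.0.11

# FastAPI remains optional, so this import only succeeds with the [api] extra.
try:
    import fastapi  # noqa: F401
except ImportError:
    print("install ddeutil-workflow[api] for the REST server")
```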
--- /dev/null
+++ ddeutil_workflow-0.0.11/src/ddeutil/workflow/__about__.py
@@ -0,0 +1 @@
+__version__: str = "0.0.11"
--- ddeutil_workflow-0.0.10/src/ddeutil/workflow/__init__.py
+++ ddeutil_workflow-0.0.11/src/ddeutil/workflow/__init__.py
@@ -6,12 +6,13 @@
 from .exceptions import (
     JobException,
     ParamValueException,
-    PipelineException,
     StageException,
     UtilException,
+    WorkflowException,
 )
+from .job import Job, Strategy
 from .on import On, interval2crontab
-from .pipeline import Job, Pipeline, Strategy
+from .scheduler import Workflow
 from .stage import Stage, handler_result
 from .utils import (
     Param,
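In short, `Pipeline` disappears from the public surface: `Workflow` is re-exported from `scheduler`, and `Job`/`Strategy` move to the new `job` module. A migration-oriented import check against the hunk above:

```python
# Imports that are valid against 0.0.11 per this __init__.py diff.
from ddeutil.workflow import Job, On, Stage, Strategy, Workflow
from ddeutil.workflow.exceptions import WorkflowException

print(Workflow.__module__)  # ddeutil.workflow.scheduler
print(Job.__module__)       # ddeutil.workflow.job
```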
--- /dev/null
+++ ddeutil_workflow-0.0.11/src/ddeutil/workflow/api.py
@@ -0,0 +1,157 @@
+# ------------------------------------------------------------------------------
+# Copyright (c) 2022 Korawich Anuttra. All rights reserved.
+# Licensed under the MIT License. See LICENSE in the project root for
+# license information.
+# ------------------------------------------------------------------------------
+from __future__ import annotations
+
+import asyncio
+import contextlib
+import os
+import uuid
+from collections.abc import AsyncIterator
+from datetime import datetime, timedelta
+from queue import Empty, Queue
+from threading import Thread
+from typing import TypedDict
+
+from ddeutil.core import str2bool
+from dotenv import load_dotenv
+from fastapi import FastAPI
+from fastapi.middleware.gzip import GZipMiddleware
+from fastapi.responses import UJSONResponse
+from pydantic import BaseModel
+
+from .__about__ import __version__
+from .log import get_logger
+from .repeat import repeat_at, repeat_every
+from .scheduler import WorkflowTask
+
+load_dotenv()
+logger = get_logger("ddeutil.workflow")
+
+
+class State(TypedDict):
+    upper_queue: Queue
+    upper_result: dict[str, str]
+    scheduler: list[str]
+    workflow_threads: dict[str, Thread]
+    workflow_tasks: list[WorkflowTask]
+    workflow_queue: dict[str, list[datetime]]
+    workflow_running: dict[str, list[datetime]]
+
+
+@contextlib.asynccontextmanager
+async def lifespan(a: FastAPI) -> AsyncIterator[State]:
+    a.state.upper_queue = Queue()
+    a.state.upper_result = {}
+    a.state.scheduler = []
+    a.state.workflow_threads = {}
+    a.state.workflow_tasks = []
+    a.state.workflow_queue = {}
+    a.state.workflow_running = {}
+
+    await asyncio.create_task(broker_upper_messages())
+
+    yield {
+        "upper_queue": a.state.upper_queue,
+        "upper_result": a.state.upper_result,
+        # NOTE: Scheduler value should be contain a key of workflow and
+        #   list of datetime of queue and running.
+        #
+        # ... {
+        # ...     '<workflow-name>': (
+        # ...         [<running-datetime>, ...], [<queue-datetime>, ...]
+        # ...     )
+        # ... }
+        #
+        "scheduler": a.state.scheduler,
+        "workflow_queue": a.state.workflow_queue,
+        "workflow_running": a.state.workflow_running,
+        "workflow_threads": a.state.workflow_threads,
+        "workflow_tasks": a.state.workflow_tasks,
+    }
+
+
+app = FastAPI(
+    title="Workflow API",
+    description=(
+        "This is workflow FastAPI web application that use to manage manual "
+        "execute or schedule workflow via RestAPI."
+    ),
+    version=__version__,
+    lifespan=lifespan,
+    default_response_class=UJSONResponse,
+)
+app.add_middleware(GZipMiddleware, minimum_size=1000)
+
+
+@repeat_every(seconds=10)
+async def broker_upper_messages():
+    """Broker for receive message from the `/upper` path and change it to upper
+    case. This broker use interval running in background every 10 seconds.
+    """
+    for _ in range(10):
+        try:
+            obj = app.state.upper_queue.get_nowait()
+            app.state.upper_result[obj["request_id"]] = obj["text"].upper()
+            logger.info(f"Upper message: {app.state.upper_result}")
+        except Empty:
+            pass
+        await asyncio.sleep(0.0001)
+
+
+class Payload(BaseModel):
+    text: str
+
+
+async def get_result(request_id: str) -> dict[str, str]:
+    """Get data from output dict that global."""
+    while True:
+        if request_id in app.state.upper_result:
+            result: str = app.state.upper_result[request_id]
+            del app.state.upper_result[request_id]
+            return {"message": result}
+        await asyncio.sleep(0.0025)
+
+
+@app.get("/")
+@app.get("/api")
+async def health():
+    return {"message": "Workflow API already start up"}
+
+
+@app.post("/api")
+async def message_upper(payload: Payload):
+    """Convert message from any case to the upper case."""
+    request_id: str = str(uuid.uuid4())
+    app.state.upper_queue.put(
+        {"text": payload.text, "request_id": request_id},
+    )
+    return await get_result(request_id)
+
+
+if str2bool(os.getenv("WORKFLOW_API_ENABLE_ROUTE_WORKFLOW", "true")):
+    from .route import workflow
+
+    app.include_router(workflow)
+
+if str2bool(os.getenv("WORKFLOW_API_ENABLE_ROUTE_SCHEDULE", "true")):
+    from .route import schedule
+    from .scheduler import workflow_task
+
+    app.include_router(schedule)
+
+    @schedule.on_event("startup")
+    @repeat_at(cron="* * * * *", delay=2)
+    def schedule_broker_up():
+        logger.debug(
+            f"[SCHEDULER]: Start listening schedule from queue "
+            f"{app.state.scheduler}"
+        )
+        if app.state.workflow_tasks:
+            workflow_task(
+                app.state.workflow_tasks,
+                stop=datetime.now() + timedelta(minutes=1),
+                threads=app.state.workflow_threads,
+            )
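The demo endpoint above parks each message on `upper_queue` and then polls `upper_result`, so a POST blocks until the next pass of the 10-second broker loop. A hedged client sketch; the host and port follow the README's uvicorn example, and `requests` is not a dependency of this package:

```python
import requests

# Body matches the Payload model; the reply mirrors get_result().
resp = requests.post(
    "http://127.0.0.1:80/api",
    json={"text": "hello workflow"},
    timeout=30,  # allow for the ~10s broker interval
)
print(resp.json())  # expected: {"message": "HELLO WORKFLOW"}
```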
--- ddeutil_workflow-0.0.10/src/ddeutil/workflow/cli.py
+++ ddeutil_workflow-0.0.11/src/ddeutil/workflow/cli.py
@@ -29,21 +29,21 @@ cli.add_typer(
 
 @cli.command()
 def run(
-    pipeline: Annotated[
+    workflow: Annotated[
         str,
-        Argument(help="A pipeline name that want to run manually"),
+        Argument(help="A workflow name that want to run manually"),
     ],
     params: Annotated[
         str,
         Argument(
-            help="A json string for parameters of this pipeline execution."
+            help="A json string for parameters of this workflow execution."
        ),
     ],
 ):
-    """Run pipeline workflow manually with an input custom parameters that able
-    to receive with pipeline params config.
+    """Run workflow manually with an input custom parameters that able
+    to receive with workflow params config.
     """
-    logger.info(f"Running pipeline name: {pipeline}")
+    logger.info(f"Running workflow name: {workflow}")
     logger.info(f"... with Parameters: {json.dumps(json.loads(params))}")
 
 
@@ -63,7 +63,7 @@ def schedule(
     externals: Annotated[
         Optional[str],
         Argument(
-            help="A json string for parameters of this pipeline execution."
+            help="A json string for parameters of this workflow execution."
        ),
     ] = None,
 ):
@@ -77,20 +77,20 @@
         tz=ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
     )
 
-    from .scheduler import workflow
+    from .scheduler import workflow_runner
 
     # NOTE: Start running workflow scheduler application.
-    workflow_rs: list[str] = workflow(
+    workflow_rs: list[str] = workflow_runner(
         stop=stop, excluded=excluded, externals=json.loads(externals)
     )
     logger.info(f"Application run success: {workflow_rs}")
 
 
-@cli_log.command("pipeline-get")
-def pipeline_log_get(
+@cli_log.command("workflow-get")
+def workflow_log_get(
     name: Annotated[
         str,
-        Argument(help="A pipeline name that want to getting log"),
+        Argument(help="A workflow name that want to getting log"),
     ],
     limit: Annotated[
         int,
@@ -113,8 +113,8 @@ class LogMode(str, Enum):
     delete = "delete"
 
 
-@cli_log.command("pipeline-delete")
-def pipeline_log_delete(
+@cli_log.command("workflow-delete")
+def workflow_log_delete(
     mode: Annotated[
         LogMode,
         Argument(case_sensitive=True),
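Because these hunks only rename CLI arguments and subcommands (`pipeline-get`/`pipeline-delete` become `workflow-get`/`workflow-delete`), existing invocations just need the new spellings. A smoke-test sketch using Typer's test runner; the workflow name and params JSON are illustrative values, not fixtures from this package:

```python
from typer.testing import CliRunner

from ddeutil.workflow.cli import cli  # the Typer app patched in this diff

runner = CliRunner()
# `run` now takes a workflow name (formerly: pipeline) plus a JSON params string.
result = runner.invoke(cli, ["run", "run_py_local", '{"author-run": "me"}'])
print(result.exit_code, result.stdout)
```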
--- ddeutil_workflow-0.0.10/src/ddeutil/workflow/exceptions.py
+++ ddeutil_workflow-0.0.11/src/ddeutil/workflow/exceptions.py
@@ -6,22 +6,22 @@
 from __future__ import annotations
 
 
-class WorkflowException(Exception): ...
+class BaseWorkflowException(Exception): ...
 
 
-class UtilException(WorkflowException): ...
+class UtilException(BaseWorkflowException): ...
 
 
-class StageException(WorkflowException): ...
+class StageException(BaseWorkflowException): ...
 
 
-class JobException(WorkflowException): ...
+class JobException(BaseWorkflowException): ...
 
 
-class PipelineException(WorkflowException): ...
+class WorkflowException(BaseWorkflowException): ...
 
 
-class PipelineFailException(WorkflowException): ...
+class WorkflowFailException(WorkflowException): ...
 
 
 class ParamValueException(WorkflowException): ...
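The rename keeps one catch-all root, so broad `except` blocks survive the migration: `WorkflowException` takes over the old `PipelineException` slot, and everything still chains up to `BaseWorkflowException`. A small check of the hierarchy exactly as defined above:

```python
from ddeutil.workflow.exceptions import (
    BaseWorkflowException,
    WorkflowException,
    WorkflowFailException,
)

# WorkflowException replaces PipelineException; both renamed classes still
# derive from the BaseWorkflowException root.
assert issubclass(WorkflowException, BaseWorkflowException)
assert issubclass(WorkflowFailException, WorkflowException)

try:
    raise WorkflowFailException("workflow failed")
except BaseWorkflowException as err:
    print(f"caught: {type(err).__name__}: {err}")
```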