ddeutil-workflow 0.0.5.tar.gz → 0.0.7.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. {ddeutil_workflow-0.0.5/src/ddeutil_workflow.egg-info → ddeutil_workflow-0.0.7}/PKG-INFO +144 -68
  2. {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.7}/README.md +137 -66
  3. {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.7}/pyproject.toml +18 -5
  4. ddeutil_workflow-0.0.7/src/ddeutil/workflow/__about__.py +1 -0
  5. ddeutil_workflow-0.0.7/src/ddeutil/workflow/__init__.py +31 -0
  6. ddeutil_workflow-0.0.5/src/ddeutil/workflow/__regex.py → ddeutil_workflow-0.0.7/src/ddeutil/workflow/__types.py +24 -4
  7. ddeutil_workflow-0.0.7/src/ddeutil/workflow/api.py +120 -0
  8. ddeutil_workflow-0.0.7/src/ddeutil/workflow/app.py +41 -0
  9. {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.7}/src/ddeutil/workflow/exceptions.py +16 -1
  10. ddeutil_workflow-0.0.7/src/ddeutil/workflow/loader.py +80 -0
  11. ddeutil_workflow-0.0.7/src/ddeutil/workflow/log.py +30 -0
  12. {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.7}/src/ddeutil/workflow/on.py +78 -26
  13. ddeutil_workflow-0.0.7/src/ddeutil/workflow/pipeline.py +733 -0
  14. ddeutil_workflow-0.0.7/src/ddeutil/workflow/repeat.py +134 -0
  15. ddeutil_workflow-0.0.7/src/ddeutil/workflow/route.py +78 -0
  16. ddeutil_workflow-0.0.5/src/ddeutil/workflow/__scheduler.py → ddeutil_workflow-0.0.7/src/ddeutil/workflow/scheduler.py +73 -45
  17. ddeutil_workflow-0.0.7/src/ddeutil/workflow/stage.py +431 -0
  18. ddeutil_workflow-0.0.7/src/ddeutil/workflow/utils.py +602 -0
  19. {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.7/src/ddeutil_workflow.egg-info}/PKG-INFO +144 -68
  20. {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.7}/src/ddeutil_workflow.egg-info/SOURCES.txt +19 -10
  21. ddeutil_workflow-0.0.7/src/ddeutil_workflow.egg-info/requires.txt +11 -0
  22. ddeutil_workflow-0.0.7/tests/test__conf_exist.py +11 -0
  23. ddeutil_workflow-0.0.7/tests/test__regex.py +90 -0
  24. {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.7}/tests/test_on.py +14 -5
  25. {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.7}/tests/test_pipeline.py +3 -9
  26. ddeutil_workflow-0.0.7/tests/test_pipeline_desc.py +11 -0
  27. ddeutil_workflow-0.0.7/tests/test_pipeline_if.py +28 -0
  28. ddeutil_workflow-0.0.7/tests/test_pipeline_matrix.py +159 -0
  29. ddeutil_workflow-0.0.7/tests/test_pipeline_on.py +12 -0
  30. {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.7}/tests/test_pipeline_params.py +1 -1
  31. {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.7}/tests/test_pipeline_run.py +40 -45
  32. {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.7}/tests/test_pipeline_task.py +26 -12
  33. ddeutil_workflow-0.0.5/tests/test_base_schedule.py → ddeutil_workflow-0.0.7/tests/test_scheduler.py +61 -18
  34. ddeutil_workflow-0.0.7/tests/test_stage_trigger.py +32 -0
  35. ddeutil_workflow-0.0.7/tests/test_utils.py +8 -0
  36. ddeutil_workflow-0.0.7/tests/test_utils_param2template.py +71 -0
  37. ddeutil_workflow-0.0.7/tests/test_utils_result.py +22 -0
  38. ddeutil_workflow-0.0.5/src/ddeutil/workflow/__about__.py +0 -1
  39. ddeutil_workflow-0.0.5/src/ddeutil/workflow/__init__.py +0 -0
  40. ddeutil_workflow-0.0.5/src/ddeutil/workflow/__types.py +0 -12
  41. ddeutil_workflow-0.0.5/src/ddeutil/workflow/loader.py +0 -182
  42. ddeutil_workflow-0.0.5/src/ddeutil/workflow/pipeline.py +0 -548
  43. ddeutil_workflow-0.0.5/src/ddeutil/workflow/tasks/__init__.py +0 -6
  44. ddeutil_workflow-0.0.5/src/ddeutil/workflow/tasks/dummy.py +0 -52
  45. ddeutil_workflow-0.0.5/src/ddeutil/workflow/utils.py +0 -208
  46. ddeutil_workflow-0.0.5/src/ddeutil_workflow.egg-info/requires.txt +0 -4
  47. ddeutil_workflow-0.0.5/tests/test_base_data.py +0 -13
  48. ddeutil_workflow-0.0.5/tests/test_base_regex.py +0 -46
  49. ddeutil_workflow-0.0.5/tests/test_loader.py +0 -6
  50. ddeutil_workflow-0.0.5/tests/test_pipeline_matrix.py +0 -29
  51. {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.7}/LICENSE +0 -0
  52. {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.7}/setup.cfg +0 -0
  53. {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.7}/src/ddeutil_workflow.egg-info/dependency_links.txt +0 -0
  54. {ddeutil_workflow-0.0.5 → ddeutil_workflow-0.0.7}/src/ddeutil_workflow.egg-info/top_level.txt +0 -0
  55. /ddeutil_workflow-0.0.5/tests/test_base_local_and_global.py → /ddeutil_workflow-0.0.7/tests/test__local_and_global.py +0 -0
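
The dependency metadata below replaces the unconditional `schedule` pin with two optional extras, `app` and `api`. A sketch of the install commands those extras imply, assuming the extras names published in the 0.0.7 metadata shown in the PKG-INFO hunk that follows:

```shell
# Core library only (fmtutil, ddeutil-io, python-dotenv)
pip install ddeutil-workflow==0.0.7

# Scheduler add-on: pulls in the optional `schedule` dependency
pip install "ddeutil-workflow[app]==0.0.7"

# API add-on: pulls in fastapi[standard], apscheduler[sqlalchemy], and croniter
pip install "ddeutil-workflow[api]==0.0.7"
```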
PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: ddeutil-workflow
- Version: 0.0.5
+ Version: 0.0.7
  Summary: Data Developer & Engineer Workflow Utility Objects
  Author-email: ddeutils <korawich.anu@gmail.com>
  License: MIT
@@ -24,36 +24,42 @@ License-File: LICENSE
  Requires-Dist: fmtutil
  Requires-Dist: ddeutil-io
  Requires-Dist: python-dotenv==1.0.1
- Requires-Dist: schedule==1.2.2
+ Provides-Extra: app
+ Requires-Dist: schedule<2.0.0,==1.2.2; extra == "app"
+ Provides-Extra: api
+ Requires-Dist: fastapi[standard]==0.112.0; extra == "api"
+ Requires-Dist: apscheduler[sqlalchemy]<4.0.0,==3.10.4; extra == "api"
+ Requires-Dist: croniter==3.0.3; extra == "api"

- # Data Utility: _Workflow_
+ # Workflow

  [![test](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
  [![python support version](https://img.shields.io/pypi/pyversions/ddeutil-workflow)](https://pypi.org/project/ddeutil-workflow/)
  [![size](https://img.shields.io/github/languages/code-size/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow)
  [![gh license](https://img.shields.io/github/license/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)

-
  **Table of Contents**:

  - [Installation](#installation)
  - [Getting Started](#getting-started)
- - [Connection](#connection)
- - [Dataset](#dataset)
- - [Schedule](#schedule)
- - [Pipeline Examples](#examples)
- - [Python & Shell](#python--shell)
- - [Tasks (EL)](#tasks-extract--load)
- - [Hooks (T)](#tasks-transform)
+ - [On](#on)
+ - [Pipeline](#pipeline)
+ - [Usage](#usage)
+ - [Python & Bash](#python--bash)
+ - [Hook (EL)](#hook-extract--load)
+ - [Hook (T)](#hook-transform)
  - [Configuration](#configuration)
+ - [Deployment](#deployment)

- This **Utility Workflow** objects was created for easy to make a simple metadata
- driven pipeline that able to **ETL, T, EL, or ELT** by `.yaml` file.
+ This **Workflow** objects was created for easy to make a simple metadata
+ driven for data pipeline orchestration that able to use for **ETL, T, EL, or
+ ELT** by a `.yaml` file template.

- I think we should not create the multiple pipeline per use-case if we able to
- write some dynamic pipeline that just change the input parameters per use-case
- instead. This way we can handle a lot of pipelines in our orgs with metadata only.
- It called **Metadata Driven**.
+ In my opinion, I think it should not create duplicate pipeline codes if I can
+ write with dynamic input parameters on the one template pipeline that just change
+ the input parameters per use-case instead.
+ This way I can handle a lot of logical pipelines in our orgs with only metadata
+ configuration. It called **Metadata Driven Data Pipeline**.

  Next, we should get some monitoring tools for manage logging that return from
  pipeline running. Because it not show us what is a use-case that running data
@@ -70,7 +76,16 @@ pipeline.
  pip install ddeutil-workflow
  ```

- This project need `ddeutil-io`, `ddeutil-model` extension namespace packages.
+ This project need `ddeutil-io` extension namespace packages. If you want to install
+ this package with application add-ons, you should add `app` in installation;
+
+ ```shell
+ pip install ddeutil-workflow[app]
+ ```
+
+ ```shell
+ pip install ddeutil-workflow[api]
+ ```

  ## Getting Started

@@ -87,38 +102,42 @@ will passing parameters and catching the output for re-use it to next step.
  > dynamic registries instead of main features because it have a lot of maintain
  > vendor codes and deps. (I do not have time to handle this features)

- ---
+ ### On

- ### Schedule
+ The **On** is schedule object.

  ```yaml
- schd_for_node:
- type: schedule.Schedule
+ on_every_5_min:
+ type: on.On
  cron: "*/5 * * * *"
  ```

  ```python
- from ddeutil.workflow.on import Schedule
+ from ddeutil.workflow.on import On

- scdl = Schedule.from_loader(name='schd_for_node', externals={})
- assert '*/5 * * * *' == str(scdl.cronjob)
+ schedule = On.from_loader(name='on_every_5_min', externals={})
+ assert '*/5 * * * *' == str(schedule.cronjob)

- cron_iterate = scdl.generate('2022-01-01 00:00:00')
- assert '2022-01-01 00:05:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
- assert '2022-01-01 00:10:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
- assert '2022-01-01 00:15:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
- assert '2022-01-01 00:20:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
- assert '2022-01-01 00:25:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
+ cron_iter = schedule.generate('2022-01-01 00:00:00')
+ assert '2022-01-01 00:05:00' f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+ assert '2022-01-01 00:10:00' f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+ assert '2022-01-01 00:15:00' f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+ assert '2022-01-01 00:20:00' f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
  ```

- ---
-
  ### Pipeline

+ The **Pipeline** object that is the core feature of this project.
+
  ```yaml
  run_py_local:
  type: ddeutil.workflow.pipeline.Pipeline
- ...
+ on: 'on_every_5_min'
+ params:
+ author-run:
+ type: str
+ run-date:
+ type: datetime
  ```

  ```python
@@ -128,27 +147,39 @@ pipe = Pipeline.from_loader(name='run_py_local', externals={})
  pipe.execute(params={'author-run': 'Local Workflow', 'run-date': '2024-01-01'})
  ```

- ## Examples
+ > [!NOTE]
+ > The above parameter use short declarative statement. You can pass a parameter
+ > type to the key of a parameter name.
+ > ```yaml
+ > params:
+ > author-run: str
+ > run-date: datetime
+ > ```
+ >
+ > And for the type, you can remove `ddeutil.workflow` prefix because we can find
+ > it by looping search from `WORKFLOW_CORE_REGISTRY` value.
+
+ ## Usage

  This is examples that use workflow file for running common Data Engineering
  use-case.

- ### Python & Shell
+ > [!IMPORTANT]
+ > I recommend you to use `task` stage for all actions that you want to do with
+ > pipeline object.

- The state of doing lists that worker should to do. It be collection of the stage.
+ ### Python & Bash

  ```yaml
  run_py_local:
- type: ddeutil.workflow.pipeline.Pipeline
+ type: pipeline.Pipeline
  params:
- author-run:
- type: str
- run-date:
- type: datetime
+ author-run: str
+ run-date: datetime
  jobs:
  first-job:
  stages:
- - name: Printing Information
+ - name: "Printing Information"
  id: define-func
  run: |
  x = '${{ params.author-run }}'
@@ -157,7 +188,7 @@ run_py_local:
  def echo(name: str):
  print(f'Hello {name}')

- - name: Run Sequence and use var from Above
+ - name: "Run Sequence and use var from Above"
  vars:
  x: ${{ params.author-run }}
  run: |
@@ -165,16 +196,16 @@ run_py_local:
  # Change x value
  x: int = 1

- - name: Call Function
+ - name: "Call Function"
  vars:
  echo: ${{ stages.define-func.outputs.echo }}
  run: |
  echo('Caller')
  second-job:
  stages:
- - name: Echo Shell Script
+ - name: "Echo Bash Script"
  id: shell-echo
- shell: |
+ bash: |
  echo "Hello World from Shell"
  ```

@@ -192,24 +223,20 @@ pipe.execute(params={'author-run': 'Local Workflow', 'run-date': '2024-01-01'})
  > Hello World from Shell
  ```

- ---
-
- ### Tasks (Extract & Load)
+ ### Hook (Extract & Load)

  ```yaml
  pipe_el_pg_to_lake:
- type: ddeutil.workflow.pipeline.Pipeline
+ type: pipeline.Pipeline
  params:
- run-date:
- type: datetime
- author-email:
- type: str
+ run-date: datetime
+ author-email: str
  jobs:
  extract-load:
  stages:
  - name: "Extract Load from Postgres to Lake"
  id: extract-load
- task: tasks/postgres-to-delta@polars
+ uses: tasks/postgres-to-delta@polars
  with:
  source:
  conn: conn_postgres_url
@@ -221,15 +248,23 @@ pipe_el_pg_to_lake:
  endpoint: "/${{ params.name }}"
  ```

- ---
+ Implement hook:

- ### Tasks (Transform)
+ ```python
+ from ddeutil.workflow.utils import tag

- > I recommend you to use task for all actions that you want to do.
+ @tag('polars', alias='postgres-to-delta')
+ def postgres_to_delta(source, sink):
+ return {
+ "source": source, "sink": sink
+ }
+ ```
+
+ ### Hook (Transform)

  ```yaml
- pipe_hook_mssql_proc:
- type: ddeutil.workflow.pipeline.Pipeline
+ pipeline_hook_mssql_proc:
+ type: pipeline.Pipeline
  params:
  run_date: datetime
  sp_name: str
@@ -240,7 +275,7 @@ pipe_hook_mssql_proc:
  stages:
  - name: "Transform Data in MS SQL Server"
  id: transform
- task: tasks/mssql-proc@odbc
+ uses: tasks/mssql-proc@odbc
  with:
  exec: ${{ params.sp_name }}
  params:
@@ -250,16 +285,57 @@ pipe_hook_mssql_proc:
  target: ${{ params.target_name }}
  ```

- > [!NOTE]
- > The above parameter use short declarative statement. You can pass a parameter
- > type to the key of a parameter name.
+ Implement hook:
+
+ ```python
+ from ddeutil.workflow.utils import tag
+
+ @tag('odbc', alias='mssql-proc')
+ def odbc_mssql_procedure(_exec: str, params: dict):
+ return {
+ "exec": _exec, "params": params
+ }
+ ```

  ## Configuration

- ```text
+ ```bash
+ export WORKFLOW_ROOT_PATH=.
+ export WORKFLOW_CORE_REGISTRY=ddeutil.workflow,tests.utils
+ export WORKFLOW_CORE_REGISTRY_FILTER=ddeutil.workflow.utils
+ export WORKFLOW_CORE_PATH_CONF=conf
+ export WORKFLOW_CORE_TIMEZONE=Asia/Bangkok
+ export WORKFLOW_CORE_DEFAULT_STAGE_ID=true

+ export WORKFLOW_CORE_MAX_PIPELINE_POKING=4
+ export WORKFLOW_CORE_MAX_JOB_PARALLEL=2
  ```

- ## License
+ Application config:

- This project was licensed under the terms of the [MIT license](LICENSE).
+ ```bash
+ export WORKFLOW_APP_DB_URL=postgresql+asyncpg://user:pass@localhost:5432/schedule
+ export WORKFLOW_APP_INTERVAL=10
+ ```
+
+ ## Deployment
+
+ This package able to run as a application service for receive manual trigger
+ from the master node via RestAPI or use to be Scheduler background service
+ like crontab job but via Python API.
+
+ ### Schedule Service
+
+ ```shell
+ (venv) $ python src.ddeutil.workflow.app
+ ```
+
+ ### API Server
+
+ ```shell
+ (venv) $ uvicorn src.ddeutil.workflow.api:app --host 0.0.0.0 --port 80 --reload
+ ```
+
+ > [!NOTE]
+ > If this package already deploy, it able to use
+ > `uvicorn ddeutil.workflow.api:app --host 0.0.0.0 --port 80`
README.md

@@ -1,31 +1,32 @@
- # Data Utility: _Workflow_
+ # Workflow

  [![test](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
  [![python support version](https://img.shields.io/pypi/pyversions/ddeutil-workflow)](https://pypi.org/project/ddeutil-workflow/)
  [![size](https://img.shields.io/github/languages/code-size/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow)
  [![gh license](https://img.shields.io/github/license/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)

-
  **Table of Contents**:

  - [Installation](#installation)
  - [Getting Started](#getting-started)
- - [Connection](#connection)
- - [Dataset](#dataset)
- - [Schedule](#schedule)
- - [Pipeline Examples](#examples)
- - [Python & Shell](#python--shell)
- - [Tasks (EL)](#tasks-extract--load)
- - [Hooks (T)](#tasks-transform)
+ - [On](#on)
+ - [Pipeline](#pipeline)
+ - [Usage](#usage)
+ - [Python & Bash](#python--bash)
+ - [Hook (EL)](#hook-extract--load)
+ - [Hook (T)](#hook-transform)
  - [Configuration](#configuration)
+ - [Deployment](#deployment)

- This **Utility Workflow** objects was created for easy to make a simple metadata
- driven pipeline that able to **ETL, T, EL, or ELT** by `.yaml` file.
+ This **Workflow** objects was created for easy to make a simple metadata
+ driven for data pipeline orchestration that able to use for **ETL, T, EL, or
+ ELT** by a `.yaml` file template.

- I think we should not create the multiple pipeline per use-case if we able to
- write some dynamic pipeline that just change the input parameters per use-case
- instead. This way we can handle a lot of pipelines in our orgs with metadata only.
- It called **Metadata Driven**.
+ In my opinion, I think it should not create duplicate pipeline codes if I can
+ write with dynamic input parameters on the one template pipeline that just change
+ the input parameters per use-case instead.
+ This way I can handle a lot of logical pipelines in our orgs with only metadata
+ configuration. It called **Metadata Driven Data Pipeline**.

  Next, we should get some monitoring tools for manage logging that return from
  pipeline running. Because it not show us what is a use-case that running data
@@ -42,7 +43,16 @@ pipeline.
  pip install ddeutil-workflow
  ```

- This project need `ddeutil-io`, `ddeutil-model` extension namespace packages.
+ This project need `ddeutil-io` extension namespace packages. If you want to install
+ this package with application add-ons, you should add `app` in installation;
+
+ ```shell
+ pip install ddeutil-workflow[app]
+ ```
+
+ ```shell
+ pip install ddeutil-workflow[api]
+ ```

  ## Getting Started

@@ -59,38 +69,42 @@ will passing parameters and catching the output for re-use it to next step.
  > dynamic registries instead of main features because it have a lot of maintain
  > vendor codes and deps. (I do not have time to handle this features)

- ---
+ ### On

- ### Schedule
+ The **On** is schedule object.

  ```yaml
- schd_for_node:
- type: schedule.Schedule
+ on_every_5_min:
+ type: on.On
  cron: "*/5 * * * *"
  ```

  ```python
- from ddeutil.workflow.on import Schedule
+ from ddeutil.workflow.on import On

- scdl = Schedule.from_loader(name='schd_for_node', externals={})
- assert '*/5 * * * *' == str(scdl.cronjob)
+ schedule = On.from_loader(name='on_every_5_min', externals={})
+ assert '*/5 * * * *' == str(schedule.cronjob)

- cron_iterate = scdl.generate('2022-01-01 00:00:00')
- assert '2022-01-01 00:05:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
- assert '2022-01-01 00:10:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
- assert '2022-01-01 00:15:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
- assert '2022-01-01 00:20:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
- assert '2022-01-01 00:25:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
+ cron_iter = schedule.generate('2022-01-01 00:00:00')
+ assert '2022-01-01 00:05:00' f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+ assert '2022-01-01 00:10:00' f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+ assert '2022-01-01 00:15:00' f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+ assert '2022-01-01 00:20:00' f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
  ```

- ---
-
  ### Pipeline

+ The **Pipeline** object that is the core feature of this project.
+
  ```yaml
  run_py_local:
  type: ddeutil.workflow.pipeline.Pipeline
- ...
+ on: 'on_every_5_min'
+ params:
+ author-run:
+ type: str
+ run-date:
+ type: datetime
  ```

  ```python
@@ -100,27 +114,39 @@ pipe = Pipeline.from_loader(name='run_py_local', externals={})
  pipe.execute(params={'author-run': 'Local Workflow', 'run-date': '2024-01-01'})
  ```

- ## Examples
+ > [!NOTE]
+ > The above parameter use short declarative statement. You can pass a parameter
+ > type to the key of a parameter name.
+ > ```yaml
+ > params:
+ > author-run: str
+ > run-date: datetime
+ > ```
+ >
+ > And for the type, you can remove `ddeutil.workflow` prefix because we can find
+ > it by looping search from `WORKFLOW_CORE_REGISTRY` value.
+
+ ## Usage

  This is examples that use workflow file for running common Data Engineering
  use-case.

- ### Python & Shell
+ > [!IMPORTANT]
+ > I recommend you to use `task` stage for all actions that you want to do with
+ > pipeline object.

- The state of doing lists that worker should to do. It be collection of the stage.
+ ### Python & Bash

  ```yaml
  run_py_local:
- type: ddeutil.workflow.pipeline.Pipeline
+ type: pipeline.Pipeline
  params:
- author-run:
- type: str
- run-date:
- type: datetime
+ author-run: str
+ run-date: datetime
  jobs:
  first-job:
  stages:
- - name: Printing Information
+ - name: "Printing Information"
  id: define-func
  run: |
  x = '${{ params.author-run }}'
@@ -129,7 +155,7 @@ run_py_local:
  def echo(name: str):
  print(f'Hello {name}')

- - name: Run Sequence and use var from Above
+ - name: "Run Sequence and use var from Above"
  vars:
  x: ${{ params.author-run }}
  run: |
@@ -137,16 +163,16 @@ run_py_local:
  # Change x value
  x: int = 1

- - name: Call Function
+ - name: "Call Function"
  vars:
  echo: ${{ stages.define-func.outputs.echo }}
  run: |
  echo('Caller')
  second-job:
  stages:
- - name: Echo Shell Script
+ - name: "Echo Bash Script"
  id: shell-echo
- shell: |
+ bash: |
  echo "Hello World from Shell"
  ```

@@ -164,24 +190,20 @@ pipe.execute(params={'author-run': 'Local Workflow', 'run-date': '2024-01-01'})
  > Hello World from Shell
  ```

- ---
-
- ### Tasks (Extract & Load)
+ ### Hook (Extract & Load)

  ```yaml
  pipe_el_pg_to_lake:
- type: ddeutil.workflow.pipeline.Pipeline
+ type: pipeline.Pipeline
  params:
- run-date:
- type: datetime
- author-email:
- type: str
+ run-date: datetime
+ author-email: str
  jobs:
  extract-load:
  stages:
  - name: "Extract Load from Postgres to Lake"
  id: extract-load
- task: tasks/postgres-to-delta@polars
+ uses: tasks/postgres-to-delta@polars
  with:
  source:
  conn: conn_postgres_url
@@ -193,15 +215,23 @@ pipe_el_pg_to_lake:
  endpoint: "/${{ params.name }}"
  ```

- ---
+ Implement hook:

- ### Tasks (Transform)
+ ```python
+ from ddeutil.workflow.utils import tag

- > I recommend you to use task for all actions that you want to do.
+ @tag('polars', alias='postgres-to-delta')
+ def postgres_to_delta(source, sink):
+ return {
+ "source": source, "sink": sink
+ }
+ ```
+
+ ### Hook (Transform)

  ```yaml
- pipe_hook_mssql_proc:
- type: ddeutil.workflow.pipeline.Pipeline
+ pipeline_hook_mssql_proc:
+ type: pipeline.Pipeline
  params:
  run_date: datetime
  sp_name: str
@@ -212,7 +242,7 @@ pipe_hook_mssql_proc:
  stages:
  - name: "Transform Data in MS SQL Server"
  id: transform
- task: tasks/mssql-proc@odbc
+ uses: tasks/mssql-proc@odbc
  with:
  exec: ${{ params.sp_name }}
  params:
@@ -222,16 +252,57 @@ pipe_hook_mssql_proc:
  target: ${{ params.target_name }}
  ```

- > [!NOTE]
- > The above parameter use short declarative statement. You can pass a parameter
- > type to the key of a parameter name.
+ Implement hook:
+
+ ```python
+ from ddeutil.workflow.utils import tag
+
+ @tag('odbc', alias='mssql-proc')
+ def odbc_mssql_procedure(_exec: str, params: dict):
+ return {
+ "exec": _exec, "params": params
+ }
+ ```

  ## Configuration

- ```text
+ ```bash
+ export WORKFLOW_ROOT_PATH=.
+ export WORKFLOW_CORE_REGISTRY=ddeutil.workflow,tests.utils
+ export WORKFLOW_CORE_REGISTRY_FILTER=ddeutil.workflow.utils
+ export WORKFLOW_CORE_PATH_CONF=conf
+ export WORKFLOW_CORE_TIMEZONE=Asia/Bangkok
+ export WORKFLOW_CORE_DEFAULT_STAGE_ID=true

+ export WORKFLOW_CORE_MAX_PIPELINE_POKING=4
+ export WORKFLOW_CORE_MAX_JOB_PARALLEL=2
  ```

- ## License
+ Application config:

- This project was licensed under the terms of the [MIT license](LICENSE).
+ ```bash
+ export WORKFLOW_APP_DB_URL=postgresql+asyncpg://user:pass@localhost:5432/schedule
+ export WORKFLOW_APP_INTERVAL=10
+ ```
+
+ ## Deployment
+
+ This package able to run as a application service for receive manual trigger
+ from the master node via RestAPI or use to be Scheduler background service
+ like crontab job but via Python API.
+
+ ### Schedule Service
+
+ ```shell
+ (venv) $ python src.ddeutil.workflow.app
+ ```
+
+ ### API Server
+
+ ```shell
+ (venv) $ uvicorn src.ddeutil.workflow.api:app --host 0.0.0.0 --port 80 --reload
+ ```
+
+ > [!NOTE]
+ > If this package already deploy, it able to use
+ > `uvicorn ddeutil.workflow.api:app --host 0.0.0.0 --port 80`
+ > `uvicorn ddeutil.workflow.api:app --host 0.0.0.0 --port 80`