ddeutil-workflow 0.0.4__tar.gz → 0.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. {ddeutil_workflow-0.0.4/src/ddeutil_workflow.egg-info → ddeutil_workflow-0.0.6}/PKG-INFO +118 -90
  2. {ddeutil_workflow-0.0.4 → ddeutil_workflow-0.0.6}/README.md +111 -78
  3. {ddeutil_workflow-0.0.4 → ddeutil_workflow-0.0.6}/pyproject.toml +8 -18
  4. ddeutil_workflow-0.0.6/src/ddeutil/workflow/__about__.py +1 -0
  5. {ddeutil_workflow-0.0.4/src/ddeutil/workflow/tasks → ddeutil_workflow-0.0.6/src/ddeutil/workflow}/__init__.py +4 -1
  6. ddeutil_workflow-0.0.4/src/ddeutil/workflow/__regex.py → ddeutil_workflow-0.0.6/src/ddeutil/workflow/__types.py +13 -3
  7. {ddeutil_workflow-0.0.4 → ddeutil_workflow-0.0.6}/src/ddeutil/workflow/exceptions.py +13 -1
  8. ddeutil_workflow-0.0.6/src/ddeutil/workflow/loader.py +80 -0
  9. ddeutil_workflow-0.0.6/src/ddeutil/workflow/on.py +195 -0
  10. ddeutil_workflow-0.0.6/src/ddeutil/workflow/pipeline.py +497 -0
  11. ddeutil_workflow-0.0.4/src/ddeutil/workflow/vendors/__schedule.py → ddeutil_workflow-0.0.6/src/ddeutil/workflow/scheduler.py +222 -176
  12. ddeutil_workflow-0.0.6/src/ddeutil/workflow/stage.py +402 -0
  13. ddeutil_workflow-0.0.6/src/ddeutil/workflow/utils.py +378 -0
  14. {ddeutil_workflow-0.0.4 → ddeutil_workflow-0.0.6/src/ddeutil_workflow.egg-info}/PKG-INFO +118 -90
  15. ddeutil_workflow-0.0.6/src/ddeutil_workflow.egg-info/SOURCES.txt +34 -0
  16. ddeutil_workflow-0.0.6/src/ddeutil_workflow.egg-info/requires.txt +7 -0
  17. ddeutil_workflow-0.0.6/tests/test__conf_exist.py +11 -0
  18. ddeutil_workflow-0.0.4/tests/test_base_local_and_global.py → ddeutil_workflow-0.0.6/tests/test__local_and_global.py +4 -4
  19. ddeutil_workflow-0.0.4/tests/test_base_regex.py → ddeutil_workflow-0.0.6/tests/test__regex.py +11 -3
  20. ddeutil_workflow-0.0.4/tests/test_schedule.py → ddeutil_workflow-0.0.6/tests/test_on.py +22 -4
  21. {ddeutil_workflow-0.0.4 → ddeutil_workflow-0.0.6}/tests/test_pipeline.py +3 -9
  22. ddeutil_workflow-0.0.6/tests/test_pipeline_desc.py +11 -0
  23. ddeutil_workflow-0.0.6/tests/test_pipeline_if.py +28 -0
  24. ddeutil_workflow-0.0.6/tests/test_pipeline_matrix.py +87 -0
  25. ddeutil_workflow-0.0.6/tests/test_pipeline_on.py +12 -0
  26. {ddeutil_workflow-0.0.4 → ddeutil_workflow-0.0.6}/tests/test_pipeline_params.py +1 -1
  27. {ddeutil_workflow-0.0.4 → ddeutil_workflow-0.0.6}/tests/test_pipeline_run.py +44 -33
  28. {ddeutil_workflow-0.0.4 → ddeutil_workflow-0.0.6}/tests/test_pipeline_task.py +11 -13
  29. ddeutil_workflow-0.0.6/tests/test_scheduler.py +118 -0
  30. ddeutil_workflow-0.0.6/tests/test_stage_trigger.py +10 -0
  31. ddeutil_workflow-0.0.6/tests/test_utils.py +8 -0
  32. ddeutil_workflow-0.0.6/tests/test_utils_result.py +22 -0
  33. ddeutil_workflow-0.0.4/src/ddeutil/workflow/__about__.py +0 -1
  34. ddeutil_workflow-0.0.4/src/ddeutil/workflow/__init__.py +0 -0
  35. ddeutil_workflow-0.0.4/src/ddeutil/workflow/__types.py +0 -12
  36. ddeutil_workflow-0.0.4/src/ddeutil/workflow/conn.py +0 -240
  37. ddeutil_workflow-0.0.4/src/ddeutil/workflow/loader.py +0 -174
  38. ddeutil_workflow-0.0.4/src/ddeutil/workflow/pipeline.py +0 -517
  39. ddeutil_workflow-0.0.4/src/ddeutil/workflow/schedule.py +0 -82
  40. ddeutil_workflow-0.0.4/src/ddeutil/workflow/tasks/_pandas.py +0 -54
  41. ddeutil_workflow-0.0.4/src/ddeutil/workflow/tasks/_polars.py +0 -92
  42. ddeutil_workflow-0.0.4/src/ddeutil/workflow/utils.py +0 -187
  43. ddeutil_workflow-0.0.4/src/ddeutil/workflow/vendors/__dataset.py +0 -127
  44. ddeutil_workflow-0.0.4/src/ddeutil/workflow/vendors/__dict.py +0 -333
  45. ddeutil_workflow-0.0.4/src/ddeutil/workflow/vendors/__init__.py +0 -0
  46. ddeutil_workflow-0.0.4/src/ddeutil/workflow/vendors/aws.py +0 -185
  47. ddeutil_workflow-0.0.4/src/ddeutil/workflow/vendors/az.py +0 -0
  48. ddeutil_workflow-0.0.4/src/ddeutil/workflow/vendors/minio.py +0 -11
  49. ddeutil_workflow-0.0.4/src/ddeutil/workflow/vendors/pd.py +0 -13
  50. ddeutil_workflow-0.0.4/src/ddeutil/workflow/vendors/pg.py +0 -11
  51. ddeutil_workflow-0.0.4/src/ddeutil/workflow/vendors/pl.py +0 -172
  52. ddeutil_workflow-0.0.4/src/ddeutil/workflow/vendors/sftp.py +0 -209
  53. ddeutil_workflow-0.0.4/src/ddeutil_workflow.egg-info/SOURCES.txt +0 -44
  54. ddeutil_workflow-0.0.4/src/ddeutil_workflow.egg-info/requires.txt +0 -12
  55. ddeutil_workflow-0.0.4/tests/test_base_data.py +0 -14
  56. ddeutil_workflow-0.0.4/tests/test_conn.py +0 -93
  57. ddeutil_workflow-0.0.4/tests/test_dataset.py +0 -90
  58. ddeutil_workflow-0.0.4/tests/test_loader.py +0 -6
  59. ddeutil_workflow-0.0.4/tests/test_pipeline_matrix.py +0 -29
  60. {ddeutil_workflow-0.0.4 → ddeutil_workflow-0.0.6}/LICENSE +0 -0
  61. {ddeutil_workflow-0.0.4 → ddeutil_workflow-0.0.6}/setup.cfg +0 -0
  62. {ddeutil_workflow-0.0.4 → ddeutil_workflow-0.0.6}/src/ddeutil_workflow.egg-info/dependency_links.txt +0 -0
  63. {ddeutil_workflow-0.0.4 → ddeutil_workflow-0.0.6}/src/ddeutil_workflow.egg-info/top_level.txt +0 -0
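The renames above fold the 0.0.4 connection, dataset, and vendor modules into a smaller 0.0.6 core (`on.py`, `pipeline.py`, `stage.py`, `scheduler.py`, `utils.py`). A minimal sketch of the import surface this implies, using only the two entry points that appear in the README hunks below (the wider API of the new modules is not shown in this diff):

```python
# Sketch only: module paths are taken from the file list above and the README
# examples below; everything else in stage.py / scheduler.py is not part of this diff.
from ddeutil.workflow.on import On              # on.py is new in 0.0.6; schedule.py is removed
from ddeutil.workflow.pipeline import Pipeline  # pipeline.py is new in 0.0.6

schedule = On.from_loader(name="on_every_5_min", externals={})
pipe = Pipeline.from_loader(name="run_py_local", externals={})
pipe.execute(params={"author-run": "Local Workflow", "run-date": "2024-01-01"})
```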
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: ddeutil-workflow
- Version: 0.0.4
+ Version: 0.0.6
  Summary: Data Developer & Engineer Workflow Utility Objects
  Author-email: ddeutils <korawich.anu@gmail.com>
  License: MIT
@@ -9,7 +9,7 @@ Project-URL: Source Code, https://github.com/ddeutils/ddeutil-workflow/
  Keywords: data,workflow,utility,pipeline
  Classifier: Topic :: Utilities
  Classifier: Natural Language :: English
- Classifier: Development Status :: 3 - Alpha
+ Classifier: Development Status :: 4 - Beta
  Classifier: Intended Audience :: Developers
  Classifier: Operating System :: OS Independent
  Classifier: Programming Language :: Python
@@ -23,35 +23,33 @@ Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: fmtutil
  Requires-Dist: ddeutil-io
- Requires-Dist: python-dotenv
- Provides-Extra: test
- Requires-Dist: sqlalchemy==2.0.30; extra == "test"
- Requires-Dist: paramiko==3.4.0; extra == "test"
- Requires-Dist: sshtunnel==0.4.0; extra == "test"
- Requires-Dist: boto3==1.34.117; extra == "test"
- Requires-Dist: fsspec==2024.5.0; extra == "test"
- Requires-Dist: polars==0.20.31; extra == "test"
- Requires-Dist: pyarrow==16.1.0; extra == "test"
-
- # Data Utility: _Workflow_
+ Requires-Dist: python-dotenv==1.0.1
+ Provides-Extra: app
+ Requires-Dist: fastapi==0.112.0; extra == "app"
+ Requires-Dist: apscheduler[sqlalchemy]==3.10.4; extra == "app"
+
+ # Workflow

  [![test](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
  [![python support version](https://img.shields.io/pypi/pyversions/ddeutil-workflow)](https://pypi.org/project/ddeutil-workflow/)
  [![size](https://img.shields.io/github/languages/code-size/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow)
+ [![gh license](https://img.shields.io/github/license/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)

  **Table of Contents**:

  - [Installation](#installation)
  - [Getting Started](#getting-started)
- - [Connection](#connection)
- - [Dataset](#dataset)
- - [Schedule](#schedule)
- - [Examples](#examples)
- - [Python](#python)
- - [Tasks (EL)](#tasks-extract--load)
- - [Hooks (T)](#hooks-transform)
-
- This **Utility Workflow** objects was created for easy to make a simple metadata
+ - [Core Features](#core-features)
+ - [On](#on)
+ - [Pipeline](#pipeline)
+ - [Usage](#usage)
+ - [Python & Bash](#python--bash)
+ - [Hook (EL)](#hook-extract--load)
+ - [Hook (T)](#hook-transform)
+ - [Configuration](#configuration)
+ - [Deployment](#deployment)
+
+ This **Workflow** objects was created for easy to make a simple metadata
  driven pipeline that able to **ETL, T, EL, or ELT** by `.yaml` file.

  I think we should not create the multiple pipeline per use-case if we able to
@@ -74,13 +72,18 @@ pipeline.
  pip install ddeutil-workflow
  ```

- This project need `ddeutil-io`, `ddeutil-model` extension namespace packages.
+ This project need `ddeutil-io` extension namespace packages. If you want to install
+ this package with application add-ons, you should add `app` in installation;
+
+ ```shell
+ pip install ddeutil-workflow[app]
+ ```

  ## Getting Started

  The first step, you should start create the connections and datasets for In and
  Out of you data that want to use in pipeline of workflow. Some of this component
- is similar component of the **Airflow** because I like it concepts.
+ is similar component of the **Airflow** because I like it orchestration concepts.

  The main feature of this project is the `Pipeline` object that can call any
  registries function. The pipeline can handle everything that you want to do, it
@@ -91,88 +94,84 @@ will passing parameters and catching the output for re-use it to next step.
  > dynamic registries instead of main features because it have a lot of maintain
  > vendor codes and deps. (I do not have time to handle this features)

- ### Connection
+ ### On

- The connection for worker able to do any thing.
+ The **On** is schedule object.

  ```yaml
- conn_postgres_data:
- type: conn.Postgres
- url: 'postgres//username:${ENV_PASS}@hostname:port/database?echo=True&time_out=10'
+ on_every_5_min:
+ type: on.On
+ cron: "*/5 * * * *"
  ```

  ```python
- from ddeutil.workflow.conn import Conn
+ from ddeutil.workflow.on import On

- conn = Conn.from_loader(name='conn_postgres_data', externals={})
- assert conn.ping()
- ```
+ schedule = On.from_loader(name='on_every_5_min', externals={})
+ assert '*/5 * * * *' == str(schedule.cronjob)

- ### Dataset
-
- The dataset is define any objects on the connection. This feature was implemented
- on `/vendors` because it has a lot of tools that can interact with any data systems
- in the data tool stacks.
-
- ```yaml
- ds_postgres_customer_tbl:
- type: dataset.PostgresTbl
- conn: 'conn_postgres_data'
- features:
- id: serial primary key
- name: varchar( 100 ) not null
+ cron_iter = schedule.generate('2022-01-01 00:00:00')
+ assert '2022-01-01 00:05:00' f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+ assert '2022-01-01 00:10:00' f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+ assert '2022-01-01 00:15:00' f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+ assert '2022-01-01 00:20:00' f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
  ```

- ```python
- from ddeutil.workflow.vendors.pg import PostgresTbl
-
- dataset = PostgresTbl.from_loader(name='ds_postgres_customer_tbl', externals={})
- assert dataset.exists()
- ```
+ ### Pipeline

- ### Schedule
+ The **Pipeline** object that is the core feature of this project.

  ```yaml
- schd_for_node:
- type: schedule.Schedule
- cron: "*/5 * * * *"
+ run_py_local:
+ type: ddeutil.workflow.pipeline.Pipeline
+ on: 'on_every_5_min'
+ params:
+ author-run:
+ type: str
+ run-date:
+ type: datetime
  ```

  ```python
- from ddeutil.workflow.schedule import Schedule
-
- scdl = Schedule.from_loader(name='schd_for_node', externals={})
- assert '*/5 * * * *' == str(scdl.cronjob)
+ from ddeutil.workflow.pipeline import Pipeline

- cron_iterate = scdl.generate('2022-01-01 00:00:00')
- assert '2022-01-01 00:05:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
- assert '2022-01-01 00:10:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
- assert '2022-01-01 00:15:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
- assert '2022-01-01 00:20:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
- assert '2022-01-01 00:25:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
+ pipe = Pipeline.from_loader(name='run_py_local', externals={})
+ pipe.execute(params={'author-run': 'Local Workflow', 'run-date': '2024-01-01'})
  ```

- ## Examples
+ > [!NOTE]
+ > The above parameter use short declarative statement. You can pass a parameter
+ > type to the key of a parameter name.
+ > ```yaml
+ > params:
+ > author-run: str
+ > run-date: datetime
+ > ```
+ >
+ > And for the type, you can remove `ddeutil.workflow` prefix because we can find
+ > it by looping search from `WORKFLOW_CORE_REGISTRY` value.
+
+ ## Usage

  This is examples that use workflow file for running common Data Engineering
  use-case.

- ### Python
+ > [!IMPORTANT]
+ > I recommend you to use `task` stage for all actions that you want to do with
+ > pipeline object.

- The state of doing lists that worker should to do. It be collection of the stage.
+ ### Python & Bash

  ```yaml
  run_py_local:
- type: ddeutil.workflow.pipe.Pipeline
+ type: pipeline.Pipeline
  params:
- author-run:
- type: str
- run-date:
- type: datetime
+ author-run: str
+ run-date: datetime
  jobs:
  first-job:
  stages:
- - name: Printing Information
+ - name: "Printing Information"
  id: define-func
  run: |
  x = '${{ params.author-run }}'
@@ -181,7 +180,7 @@ run_py_local:
  def echo(name: str):
  print(f'Hello {name}')

- - name: Run Sequence and use var from Above
+ - name: "Run Sequence and use var from Above"
  vars:
  x: ${{ params.author-run }}
  run: |
@@ -189,11 +188,17 @@ run_py_local:
  # Change x value
  x: int = 1

- - name: Call Function
+ - name: "Call Function"
  vars:
  echo: ${{ stages.define-func.outputs.echo }}
  run: |
  echo('Caller')
+ second-job:
+ stages:
+ - name: "Echo Bash Script"
+ id: shell-echo
+ bash: |
+ echo "Hello World from Shell"
  ```

  ```python
@@ -207,24 +212,23 @@ pipe.execute(params={'author-run': 'Local Workflow', 'run-date': '2024-01-01'})
  > Hello Local Workflow
  > Receive x from above with Local Workflow
  > Hello Caller
+ > Hello World from Shell
  ```

- ### Tasks (Extract & Load)
+ ### Hook (Extract & Load)

  ```yaml
  pipe_el_pg_to_lake:
- type: ddeutil.workflow.pipe.Pipeline
+ type: pipeline.Pipeline
  params:
- run-date:
- type: datetime
- author-email:
- type: str
+ run-date: datetime
+ author-email: str
  jobs:
  extract-load:
  stages:
  - name: "Extract Load from Postgres to Lake"
  id: extract-load
- task: tasks/postgres-to-delta@polars
+ uses: tasks/postgres-to-delta@polars
  with:
  source:
  conn: conn_postgres_url
@@ -236,11 +240,11 @@ pipe_el_pg_to_lake:
  endpoint: "/${{ params.name }}"
  ```

- ### Tasks (Transform)
+ ### Hook (Transform)

  ```yaml
- pipe_hook_mssql_proc:
- type: ddeutil.workflow.pipe.Pipeline
+ pipeline_hook_mssql_proc:
+ type: pipeline.Pipeline
  params:
  run_date: datetime
  sp_name: str
@@ -251,7 +255,7 @@ pipe_hook_mssql_proc:
  stages:
  - name: "Transform Data in MS SQL Server"
  id: transform
- task: tasks/mssql-proc@odbc
+ uses: tasks/mssql-proc@odbc
  with:
  exec: ${{ params.sp_name }}
  params:
@@ -261,6 +265,30 @@ pipe_hook_mssql_proc:
  target: ${{ params.target_name }}
  ```

- ## License
+ ## Configuration

- This project was licensed under the terms of the [MIT license](LICENSE).
+ ```bash
+ export WORKFLOW_ROOT_PATH=.
+ export WORKFLOW_CORE_REGISTRY=ddeutil.workflow,tests.utils
+ export WORKFLOW_CORE_PATH_CONF=conf
+ ```
+
+ Application config:
+
+ ```bash
+ export WORKFLOW_APP_DB_URL=postgresql+asyncpg://user:pass@localhost:5432/schedule
+ export WORKFLOW_APP_INTERVAL=10
+ ```
+
+ ## Deployment
+
+ This package able to run as a application service for receive manual trigger
+ from the master node via RestAPI.
+
+ > [!WARNING]
+ > This feature do not start yet because I still research and find the best tool
+ > to use it provision an app service, like `starlette`, `fastapi`, `apscheduler`.
+
+ ```shell
+ (venv) $ workflow start -p 7070
+ ```
README.md
@@ -1,22 +1,25 @@
- # Data Utility: _Workflow_
+ # Workflow

  [![test](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
  [![python support version](https://img.shields.io/pypi/pyversions/ddeutil-workflow)](https://pypi.org/project/ddeutil-workflow/)
  [![size](https://img.shields.io/github/languages/code-size/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow)
+ [![gh license](https://img.shields.io/github/license/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)

  **Table of Contents**:

  - [Installation](#installation)
  - [Getting Started](#getting-started)
- - [Connection](#connection)
- - [Dataset](#dataset)
- - [Schedule](#schedule)
- - [Examples](#examples)
- - [Python](#python)
- - [Tasks (EL)](#tasks-extract--load)
- - [Hooks (T)](#hooks-transform)
-
- This **Utility Workflow** objects was created for easy to make a simple metadata
+ - [Core Features](#core-features)
+ - [On](#on)
+ - [Pipeline](#pipeline)
+ - [Usage](#usage)
+ - [Python & Bash](#python--bash)
+ - [Hook (EL)](#hook-extract--load)
+ - [Hook (T)](#hook-transform)
+ - [Configuration](#configuration)
+ - [Deployment](#deployment)
+
+ This **Workflow** objects was created for easy to make a simple metadata
  driven pipeline that able to **ETL, T, EL, or ELT** by `.yaml` file.

  I think we should not create the multiple pipeline per use-case if we able to
@@ -39,13 +42,18 @@ pipeline.
  pip install ddeutil-workflow
  ```

- This project need `ddeutil-io`, `ddeutil-model` extension namespace packages.
+ This project need `ddeutil-io` extension namespace packages. If you want to install
+ this package with application add-ons, you should add `app` in installation;
+
+ ```shell
+ pip install ddeutil-workflow[app]
+ ```

  ## Getting Started

  The first step, you should start create the connections and datasets for In and
  Out of you data that want to use in pipeline of workflow. Some of this component
- is similar component of the **Airflow** because I like it concepts.
+ is similar component of the **Airflow** because I like it orchestration concepts.

  The main feature of this project is the `Pipeline` object that can call any
  registries function. The pipeline can handle everything that you want to do, it
@@ -56,88 +64,84 @@ will passing parameters and catching the output for re-use it to next step.
  > dynamic registries instead of main features because it have a lot of maintain
  > vendor codes and deps. (I do not have time to handle this features)

- ### Connection
+ ### On

- The connection for worker able to do any thing.
+ The **On** is schedule object.

  ```yaml
- conn_postgres_data:
- type: conn.Postgres
- url: 'postgres//username:${ENV_PASS}@hostname:port/database?echo=True&time_out=10'
+ on_every_5_min:
+ type: on.On
+ cron: "*/5 * * * *"
  ```

  ```python
- from ddeutil.workflow.conn import Conn
+ from ddeutil.workflow.on import On

- conn = Conn.from_loader(name='conn_postgres_data', externals={})
- assert conn.ping()
- ```
+ schedule = On.from_loader(name='on_every_5_min', externals={})
+ assert '*/5 * * * *' == str(schedule.cronjob)

- ### Dataset
-
- The dataset is define any objects on the connection. This feature was implemented
- on `/vendors` because it has a lot of tools that can interact with any data systems
- in the data tool stacks.
-
- ```yaml
- ds_postgres_customer_tbl:
- type: dataset.PostgresTbl
- conn: 'conn_postgres_data'
- features:
- id: serial primary key
- name: varchar( 100 ) not null
+ cron_iter = schedule.generate('2022-01-01 00:00:00')
+ assert '2022-01-01 00:05:00' f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+ assert '2022-01-01 00:10:00' f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+ assert '2022-01-01 00:15:00' f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+ assert '2022-01-01 00:20:00' f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
  ```

- ```python
- from ddeutil.workflow.vendors.pg import PostgresTbl
-
- dataset = PostgresTbl.from_loader(name='ds_postgres_customer_tbl', externals={})
- assert dataset.exists()
- ```
+ ### Pipeline

- ### Schedule
+ The **Pipeline** object that is the core feature of this project.

  ```yaml
- schd_for_node:
- type: schedule.Schedule
- cron: "*/5 * * * *"
+ run_py_local:
+ type: ddeutil.workflow.pipeline.Pipeline
+ on: 'on_every_5_min'
+ params:
+ author-run:
+ type: str
+ run-date:
+ type: datetime
  ```

  ```python
- from ddeutil.workflow.schedule import Schedule
-
- scdl = Schedule.from_loader(name='schd_for_node', externals={})
- assert '*/5 * * * *' == str(scdl.cronjob)
+ from ddeutil.workflow.pipeline import Pipeline

- cron_iterate = scdl.generate('2022-01-01 00:00:00')
- assert '2022-01-01 00:05:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
- assert '2022-01-01 00:10:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
- assert '2022-01-01 00:15:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
- assert '2022-01-01 00:20:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
- assert '2022-01-01 00:25:00' f"{cron_iterate.next:%Y-%m-%d %H:%M:%S}"
+ pipe = Pipeline.from_loader(name='run_py_local', externals={})
+ pipe.execute(params={'author-run': 'Local Workflow', 'run-date': '2024-01-01'})
  ```

- ## Examples
+ > [!NOTE]
+ > The above parameter use short declarative statement. You can pass a parameter
+ > type to the key of a parameter name.
+ > ```yaml
+ > params:
+ > author-run: str
+ > run-date: datetime
+ > ```
+ >
+ > And for the type, you can remove `ddeutil.workflow` prefix because we can find
+ > it by looping search from `WORKFLOW_CORE_REGISTRY` value.
+
+ ## Usage

  This is examples that use workflow file for running common Data Engineering
  use-case.

- ### Python
+ > [!IMPORTANT]
+ > I recommend you to use `task` stage for all actions that you want to do with
+ > pipeline object.

- The state of doing lists that worker should to do. It be collection of the stage.
+ ### Python & Bash

  ```yaml
  run_py_local:
- type: ddeutil.workflow.pipe.Pipeline
+ type: pipeline.Pipeline
  params:
- author-run:
- type: str
- run-date:
- type: datetime
+ author-run: str
+ run-date: datetime
  jobs:
  first-job:
  stages:
- - name: Printing Information
+ - name: "Printing Information"
  id: define-func
  run: |
  x = '${{ params.author-run }}'
@@ -146,7 +150,7 @@ run_py_local:
  def echo(name: str):
  print(f'Hello {name}')

- - name: Run Sequence and use var from Above
+ - name: "Run Sequence and use var from Above"
  vars:
  x: ${{ params.author-run }}
  run: |
@@ -154,11 +158,17 @@ run_py_local:
  # Change x value
  x: int = 1

- - name: Call Function
+ - name: "Call Function"
  vars:
  echo: ${{ stages.define-func.outputs.echo }}
  run: |
  echo('Caller')
+ second-job:
+ stages:
+ - name: "Echo Bash Script"
+ id: shell-echo
+ bash: |
+ echo "Hello World from Shell"
  ```

  ```python
@@ -172,24 +182,23 @@ pipe.execute(params={'author-run': 'Local Workflow', 'run-date': '2024-01-01'})
  > Hello Local Workflow
  > Receive x from above with Local Workflow
  > Hello Caller
+ > Hello World from Shell
  ```

- ### Tasks (Extract & Load)
+ ### Hook (Extract & Load)

  ```yaml
  pipe_el_pg_to_lake:
- type: ddeutil.workflow.pipe.Pipeline
+ type: pipeline.Pipeline
  params:
- run-date:
- type: datetime
- author-email:
- type: str
+ run-date: datetime
+ author-email: str
  jobs:
  extract-load:
  stages:
  - name: "Extract Load from Postgres to Lake"
  id: extract-load
- task: tasks/postgres-to-delta@polars
+ uses: tasks/postgres-to-delta@polars
  with:
  source:
  conn: conn_postgres_url
@@ -201,11 +210,11 @@ pipe_el_pg_to_lake:
  endpoint: "/${{ params.name }}"
  ```

- ### Tasks (Transform)
+ ### Hook (Transform)

  ```yaml
- pipe_hook_mssql_proc:
- type: ddeutil.workflow.pipe.Pipeline
+ pipeline_hook_mssql_proc:
+ type: pipeline.Pipeline
  params:
  run_date: datetime
  sp_name: str
@@ -216,7 +225,7 @@ pipe_hook_mssql_proc:
  stages:
  - name: "Transform Data in MS SQL Server"
  id: transform
- task: tasks/mssql-proc@odbc
+ uses: tasks/mssql-proc@odbc
  with:
  exec: ${{ params.sp_name }}
  params:
@@ -226,6 +235,30 @@ pipe_hook_mssql_proc:
  target: ${{ params.target_name }}
  ```

- ## License
+ ## Configuration

- This project was licensed under the terms of the [MIT license](LICENSE).
+ ```bash
+ export WORKFLOW_ROOT_PATH=.
+ export WORKFLOW_CORE_REGISTRY=ddeutil.workflow,tests.utils
+ export WORKFLOW_CORE_PATH_CONF=conf
+ ```
+
+ Application config:
+
+ ```bash
+ export WORKFLOW_APP_DB_URL=postgresql+asyncpg://user:pass@localhost:5432/schedule
+ export WORKFLOW_APP_INTERVAL=10
+ ```
+
+ ## Deployment
+
+ This package able to run as a application service for receive manual trigger
+ from the master node via RestAPI.
+
+ > [!WARNING]
+ > This feature do not start yet because I still research and find the best tool
+ > to use it provision an app service, like `starlette`, `fastapi`, `apscheduler`.
+
+ ```shell
+ (venv) $ workflow start -p 7070
+ ```
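Both file diffs end with the new Configuration section. Since `python-dotenv==1.0.1` becomes a pinned core dependency in 0.0.6, a `.env` file is one plausible way to supply those `WORKFLOW_*` variables during local development; a minimal sketch, assuming only the variable names shown above (how the package itself consumes them is not part of this diff):

```python
# Sketch: read the WORKFLOW_* variables from the Configuration section with
# python-dotenv (a core dependency in 0.0.6). Variable names come from the diff
# above; the defaults used here are illustrative assumptions, not package behaviour.
import os
from pathlib import Path

from dotenv import load_dotenv

load_dotenv()  # picks up a local .env file, e.g. one containing WORKFLOW_ROOT_PATH=.

root = Path(os.getenv("WORKFLOW_ROOT_PATH", "."))
conf_path = root / os.getenv("WORKFLOW_CORE_PATH_CONF", "conf")
registry = os.getenv("WORKFLOW_CORE_REGISTRY", "ddeutil.workflow").split(",")
print(f"conf: {conf_path}, registry: {registry}")
```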