ddeutil-workflow 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddeutil/workflow/app.py DELETED
@@ -1,41 +0,0 @@
- # ------------------------------------------------------------------------------
- # Copyright (c) 2022 Korawich Anuttra. All rights reserved.
- # Licensed under the MIT License. See LICENSE in the project root for
- # license information.
- # ------------------------------------------------------------------------------
- import functools
- import time
-
- import schedule
-
-
- def catch_exceptions(cancel_on_failure=False):
-     def catch_exceptions_decorator(job_func):
-         @functools.wraps(job_func)
-         def wrapper(*args, **kwargs):
-             try:
-                 return job_func(*args, **kwargs)
-             except Exception as err:
-                 print(err)
-
-                 if cancel_on_failure:
-                     return schedule.CancelJob
-
-         return wrapper
-
-     return catch_exceptions_decorator
-
-
- @catch_exceptions(cancel_on_failure=True)
- def bad_task():
-     return 1 / 0
-
-
- schedule.every(5).seconds.do(bad_task)
-
- if __name__ == "__main__":
-     while True:
-         schedule.run_pending()
-         time.sleep(1)
-         if not schedule.get_jobs():
-             break
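The deleted `app.py` above is the `schedule` library's error-handling recipe wrapped around a minimal scheduler loop: exceptions raised by a job are printed, and because `bad_task` uses `cancel_on_failure=True`, the failing job returns `schedule.CancelJob` and is unscheduled, so the `__main__` loop exits once no jobs remain. For contrast, a minimal sketch of the same pattern with a job that stays scheduled after failures (the job body is an illustrative assumption; `catch_exceptions` is the decorator defined above):

```python
import schedule  # same scheduler library as the deleted app.py

@catch_exceptions(cancel_on_failure=False)  # decorator from app.py above
def flaky_task():
    # Illustrative body: the decorator prints this error, and the job
    # stays on the schedule because cancel_on_failure is False.
    raise RuntimeError("transient failure")

schedule.every(5).seconds.do(flaky_task)
```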
ddeutil_workflow-0.0.7.dist-info/METADATA DELETED
@@ -1,341 +0,0 @@
- Metadata-Version: 2.1
- Name: ddeutil-workflow
- Version: 0.0.7
- Summary: Data Developer & Engineer Workflow Utility Objects
- Author-email: ddeutils <korawich.anu@gmail.com>
- License: MIT
- Project-URL: Homepage, https://github.com/ddeutils/ddeutil-workflow/
- Project-URL: Source Code, https://github.com/ddeutils/ddeutil-workflow/
- Keywords: data,workflow,utility,pipeline
- Classifier: Topic :: Utilities
- Classifier: Natural Language :: English
- Classifier: Development Status :: 4 - Beta
- Classifier: Intended Audience :: Developers
- Classifier: Operating System :: OS Independent
- Classifier: Programming Language :: Python
- Classifier: Programming Language :: Python :: 3 :: Only
- Classifier: Programming Language :: Python :: 3.9
- Classifier: Programming Language :: Python :: 3.10
- Classifier: Programming Language :: Python :: 3.11
- Classifier: Programming Language :: Python :: 3.12
- Requires-Python: >=3.9.13
- Description-Content-Type: text/markdown
- License-File: LICENSE
- Requires-Dist: fmtutil
- Requires-Dist: ddeutil-io
- Requires-Dist: python-dotenv ==1.0.1
- Provides-Extra: api
- Requires-Dist: fastapi[standard] ==0.112.0 ; extra == 'api'
- Requires-Dist: apscheduler[sqlalchemy] <4.0.0,==3.10.4 ; extra == 'api'
- Requires-Dist: croniter ==3.0.3 ; extra == 'api'
- Provides-Extra: app
- Requires-Dist: schedule <2.0.0,==1.2.2 ; extra == 'app'
-
- # Workflow
-
- [![test](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
- [![python support version](https://img.shields.io/pypi/pyversions/ddeutil-workflow)](https://pypi.org/project/ddeutil-workflow/)
- [![size](https://img.shields.io/github/languages/code-size/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow)
- [![gh license](https://img.shields.io/github/license/ddeutils/ddeutil-workflow)](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
-
- **Table of Contents**:
-
- - [Installation](#installation)
- - [Getting Started](#getting-started)
-   - [On](#on)
-   - [Pipeline](#pipeline)
- - [Usage](#usage)
-   - [Python & Bash](#python--bash)
-   - [Hook (EL)](#hook-extract--load)
-   - [Hook (T)](#hook-transform)
- - [Configuration](#configuration)
- - [Deployment](#deployment)
-
- The **Workflow** objects were created to make metadata-driven data
- pipeline orchestration simple, usable for **ETL, T, EL, or ELT** through
- a `.yaml` file template.
-
- In my opinion, there is no need to duplicate pipeline code when one
- template pipeline can take dynamic input parameters that change per
- use case. This way, a lot of logical pipelines across an organization
- can be handled with metadata configuration alone. I call this a
- **Metadata Driven Data Pipeline**.
-
- Next, some monitoring tools are needed to manage the logging returned
- from pipeline runs, because the logs alone do not show which use case a
- given run belongs to.
-
- > [!NOTE]
- > _Disclaimer_: The dynamic statement syntax is inspired by GitHub Actions
- > `.yml` files, and the config file layout draws on several data
- > orchestration framework tools from my experience as a Data Engineer.
-
- ## Installation
-
- ```shell
- pip install ddeutil-workflow
- ```
-
- This project needs the `ddeutil-io` extension namespace package. If you
- want to install this package with the application add-on, include the
- `app` extra; for the REST API add-on, include `api`:
-
- ```shell
- pip install ddeutil-workflow[app]
- ```
-
- ```shell
- pip install ddeutil-workflow[api]
- ```
-
- ## Getting Started
-
- As a first step, create the connections and datasets for the inputs and
- outputs of the data you want to use in a workflow pipeline. Some of these
- components are similar to **Airflow** components because I like its
- orchestration concepts.
-
- The main feature of this project is the `Pipeline` object, which can call
- any registered function. The pipeline can handle everything you want to
- do: it passes parameters along and catches the output for reuse in the
- next step.
-
- > [!IMPORTANT]
- > In the future, this project will move connections and datasets from the
- > main features into dynamic registries, because they carry a lot of
- > vendor code and dependencies to maintain. (I do not have time to handle
- > these features.)
-
- ### On
-
- The **On** object is the schedule object.
-
- ```yaml
- on_every_5_min:
-   type: on.On
-   cron: "*/5 * * * *"
- ```
-
- ```python
- from ddeutil.workflow.on import On
-
- schedule = On.from_loader(name='on_every_5_min', externals={})
- assert '*/5 * * * *' == str(schedule.cronjob)
-
- cron_iter = schedule.generate('2022-01-01 00:00:00')
- assert '2022-01-01 00:05:00' == f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
- assert '2022-01-01 00:10:00' == f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
- assert '2022-01-01 00:15:00' == f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
- assert '2022-01-01 00:20:00' == f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
- ```
-
- ### Pipeline
-
- The **Pipeline** object is the core feature of this project.
-
- ```yaml
- run_py_local:
-   type: ddeutil.workflow.pipeline.Pipeline
-   on: 'on_every_5_min'
-   params:
-     author-run:
-       type: str
-     run-date:
-       type: datetime
- ```
-
- ```python
- from ddeutil.workflow.pipeline import Pipeline
-
- pipe = Pipeline.from_loader(name='run_py_local', externals={})
- pipe.execute(params={'author-run': 'Local Workflow', 'run-date': '2024-01-01'})
- ```
-
- > [!NOTE]
- > The parameters above use the full declarative form. You can instead
- > pass a parameter type directly as the value of the parameter name key:
- > ```yaml
- > params:
- >   author-run: str
- >   run-date: datetime
- > ```
- >
- > As for the `type` field, you can drop the `ddeutil.workflow` prefix
- > because it is resolved by a looping search over the
- > `WORKFLOW_CORE_REGISTRY` value, as sketched below.
-
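The note above only loosely describes the type lookup. A minimal sketch of how a looping search over `WORKFLOW_CORE_REGISTRY` could resolve a short name such as `pipeline.Pipeline` (an illustrative assumption, not the package's actual loader code):

```python
import importlib
import os

def resolve_type(short_name: str):
    """Try each registry prefix until the dotted name imports."""
    module_part, _, attr = short_name.rpartition(".")
    prefixes = os.getenv("WORKFLOW_CORE_REGISTRY", "ddeutil.workflow").split(",")
    for prefix in prefixes:
        try:
            module = importlib.import_module(f"{prefix}.{module_part}")
            return getattr(module, attr)
        except (ImportError, AttributeError):
            continue
    raise ValueError(f"cannot resolve type: {short_name!r}")

# e.g. resolve_type("pipeline.Pipeline") tries ddeutil.workflow.pipeline first.
```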
- ## Usage
-
- These are examples of workflow files for running common Data Engineering
- use cases.
-
- > [!IMPORTANT]
- > I recommend using the `task` stage for all actions that you want to
- > perform with a pipeline object.
-
- ### Python & Bash
-
- ```yaml
- run_py_local:
-   type: pipeline.Pipeline
-   params:
-     author-run: str
-     run-date: datetime
-   jobs:
-     first-job:
-       stages:
-         - name: "Printing Information"
-           id: define-func
-           run: |
-             x = '${{ params.author-run }}'
-             print(f'Hello {x}')
-
-             def echo(name: str):
-                 print(f'Hello {name}')
-
-         - name: "Run Sequence and use var from Above"
-           vars:
-             x: ${{ params.author-run }}
-           run: |
-             print(f'Receive x from above with {x}')
-             # Change x value
-             x: int = 1
-
-         - name: "Call Function"
-           vars:
-             echo: ${{ stages.define-func.outputs.echo }}
-           run: |
-             echo('Caller')
-     second-job:
-       stages:
-         - name: "Echo Bash Script"
-           id: shell-echo
-           bash: |
-             echo "Hello World from Shell"
- ```
-
- ```python
- from ddeutil.workflow.pipeline import Pipeline
-
- pipe = Pipeline.from_loader(name='run_py_local', externals={})
- pipe.execute(params={'author-run': 'Local Workflow', 'run-date': '2024-01-01'})
- ```
-
- ```shell
- > Hello Local Workflow
- > Receive x from above with Local Workflow
- > Hello Caller
- > Hello World from Shell
- ```
-
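Note how the third stage calls `echo`, a function defined by the first stage and injected via `stages.define-func.outputs.echo`. As a rough mental model only (an assumption about the engine, not its actual implementation), a `run` stage can be pictured as `exec`-ing its body and harvesting newly bound names as outputs:

```python
def run_stage(code: str, stage_vars: dict) -> dict:
    """Hypothetical sketch: execute a stage body, return new names as outputs."""
    namespace = dict(stage_vars)
    exec(code, namespace)  # run the stage body with the injected vars
    return {
        name: value
        for name, value in namespace.items()
        if name not in stage_vars and not name.startswith("__")
    }
```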
- ### Hook (Extract & Load)
-
- ```yaml
- pipe_el_pg_to_lake:
-   type: pipeline.Pipeline
-   params:
-     run-date: datetime
-     author-email: str
-   jobs:
-     extract-load:
-       stages:
-         - name: "Extract Load from Postgres to Lake"
-           id: extract-load
-           uses: tasks/postgres-to-delta@polars
-           with:
-             source:
-               conn: conn_postgres_url
-               query: |
-                 select * from ${{ params.name }}
-                 where update_date = '${{ params.datetime }}'
-             sink:
-               conn: conn_az_lake
-               endpoint: "/${{ params.name }}"
- ```
-
- Implement the hook:
-
- ```python
- from ddeutil.workflow.utils import tag
-
- @tag('polars', alias='postgres-to-delta')
- def postgres_to_delta(source, sink):
-     return {
-         "source": source, "sink": sink,
-     }
- ```
-
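A `uses` string such as `tasks/postgres-to-delta@polars` pairs an alias with a tag name, which suggests `@tag` records hooks in a registry keyed the same way. A minimal sketch of such a decorator and lookup (an illustrative assumption; the real `ddeutil.workflow.utils.tag` may differ):

```python
from typing import Any, Callable, Dict, Optional, Tuple

# Hypothetical registry mapping (alias, tag-name) -> hook function.
HOOK_REGISTRY: Dict[Tuple[str, str], Callable[..., Any]] = {}

def tag(name: str, alias: Optional[str] = None):
    """Register the decorated function under (alias, tag-name)."""
    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
        HOOK_REGISTRY[(alias or func.__name__, name)] = func
        return func
    return decorator

def resolve_uses(uses: str) -> Callable[..., Any]:
    """Resolve 'tasks/<alias>@<tag>' to the registered hook function."""
    path, _, tag_name = uses.partition("@")
    return HOOK_REGISTRY[(path.split("/")[-1], tag_name)]
```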
- ### Hook (Transform)
-
- ```yaml
- pipeline_hook_mssql_proc:
-   type: pipeline.Pipeline
-   params:
-     run_date: datetime
-     sp_name: str
-     source_name: str
-     target_name: str
-   jobs:
-     transform:
-       stages:
-         - name: "Transform Data in MS SQL Server"
-           id: transform
-           uses: tasks/mssql-proc@odbc
-           with:
-             exec: ${{ params.sp_name }}
-             params:
-               run_mode: "T"
-               run_date: ${{ params.run_date }}
-               source: ${{ params.source_name }}
-               target: ${{ params.target_name }}
- ```
-
- Implement the hook:
-
- ```python
- from ddeutil.workflow.utils import tag
-
- @tag('odbc', alias='mssql-proc')
- def odbc_mssql_procedure(_exec: str, params: dict):
-     return {
-         "exec": _exec, "params": params,
-     }
- ```
-
- ## Configuration
-
- ```bash
- export WORKFLOW_ROOT_PATH=.
- export WORKFLOW_CORE_REGISTRY=ddeutil.workflow,tests.utils
- export WORKFLOW_CORE_REGISTRY_FILTER=ddeutil.workflow.utils
- export WORKFLOW_CORE_PATH_CONF=conf
- export WORKFLOW_CORE_TIMEZONE=Asia/Bangkok
- export WORKFLOW_CORE_DEFAULT_STAGE_ID=true
-
- export WORKFLOW_CORE_MAX_PIPELINE_POKING=4
- export WORKFLOW_CORE_MAX_JOB_PARALLEL=2
- ```
-
- Application config:
-
- ```bash
- export WORKFLOW_APP_DB_URL=postgresql+asyncpg://user:pass@localhost:5432/schedule
- export WORKFLOW_APP_INTERVAL=10
- ```
-
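These are ordinary environment variables, so they can be loaded from a `.env` file with `python-dotenv` (a declared dependency) and read with `os.getenv`. A hedged sketch of consuming a few of them; the fallback values shown are illustrative assumptions, not the package's documented defaults:

```python
import os

from dotenv import load_dotenv  # python-dotenv, listed in Requires-Dist

load_dotenv()  # pick up a local .env file if one exists

ROOT_PATH = os.getenv("WORKFLOW_ROOT_PATH", ".")
TIMEZONE = os.getenv("WORKFLOW_CORE_TIMEZONE", "Asia/Bangkok")
DEFAULT_STAGE_ID = os.getenv("WORKFLOW_CORE_DEFAULT_STAGE_ID", "true") == "true"
MAX_JOB_PARALLEL = int(os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "2"))
```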
- ## Deployment
-
- This package can run as an application service that receives manual
- triggers from the master node via REST API, or as a scheduler background
- service that works like a crontab job but is driven through the Python
- API.
-
- ### Schedule Service
-
- ```shell
- (venv) $ python -m src.ddeutil.workflow.app
- ```
-
- ### API Server
-
- ```shell
- (venv) $ uvicorn src.ddeutil.workflow.api:app --host 0.0.0.0 --port 80 --reload
- ```
-
- > [!NOTE]
- > If this package is already deployed (installed), you can run it with
- > `uvicorn ddeutil.workflow.api:app --host 0.0.0.0 --port 80`.
ddeutil_workflow-0.0.7.dist-info/RECORD DELETED
@@ -1,20 +0,0 @@
- ddeutil/workflow/__about__.py,sha256=b23XabBwtuoPOLmS_Hj_gSA4LZ0fRfAkACM6c3szVoc,27
- ddeutil/workflow/__init__.py,sha256=4PEL3RdHmUowK0Dz-tK7fO0wvFX4u9CLd0Up7b3lrAQ,760
- ddeutil/workflow/__types.py,sha256=SYMoxbENQX8uPsiCZkjtpHAqqHOh8rUrarAFicAJd0E,1773
- ddeutil/workflow/api.py,sha256=d2Mmv9jTtN3FITIy-2mivyAKdBOGZxtkNWRMPbCLlFI,3341
- ddeutil/workflow/app.py,sha256=GbdwvUkE8lO2Ze4pZ0-J-7p9mcZAaORfjkHwW_oZIP0,1076
- ddeutil/workflow/exceptions.py,sha256=BH7COn_3uz3z7oJBZOQGiuo8osBFgeXL8HYymnjCOPQ,671
- ddeutil/workflow/loader.py,sha256=_ZD-XP5P7VbUeqItrUVPaKIZu6dMUZ2aywbCbReW1hQ,2778
- ddeutil/workflow/log.py,sha256=_GJEdJr7bqpcQDxZjrqHd-hkiW3NKFaVoR6voE6Ty0o,952
- ddeutil/workflow/on.py,sha256=YoEqDbzJUwqOA3JRltbvlYr0rNTtxdmb7cWMxl8U19k,6717
- ddeutil/workflow/pipeline.py,sha256=dKF09TFS_v5TCD-5o8tp1UhB5sGuWIQu4zl_UFtlIC0,25951
- ddeutil/workflow/repeat.py,sha256=sNoRfbOR4cYm_edrSvlVy9N8Dk_osLIq9FC5GMZz32M,4621
- ddeutil/workflow/route.py,sha256=Ck_O1xJwI-vKkMJr37El0-1PGKlwKF8__DDNWVQrf0A,2079
- ddeutil/workflow/scheduler.py,sha256=FqmkvWCqwJ4eRf8aDn5Ce4FcNWqmcvu2aTTfL34lfgs,22184
- ddeutil/workflow/stage.py,sha256=z05bKk2QFQDXjidSnQYCVOdceSpSO13sHXE0B1UH6XA,14978
- ddeutil/workflow/utils.py,sha256=pDM2jaYVP-USH0pLd_XmHOguxVPGVzZ76hOh1AZdINU,18495
- ddeutil_workflow-0.0.7.dist-info/LICENSE,sha256=nGFZ1QEhhhWeMHf9n99_fdt4vQaXS29xWKxt-OcLywk,1085
- ddeutil_workflow-0.0.7.dist-info/METADATA,sha256=ba2nH57cpHB2P4ldQCRT8ZWDj3r1OPx9a1dgcB0a2Ws,9702
- ddeutil_workflow-0.0.7.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
- ddeutil_workflow-0.0.7.dist-info/top_level.txt,sha256=m9M6XeSWDwt_yMsmH6gcOjHZVK5O0-vgtNBuncHjzW4,8
- ddeutil_workflow-0.0.7.dist-info/RECORD,,