ddeutil-workflow 0.0.8__py3-none-any.whl → 0.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +3 -14
- ddeutil/workflow/api.py +44 -75
- ddeutil/workflow/cli.py +51 -0
- ddeutil/workflow/cron.py +713 -0
- ddeutil/workflow/loader.py +65 -13
- ddeutil/workflow/log.py +147 -49
- ddeutil/workflow/on.py +18 -15
- ddeutil/workflow/pipeline.py +389 -140
- ddeutil/workflow/repeat.py +9 -5
- ddeutil/workflow/route.py +30 -37
- ddeutil/workflow/scheduler.py +398 -659
- ddeutil/workflow/stage.py +145 -73
- ddeutil/workflow/utils.py +133 -42
- ddeutil_workflow-0.0.9.dist-info/METADATA +273 -0
- ddeutil_workflow-0.0.9.dist-info/RECORD +22 -0
- {ddeutil_workflow-0.0.8.dist-info → ddeutil_workflow-0.0.9.dist-info}/WHEEL +1 -1
- ddeutil_workflow-0.0.9.dist-info/entry_points.txt +2 -0
- ddeutil/workflow/app.py +0 -45
- ddeutil_workflow-0.0.8.dist-info/METADATA +0 -266
- ddeutil_workflow-0.0.8.dist-info/RECORD +0 -20
- {ddeutil_workflow-0.0.8.dist-info → ddeutil_workflow-0.0.9.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.8.dist-info → ddeutil_workflow-0.0.9.dist-info}/top_level.txt +0 -0
ddeutil/workflow/utils.py
CHANGED
```diff
@@ -9,6 +9,7 @@ import inspect
 import logging
 import os
 import stat
+import time
 from abc import ABC, abstractmethod
 from ast import Call, Constant, Expr, Module, Name, parse
 from collections.abc import Iterator
@@ -17,21 +18,29 @@ from functools import wraps
 from hashlib import md5
 from importlib import import_module
 from inspect import isfunction
-from itertools import product
+from itertools import chain, islice, product
 from pathlib import Path
+from random import randrange
 from typing import Any, Callable, Literal, Optional, Protocol, Union
 from zoneinfo import ZoneInfo
 
+try:
+    from typing import ParamSpec
+except ImportError:
+    from typing_extensions import ParamSpec
+
 from ddeutil.core import getdot, hasdot, hash_str, import_string, lazy, str2bool
 from ddeutil.io import PathData, search_env_replace
 from ddeutil.io.models.lineage import dt_now
-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
 from pydantic.functional_validators import model_validator
 from typing_extensions import Self
 
 from .__types import DictData, Matrix, Re
 from .exceptions import ParamValueException, UtilException
 
+P = ParamSpec("P")
+
 
 def get_diff_sec(dt: datetime, tz: ZoneInfo | None = None) -> int:
     """Return second value that come from diff of an input datetime and the
```
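The `try`/`except` import keeps the module importable on Python 3.9, where `typing.ParamSpec` does not exist yet (it landed in 3.10); the `typing_extensions` backport fills the gap, matching the `Requires-Python: >=3.9.13` floor in the new METADATA. A minimal sketch of why `ParamSpec` is useful here, assuming the decorator-typing intent behind the new `P = ParamSpec("P")` (the `passthrough` helper below is illustrative, not from the diff):

```python
# Illustrative only: ParamSpec lets a decorator preserve the wrapped
# function's full signature for type checkers.
from typing import Callable, TypeVar

try:
    from typing import ParamSpec  # Python >= 3.10
except ImportError:
    from typing_extensions import ParamSpec  # backport for Python 3.9

P = ParamSpec("P")
R = TypeVar("R")

def passthrough(func: Callable[P, R]) -> Callable[P, R]:
    def inner(*args: P.args, **kwargs: P.kwargs) -> R:
        return func(*args, **kwargs)
    return inner
```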
```diff
@@ -42,6 +51,13 @@ def get_diff_sec(dt: datetime, tz: ZoneInfo | None = None) -> int:
     )
 
 
+def delay() -> None:
+    """Delay time that use time.sleep with random second value between
+    0.00 - 0.99 seconds.
+    """
+    time.sleep(randrange(0, 99, step=10) / 100)
+
+
 class Engine(BaseModel):
     """Engine Model"""
 
```
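Worth noting for reviewers: `randrange(0, 99, step=10)` only yields multiples of ten below 99, so the sleep granularity is coarser than the docstring's "0.00 - 0.99 seconds" suggests. A quick standalone check (separate from the diff):

```python
# randrange(0, 99, 10) draws from {0, 10, ..., 90}, so delay() sleeps
# 0.0-0.9 s in 0.1 s steps rather than a continuous 0.00-0.99 s range.
from random import randrange

observed = sorted({randrange(0, 99, step=10) / 100 for _ in range(1_000)})
print(observed)  # [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
```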
```diff
@@ -257,7 +273,8 @@ class DefaultParam(BaseParam):
     )
 
     @model_validator(mode="after")
-    def
+    def __check_default(self) -> Self:
+        """Check default value should pass when it set required."""
         if not self.required and self.default is None:
             raise ParamValueException(
                 "Default should set when this parameter does not required."
```
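The renamed validator means a parameter declared as optional must carry a default at model-build time. A behavior sketch, assuming `required` is a flag defined on `BaseParam` as the validator body implies:

```python
# Hedged sketch: an optional parameter with no default should now fail model
# validation (the exact exception surface depends on how pydantic wraps it).
from ddeutil.workflow.utils import StrParam

try:
    StrParam(required=False)  # optional, but no default supplied
except Exception as err:
    print(type(err).__name__, err)
```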
```diff
@@ -273,7 +290,13 @@ class DatetimeParam(DefaultParam):
     default: datetime = Field(default_factory=dt_now)
 
     def receive(self, value: str | datetime | date | None = None) -> datetime:
-        """Receive value that match with datetime.
+        """Receive value that match with datetime. If a input value pass with
+        None, it will use default value instead.
+
+        :param value: A value that want to validate with datetime parameter
+            type.
+        :rtype: datetime
+        """
         if value is None:
             return self.default
 
@@ -295,7 +318,11 @@ class StrParam(DefaultParam):
     type: Literal["str"] = "str"
 
     def receive(self, value: Optional[str] = None) -> str | None:
-        """Receive value that match with str.
+        """Receive value that match with str.
+
+        :param value: A value that want to validate with string parameter type.
+        :rtype: str | None
+        """
         if value is None:
             return self.default
         return str(value)
@@ -307,7 +334,11 @@ class IntParam(DefaultParam):
     type: Literal["int"] = "int"
 
     def receive(self, value: Optional[int] = None) -> int | None:
-        """Receive value that match with int.
+        """Receive value that match with int.
+
+        :param value: A value that want to validate with integer parameter type.
+        :rtype: int | None
+        """
         if value is None:
             return self.default
         if not isinstance(value, int):
```
```diff
@@ -348,57 +379,57 @@ Param = Union[
 ]
 
 
-class Context(BaseModel):
-    """Context Pydantic Model"""
-
-    params: dict = Field(default_factory=dict)
-    jobs: dict = Field(default_factory=dict)
-    error: dict = Field(default_factory=dict)
-
-
 class Result(BaseModel):
     """Result Pydantic Model for passing parameter and receiving output from
     the pipeline execution.
     """
 
-    # TODO: Add running ID to this result dataclass.
-    # ---
-    # parent_run_id: str
-    # run_id: str
-    #
     status: int = Field(default=2)
     context: DictData = Field(default_factory=dict)
 
+    # NOTE: Ignore this field to compare another result model with __eq__.
+    _parent_run_id: Optional[str] = PrivateAttr(default=None)
+    _run_id: Optional[str] = PrivateAttr(default=None)
+
+    @model_validator(mode="after")
+    def __prepare_run_id(self):
+        if self._run_id is None:
+            self._run_id = gen_id("manual", unique=True)
+        return self
+
+    def set_run_id(self, running_id: str) -> Self:
+        self._run_id = running_id
+        return self
+
+    def set_parent_run_id(self, running_id: str) -> Self:
+        self._parent_run_id = running_id
+        return self
+
+    @property
+    def parent_run_id(self):
+        return self._parent_run_id
+
+    @property
+    def run_id(self):
+        return self._run_id
+
     def receive(self, result: Result) -> Result:
         self.__dict__["status"] = result.status
         self.__dict__["context"].update(result.context)
+        self._parent_run_id = result.parent_run_id
+        self._run_id = result.run_id
         return self
 
     def receive_jobs(self, result: Result) -> Result:
         self.__dict__["status"] = result.status
+
+        # NOTE: Check the context has jobs key.
         if "jobs" not in self.__dict__["context"]:
             self.__dict__["context"]["jobs"] = {}
-        self.__dict__["context"]["jobs"].update(result.context)
-        return self
-
 
-
-
-
-"""
-
-# TODO: Add running ID to this result dataclass.
-# ---
-# parent_run_id: str
-# run_id: str
-#
-status: int = Field(default=2)
-context: Context = Field(default_factory=Context)
-
-def receive(self, result: ReResult) -> ReResult:
-    self.__dict__["status"] = result.status
-    self.__dict__["context"].__dict__["jobs"].update(result.context.jobs)
-    self.__dict__["context"].__dict__["error"].update(result.context.error)
+        self.__dict__["context"]["jobs"].update(result.context)
+        self._parent_run_id = result.parent_run_id
+        self._run_id = result.run_id
         return self
 
 
```
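The run-ID plumbing replaces both the old `Context` model and the dead, string-literal `ReResult` block. A minimal usage sketch of the new API (names come from the hunk; `gen_id` is called unqualified, so it is assumed to live elsewhere in this module):

```python
# Minimal sketch of the new Result run-ID API added in this hunk.
from ddeutil.workflow.utils import Result

rs = Result(status=0, context={"params": {"name": "demo"}})
print(rs.run_id)  # auto-populated by __prepare_run_id via gen_id("manual", ...)

# Schedulers can thread IDs through nested executions with the fluent setters.
rs.set_parent_run_id("pipeline-run-01").set_run_id("job-run-01")
print(rs.parent_run_id, rs.run_id)
```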
```diff
@@ -426,7 +457,7 @@ class FilterFunc(Protocol):
     def __call__(self, *args, **kwargs): ...
 
 
-def custom_filter(name: str):
+def custom_filter(name: str) -> Callable[P, TagFunc]:
     """Custom filter decorator function that set function attributes, ``filter``
     for making filter registries variable.
 
@@ -511,7 +542,11 @@ def get_args_const(
 
 @custom_filter("fmt")
 def datetime_format(value: datetime, fmt: str = "%Y-%m-%d %H:%M:%S") -> str:
-
+    if isinstance(value, datetime):
+        return value.strftime(fmt)
+    raise UtilException(
+        "This custom function should pass input value with datetime type."
+    )
 
 
 def map_post_filter(
```
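With the `fmt` body filled in, the filter can be exercised end-to-end through `param2template`. An illustrative call, assuming the `${{ ... }}` caller syntax matched by `Re.RE_CALLER` and filter arguments parsed by `get_args_const` (the template string below is an assumed example, not taken from the diff):

```python
# Illustrative template call for the "fmt" filter.
from datetime import datetime
from ddeutil.workflow.utils import param2template

value = param2template(
    "run_${{ params.run-date | fmt('%Y%m%d') }}",
    params={"params": {"run-date": datetime(2024, 1, 1)}},
)
print(value)  # expected: "run_20240101"
```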
```diff
@@ -556,6 +591,40 @@ def map_post_filter(
     return value
 
 
+def not_in_template(value: Any, *, not_in: str = "matrix.") -> bool:
+    """Check value should not pass template with not_in value prefix.
+
+    :param value:
+    :param not_in:
+    :rtype: bool
+    """
+    if isinstance(value, dict):
+        return any(not_in_template(value[k], not_in=not_in) for k in value)
+    elif isinstance(value, (list, tuple, set)):
+        return any(not_in_template(i, not_in=not_in) for i in value)
+    elif not isinstance(value, str):
+        return False
+    return any(
+        (not found.group("caller").strip().startswith(not_in))
+        for found in Re.RE_CALLER.finditer(value.strip())
+    )
+
+
+def has_template(value: Any) -> bool:
+    """Check value include templating string.
+
+    :param value:
+    :rtype: bool
+    """
+    if isinstance(value, dict):
+        return any(has_template(value[k]) for k in value)
+    elif isinstance(value, (list, tuple, set)):
+        return any(has_template(i) for i in value)
+    elif not isinstance(value, str):
+        return False
+    return bool(Re.RE_CALLER.findall(value.strip()))
+
+
 def str2template(
     value: str,
     params: DictData,
```
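Both helpers recurse through dicts and sequences before applying `Re.RE_CALLER` to strings. Expected behavior on small inputs, assuming the caller syntax is the `${{ ... }}` form used elsewhere in the package:

```python
# Expected-behavior sketch for the two new template checks.
from ddeutil.workflow.utils import has_template, not_in_template

assert has_template("${{ params.name }}")
assert not has_template("plain string")

# not_in_template is True when some caller does NOT start with the prefix:
assert not_in_template({"a": "${{ params.name }}"})
assert not not_in_template(["${{ matrix.python-version }}"])
```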
@@ -639,7 +708,7 @@ def param2template(
|
|
639
708
|
return str2template(value, params, filters=filters)
|
640
709
|
|
641
710
|
|
642
|
-
def filter_func(value: Any):
|
711
|
+
def filter_func(value: Any) -> Any:
|
643
712
|
"""Filter own created function out of any value with replace it to its
|
644
713
|
function name. If it is built-in function, it does not have any changing.
|
645
714
|
"""
|
@@ -678,3 +747,25 @@ def cross_product(matrix: Matrix) -> Iterator[DictData]:
|
|
678
747
|
*[[{k: v} for v in vs] for k, vs in matrix.items()]
|
679
748
|
)
|
680
749
|
)
|
750
|
+
|
751
|
+
|
752
|
+
def batch(iterable: Iterator[Any], n: int) -> Iterator[Any]:
|
753
|
+
"""Batch data into iterators of length n. The last batch may be shorter.
|
754
|
+
|
755
|
+
Example:
|
756
|
+
>>> for b in batch('ABCDEFG', 3):
|
757
|
+
... print(list(b))
|
758
|
+
['A', 'B', 'C']
|
759
|
+
['D', 'E', 'F']
|
760
|
+
['G']
|
761
|
+
"""
|
762
|
+
if n < 1:
|
763
|
+
raise ValueError("n must be at least one")
|
764
|
+
it = iter(iterable)
|
765
|
+
while True:
|
766
|
+
chunk_it = islice(it, n)
|
767
|
+
try:
|
768
|
+
first_el = next(chunk_it)
|
769
|
+
except StopIteration:
|
770
|
+
return
|
771
|
+
yield chain((first_el,), chunk_it)
|
ddeutil_workflow-0.0.9.dist-info/METADATA
ADDED
````diff
@@ -0,0 +1,273 @@
+Metadata-Version: 2.1
+Name: ddeutil-workflow
+Version: 0.0.9
+Summary: Lightweight workflow orchestration with less dependencies
+Author-email: ddeutils <korawich.anu@gmail.com>
+License: MIT
+Project-URL: Homepage, https://github.com/ddeutils/ddeutil-workflow/
+Project-URL: Source Code, https://github.com/ddeutils/ddeutil-workflow/
+Keywords: orchestration,workflow
+Classifier: Topic :: Utilities
+Classifier: Natural Language :: English
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Requires-Python: >=3.9.13
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: fmtutil
+Requires-Dist: ddeutil-io
+Requires-Dist: python-dotenv ==1.0.1
+Requires-Dist: typer ==0.12.4
+Provides-Extra: api
+Requires-Dist: fastapi[standard] ==0.112.1 ; extra == 'api'
+Requires-Dist: croniter ==3.0.3 ; extra == 'api'
+Provides-Extra: schedule
+Requires-Dist: schedule <2.0.0,==1.2.2 ; extra == 'schedule'
+
+# Workflow
+
+[](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
+[](https://pypi.org/project/ddeutil-workflow/)
+[](https://github.com/ddeutils/ddeutil-workflow)
+[](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
+[](https://github.com/psf/black)
+
+**Table of Contents**:
+
+- [Installation](#installation)
+- [Getting Started](#getting-started)
+  - [On](#on)
+  - [Pipeline](#pipeline)
+- [Usage](#usage)
+- [Configuration](#configuration)
+- [Future](#future)
+- [Deployment](#deployment)
+
+The **Lightweight workflow orchestration** with less dependencies the was created
+for easy to make a simple metadata driven for data pipeline orchestration.
+It can to use for data operator by a `.yaml` template.
+
+> [!WARNING]
+> This package provide only orchestration workload. That mean you should not use
+> workflow stage to process any large data which use lot of compute usecase.
+
+In my opinion, I think it should not create duplicate pipeline codes if I can
+write with dynamic input parameters on the one template pipeline that just change
+the input parameters per use-case instead.
+This way I can handle a lot of logical pipelines in our orgs with only metadata
+configuration. It called **Metadata Driven Data Pipeline**.
+
+Next, we should get some monitoring tools for manage logging that return from
+pipeline running. Because it not show us what is a use-case that running data
+pipeline.
+
+> [!NOTE]
+> _Disclaimer_: I inspire the dynamic statement from the GitHub Action `.yml` files
+> and all of config file from several data orchestration framework tools from my
+> experience on Data Engineer.
+
+**Rules of This Workflow engine**:
+
+1. Minimum unit of scheduling is 1 minute
+2. Cannot re-run only failed stage and its pending downstream
+3. All parallel tasks inside workflow engine use Threading
+   (Because Python 3.13 unlock GIL)
+
+## Installation
+
+This project need `ddeutil-io` extension namespace packages. If you want to install
+this package with application add-ons, you should add `app` in installation;
+
+| Usecase           | Install Optional                         | Support            |
+|-------------------|------------------------------------------|--------------------|
+| Python & CLI      | `pip install ddeutil-workflow`           | :heavy_check_mark: |
+| Scheduler Service | `pip install ddeutil-workflow[schedule]` | :x:                |
+| FastAPI Server    | `pip install ddeutil-workflow[api]`      | :x:                |
+
+
+> I added this feature to the main milestone.
+>
+> **Docker Images** supported:
+>
+> | Docker Image                | Python Version | Support |
+> |-----------------------------|----------------|---------|
+> | ddeutil-workflow:latest     | `3.9`          | :x:     |
+> | ddeutil-workflow:python3.10 | `3.10`         | :x:     |
+> | ddeutil-workflow:python3.11 | `3.11`         | :x:     |
+> | ddeutil-workflow:python3.12 | `3.12`         | :x:     |
+
+## Getting Started
+
+The main feature of this project is the `Pipeline` object that can call any
+registries function. The pipeline can handle everything that you want to do, it
+will passing parameters and catching the output for re-use it to next step.
+
+### On
+
+The **On** is schedule object that receive crontab value and able to generate
+datetime value with next or previous with any start point of an input datetime.
+
+```yaml
+# This file should keep under this path: `./root-path/conf-path/*`
+on_every_5_min:
+  type: on.On
+  cron: "*/5 * * * *"
+```
+
+```python
+from ddeutil.workflow.on import On
+
+# NOTE: Start load the on data from `.yaml` template file with this key.
+schedule = On.from_loader(name='on_every_5_min', externals={})
+
+assert '*/5 * * * *' == str(schedule.cronjob)
+
+cron_iter = schedule.generate('2022-01-01 00:00:00')
+
+assert "2022-01-01 00:05:00" f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+assert "2022-01-01 00:10:00" f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+assert "2022-01-01 00:15:00" f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
+```
+
+### Pipeline
+
+The **Pipeline** object that is the core feature of this project.
+
+```yaml
+# This file should keep under this path: `./root-path/conf-path/*`
+pipeline-name:
+  type: ddeutil.workflow.pipeline.Pipeline
+  on: 'on_every_5_min'
+  params:
+    author-run:
+      type: str
+    run-date:
+      type: datetime
+  jobs:
+    first-job:
+      stages:
+        - name: "Empty stage do logging to console only!!"
+```
+
+```python
+from ddeutil.workflow.pipeline import Pipeline
+
+pipe = Pipeline.from_loader(name='pipeline-name', externals={})
+pipe.execute(params={'author-run': 'Local Workflow', 'run-date': '2024-01-01'})
+```
+
+> [!NOTE]
+> The above parameter can use short declarative statement. You can pass a parameter
+> type to the key of a parameter name but it does not handler default value if you
+> run this pipeline workflow with schedule.
+>
+> ```yaml
+> ...
+> params:
+>   author-run: str
+>   run-date: datetime
+> ...
+> ```
+>
+> And for the type, you can remove `ddeutil.workflow` prefix because we can find
+> it by looping search from `WORKFLOW_CORE_REGISTRY` value.
+
+## Usage
+
+This is examples that use workflow file for running common Data Engineering
+use-case.
+
+> [!IMPORTANT]
+> I recommend you to use the `hook` stage for all actions that you want to do
+> with pipeline activity that you want to orchestrate. Because it able to dynamic
+> an input argument with the same hook function that make you use less time to
+> maintenance your data pipelines.
+
+```yaml
+run_py_local:
+  type: pipeline.Pipeline
+  on:
+    - cronjob: '*/5 * * * *'
+      timezone: "Asia/Bangkok"
+  params:
+    author-run: str
+    run-date: datetime
+  jobs:
+    getting-api-data:
+      stages:
+        - name: "Retrieve API Data"
+          id: retrieve-api
+          uses: tasks/get-api-with-oauth-to-s3@requests
+          with:
+            url: https://open-data/
+            auth: ${API_ACCESS_REFRESH_TOKEN}
+            aws_s3_path: my-data/open-data/
+            # This Authentication code should implement with your custom hook function
+            aws_access_client_id: ${AWS_ACCESS_CLIENT_ID}
+            aws_access_client_secret: ${AWS_ACCESS_CLIENT_SECRET}
+```
+
+## Configuration
+
+| Environment                         | Component | Default                      | Description                                                                 |
+|-------------------------------------|-----------|------------------------------|-----------------------------------------------------------------------------|
+| `WORKFLOW_ROOT_PATH`                | Core      | .                            | The root path of the workflow application                                   |
+| `WORKFLOW_CORE_REGISTRY`            | Core      | ddeutil.workflow,tests.utils | List of importable string for the hook stage                                |
+| `WORKFLOW_CORE_REGISTRY_FILTER`     | Core      | ddeutil.workflow.utils       | List of importable string for the filter template                           |
+| `WORKFLOW_CORE_PATH_CONF`           | Core      | conf                         | The config path that keep all template `.yaml` files                        |
+| `WORKFLOW_CORE_TIMEZONE`            | Core      | Asia/Bangkok                 | A Timezone string value that will pass to `ZoneInfo` object                 |
+| `WORKFLOW_CORE_STAGE_DEFAULT_ID`    | Core      | true                         | A flag that enable default stage ID that use for catch an execution output  |
+| `WORKFLOW_CORE_STAGE_RAISE_ERROR`   | Core      | true                         | A flag that all stage raise StageException from stage execution             |
+| `WORKFLOW_CORE_MAX_PIPELINE_POKING` | Core      | 4                            |                                                                              |
+| `WORKFLOW_CORE_MAX_JOB_PARALLEL`    | Core      | 2                            | The maximum job number that able to run parallel in pipeline executor       |
+| `WORKFLOW_LOG_ENABLE_WRITE`         | Log       | true                         | A flag that enable logging object saving log to its destination             |
+
+
+**Application**:
+
+| Environment                         | Default | Description |
+|-------------------------------------|---------|-------------|
+| `WORKFLOW_APP_PROCESS_WORKER`       | 2       |             |
+| `WORKFLOW_APP_PIPELINE_PER_PROCESS` | 100     |             |
+
+**API server**:
+
+| Environment           | Default                                                | Description                                                         |
+|-----------------------|--------------------------------------------------------|---------------------------------------------------------------------|
+| `WORKFLOW_API_DB_URL` | postgresql+asyncpg://user:pass@localhost:5432/schedule | A Database URL that will pass to SQLAlchemy create_engine function  |
+
+## Future
+
+The current milestone that will develop and necessary features that should to
+implement on this project.
+
+- ...
+
+## Deployment
+
+This package able to run as a application service for receive manual trigger
+from the master node via RestAPI or use to be Scheduler background service
+like crontab job but via Python API.
+
+### Schedule Service
+
+```shell
+(venv) $ python src.ddeutil.workflow.app
+```
+
+### API Server
+
+```shell
+(venv) $ uvicorn src.ddeutil.workflow.api:app --host 0.0.0.0 --port 80 --reload
+```
+
+> [!NOTE]
+> If this package already deploy, it able to use
+> `uvicorn ddeutil.workflow.api:app --host 0.0.0.0 --port 80`
````
ddeutil_workflow-0.0.9.dist-info/RECORD
ADDED
```diff
@@ -0,0 +1,22 @@
+ddeutil/workflow/__about__.py,sha256=gh9CIut-EzZx1bHdgqILjssQNzzmuo1z_7iXAotDuKk,27
+ddeutil/workflow/__init__.py,sha256=oGvg_BpKKb_FG76DlMvXTKD7BsYhqF9wB1r4x5Q_lQI,647
+ddeutil/workflow/__types.py,sha256=SYMoxbENQX8uPsiCZkjtpHAqqHOh8rUrarAFicAJd0E,1773
+ddeutil/workflow/api.py,sha256=GxjGTLnohbsLsQbcJ0CL00d2LHpuw6J7PN6NqJ3oyRw,2502
+ddeutil/workflow/cli.py,sha256=RsP7evb3HCkzzO89ODjX6VEemQsSv9I-XOdWUJsiLfg,1180
+ddeutil/workflow/cron.py,sha256=FqmkvWCqwJ4eRf8aDn5Ce4FcNWqmcvu2aTTfL34lfgs,22184
+ddeutil/workflow/exceptions.py,sha256=zuCcsfJ1hFivubXz6lXCpGYXk07d_PkRaUD5ew3_LC0,632
+ddeutil/workflow/loader.py,sha256=uMMDc7hzPHqcmIoX2tF91KF1R9AerSC-TScrWmKLlNU,4490
+ddeutil/workflow/log.py,sha256=MxRZMnpq_p0khgZQXffJ7mlGPeVPeY6ABYBBauxUapc,5192
+ddeutil/workflow/on.py,sha256=6E8P4Cbc5y-nywF7xk0KDCJFEG8GhUVGnbjAQnQN2Dg,6892
+ddeutil/workflow/pipeline.py,sha256=uSX5qtDvBXjTDZheQPPafb704R9C0upFPCNIDnoIFOE,39219
+ddeutil/workflow/repeat.py,sha256=e127Z-Fl5Ft2CZSQwLOhInU21IBio0XAyk00B2TLQmU,4730
+ddeutil/workflow/route.py,sha256=w095eB4zMQsqszVgll-M15ky1mxmLKCbwfcTXc9xOPE,1933
+ddeutil/workflow/scheduler.py,sha256=06p0BAHehdP-23rUfrswZi1mF7Kgolqf4OLMtFVsVX4,14875
+ddeutil/workflow/stage.py,sha256=4Xtjl0GQUceqe8VGV8DsqmvfuX6lq8C0ne-Ls9qtLMs,20589
+ddeutil/workflow/utils.py,sha256=HY3tEARQHJrm4WTQX9jmeHUBwQaFmJFIrtzttYvaCRA,23963
+ddeutil_workflow-0.0.9.dist-info/LICENSE,sha256=nGFZ1QEhhhWeMHf9n99_fdt4vQaXS29xWKxt-OcLywk,1085
+ddeutil_workflow-0.0.9.dist-info/METADATA,sha256=VSDq5YFeEJ6Ni0e-I32B4M9Anwh18Pd7q2CCp_igTMY,11148
+ddeutil_workflow-0.0.9.dist-info/WHEEL,sha256=Mdi9PDNwEZptOjTlUcAth7XJDFtKrHYaQMPulZeBCiQ,91
+ddeutil_workflow-0.0.9.dist-info/entry_points.txt,sha256=gLS1mgLig424zJql6CYYz4TxjKzoOwsS_Ez_NkEw0DA,54
+ddeutil_workflow-0.0.9.dist-info/top_level.txt,sha256=m9M6XeSWDwt_yMsmH6gcOjHZVK5O0-vgtNBuncHjzW4,8
+ddeutil_workflow-0.0.9.dist-info/RECORD,,
```
ddeutil/workflow/app.py
DELETED
```diff
@@ -1,45 +0,0 @@
-# ------------------------------------------------------------------------------
-# Copyright (c) 2022 Korawich Anuttra. All rights reserved.
-# Licensed under the MIT License. See LICENSE in the project root for
-# license information.
-# ------------------------------------------------------------------------------
-from __future__ import annotations
-
-import functools
-import time
-
-import schedule
-
-
-def catch_exceptions(cancel_on_failure=False):
-    """Catch exception error from scheduler job."""
-
-    def catch_exceptions_decorator(job_func):
-        @functools.wraps(job_func)
-        def wrapper(*args, **kwargs):
-            try:
-                return job_func(*args, **kwargs)
-            except Exception as err:
-                print(err)
-
-            if cancel_on_failure:
-                return schedule.CancelJob
-
-        return wrapper
-
-    return catch_exceptions_decorator
-
-
-@catch_exceptions(cancel_on_failure=True)
-def bad_task():
-    return 1 / 0
-
-
-schedule.every(5).seconds.do(bad_task)
-
-if __name__ == "__main__":
-    while True:
-        schedule.run_pending()
-        time.sleep(1)
-        if not schedule.get_jobs():
-            break
```