ddeutil-workflow 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +3 -14
- ddeutil/workflow/api.py +44 -75
- ddeutil/workflow/cli.py +51 -0
- ddeutil/workflow/cron.py +713 -0
- ddeutil/workflow/exceptions.py +1 -4
- ddeutil/workflow/loader.py +65 -13
- ddeutil/workflow/log.py +164 -17
- ddeutil/workflow/on.py +18 -15
- ddeutil/workflow/pipeline.py +644 -235
- ddeutil/workflow/repeat.py +9 -5
- ddeutil/workflow/route.py +30 -37
- ddeutil/workflow/scheduler.py +398 -659
- ddeutil/workflow/stage.py +269 -103
- ddeutil/workflow/utils.py +198 -29
- ddeutil_workflow-0.0.9.dist-info/METADATA +273 -0
- ddeutil_workflow-0.0.9.dist-info/RECORD +22 -0
- {ddeutil_workflow-0.0.7.dist-info → ddeutil_workflow-0.0.9.dist-info}/WHEEL +1 -1
- ddeutil_workflow-0.0.9.dist-info/entry_points.txt +2 -0
- ddeutil/workflow/app.py +0 -41
- ddeutil_workflow-0.0.7.dist-info/METADATA +0 -341
- ddeutil_workflow-0.0.7.dist-info/RECORD +0 -20
- {ddeutil_workflow-0.0.7.dist-info → ddeutil_workflow-0.0.9.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.7.dist-info → ddeutil_workflow-0.0.9.dist-info}/top_level.txt +0 -0
ddeutil/workflow/utils.py
CHANGED
@@ -9,29 +9,38 @@ import inspect
|
|
9
9
|
import logging
|
10
10
|
import os
|
11
11
|
import stat
|
12
|
+
import time
|
12
13
|
from abc import ABC, abstractmethod
|
13
14
|
from ast import Call, Constant, Expr, Module, Name, parse
|
14
15
|
from collections.abc import Iterator
|
15
|
-
from dataclasses import dataclass, field
|
16
16
|
from datetime import date, datetime
|
17
17
|
from functools import wraps
|
18
18
|
from hashlib import md5
|
19
19
|
from importlib import import_module
|
20
|
-
from
|
20
|
+
from inspect import isfunction
|
21
|
+
from itertools import chain, islice, product
|
21
22
|
from pathlib import Path
|
23
|
+
from random import randrange
|
22
24
|
from typing import Any, Callable, Literal, Optional, Protocol, Union
|
23
25
|
from zoneinfo import ZoneInfo
|
24
26
|
|
25
|
-
|
27
|
+
try:
|
28
|
+
from typing import ParamSpec
|
29
|
+
except ImportError:
|
30
|
+
from typing_extensions import ParamSpec
|
31
|
+
|
32
|
+
from ddeutil.core import getdot, hasdot, hash_str, import_string, lazy, str2bool
|
26
33
|
from ddeutil.io import PathData, search_env_replace
|
27
34
|
from ddeutil.io.models.lineage import dt_now
|
28
|
-
from pydantic import BaseModel, ConfigDict, Field
|
35
|
+
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
|
29
36
|
from pydantic.functional_validators import model_validator
|
30
37
|
from typing_extensions import Self
|
31
38
|
|
32
39
|
from .__types import DictData, Matrix, Re
|
33
40
|
from .exceptions import ParamValueException, UtilException
|
34
41
|
|
42
|
+
P = ParamSpec("P")
|
43
|
+
|
35
44
|
|
36
45
|
def get_diff_sec(dt: datetime, tz: ZoneInfo | None = None) -> int:
|
37
46
|
"""Return second value that come from diff of an input datetime and the
|
@@ -42,15 +51,22 @@ def get_diff_sec(dt: datetime, tz: ZoneInfo | None = None) -> int:
|
|
42
51
|
)
|
43
52
|
|
44
53
|
|
54
|
+
def delay() -> None:
|
55
|
+
"""Delay time that use time.sleep with random second value between
|
56
|
+
0.0 - 0.9 seconds (in 0.1 second steps).
|
57
|
+
"""
|
58
|
+
time.sleep(randrange(0, 99, step=10) / 100)
|
59
|
+
|
60
|
+
|
45
61
|
class Engine(BaseModel):
|
46
62
|
"""Engine Model"""
|
47
63
|
|
48
64
|
paths: PathData = Field(default_factory=PathData)
|
49
65
|
registry: list[str] = Field(
|
50
|
-
default_factory=lambda: ["ddeutil.workflow"],
|
66
|
+
default_factory=lambda: ["ddeutil.workflow"], # pragma: no cover
|
51
67
|
)
|
52
68
|
registry_filter: list[str] = Field(
|
53
|
-
|
69
|
+
default_factory=lambda: ["ddeutil.workflow.utils"], # pragma: no cover
|
54
70
|
)
|
55
71
|
|
56
72
|
@model_validator(mode="before")
|
@@ -89,7 +105,15 @@ class ConfParams(BaseModel):
|
|
89
105
|
|
90
106
|
|
91
107
|
def config() -> ConfParams:
|
92
|
-
"""Load Config data from ``workflows-conf.yaml`` file.
|
108
|
+
"""Load Config data from ``workflows-conf.yaml`` file.
|
109
|
+
|
110
|
+
Configuration Docs:
|
111
|
+
---
|
112
|
+
:var engine.registry:
|
113
|
+
:var engine.registry_filter:
|
114
|
+
:var paths.root:
|
115
|
+
:var paths.conf:
|
116
|
+
"""
|
93
117
|
root_path: str = os.getenv("WORKFLOW_ROOT_PATH", ".")
|
94
118
|
|
95
119
|
regis: list[str] = ["ddeutil.workflow"]
|
@@ -119,19 +143,31 @@ def config() -> ConfParams:
|
|
119
143
|
)
|
120
144
|
|
121
145
|
|
122
|
-
def gen_id(
|
146
|
+
def gen_id(
|
147
|
+
value: Any,
|
148
|
+
*,
|
149
|
+
sensitive: bool = True,
|
150
|
+
unique: bool = False,
|
151
|
+
) -> str:
|
123
152
|
"""Generate running ID for able to tracking. This generate process use `md5`
|
124
|
-
function.
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
:param
|
153
|
+
algorithm function if ``WORKFLOW_CORE_PIPELINE_ID_SIMPLE`` set to false.
|
154
|
+
But it will cut this hashing value length to 10 if the setting value is set to
|
155
|
+
true.
|
156
|
+
|
157
|
+
:param value: A value that want to add to prefix before hashing with md5.
|
158
|
+
:param sensitive: A flag that keeps the value case-sensitive; if false, the value is converted to lower case before hashing.
|
159
|
+
:param unique: A flag that add timestamp at microsecond level to value
|
160
|
+
before hashing.
|
129
161
|
:rtype: str
|
130
162
|
"""
|
131
163
|
if not isinstance(value, str):
|
132
164
|
value: str = str(value)
|
133
165
|
|
134
166
|
tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
|
167
|
+
if str2bool(os.getenv("WORKFLOW_CORE_PIPELINE_ID_SIMPLE", "true")):
|
168
|
+
return hash_str(f"{(value if sensitive else value.lower())}", n=10) + (
|
169
|
+
f"{datetime.now(tz=tz):%Y%m%d%H%M%S%f}" if unique else ""
|
170
|
+
)
|
135
171
|
return md5(
|
136
172
|
(
|
137
173
|
f"{(value if sensitive else value.lower())}"
|
@@ -237,7 +273,8 @@ class DefaultParam(BaseParam):
|
|
237
273
|
)
|
238
274
|
|
239
275
|
@model_validator(mode="after")
|
240
|
-
def
|
276
|
+
def __check_default(self) -> Self:
|
277
|
+
"""Check that a default value is set when the parameter is not required."""
|
241
278
|
if not self.required and self.default is None:
|
242
279
|
raise ParamValueException(
|
243
280
|
"Default should set when this parameter does not required."
|
@@ -253,7 +290,13 @@ class DatetimeParam(DefaultParam):
|
|
253
290
|
default: datetime = Field(default_factory=dt_now)
|
254
291
|
|
255
292
|
def receive(self, value: str | datetime | date | None = None) -> datetime:
|
256
|
-
"""Receive value that match with datetime.
|
293
|
+
"""Receive value that match with datetime. If a input value pass with
|
294
|
+
None, it will use default value instead.
|
295
|
+
|
296
|
+
:param value: A value that want to validate with datetime parameter
|
297
|
+
type.
|
298
|
+
:rtype: datetime
|
299
|
+
"""
|
257
300
|
if value is None:
|
258
301
|
return self.default
|
259
302
|
|
@@ -275,7 +318,11 @@ class StrParam(DefaultParam):
|
|
275
318
|
type: Literal["str"] = "str"
|
276
319
|
|
277
320
|
def receive(self, value: Optional[str] = None) -> str | None:
|
278
|
-
"""Receive value that match with str.
|
321
|
+
"""Receive value that match with str.
|
322
|
+
|
323
|
+
:param value: A value that want to validate with string parameter type.
|
324
|
+
:rtype: str | None
|
325
|
+
"""
|
279
326
|
if value is None:
|
280
327
|
return self.default
|
281
328
|
return str(value)
|
@@ -287,7 +334,11 @@ class IntParam(DefaultParam):
|
|
287
334
|
type: Literal["int"] = "int"
|
288
335
|
|
289
336
|
def receive(self, value: Optional[int] = None) -> int | None:
|
290
|
-
"""Receive value that match with int.
|
337
|
+
"""Receive value that match with int.
|
338
|
+
|
339
|
+
:param value: A value that want to validate with integer parameter type.
|
340
|
+
:rtype: int | None
|
341
|
+
"""
|
291
342
|
if value is None:
|
292
343
|
return self.default
|
293
344
|
if not isinstance(value, int):
|
@@ -328,19 +379,58 @@ Param = Union[
|
|
328
379
|
]
|
329
380
|
|
330
381
|
|
331
|
-
|
332
|
-
|
333
|
-
"""Result Dataclass object for passing parameter and receiving output from
|
382
|
+
class Result(BaseModel):
|
383
|
+
"""Result Pydantic Model for passing parameter and receiving output from
|
334
384
|
the pipeline execution.
|
335
385
|
"""
|
336
386
|
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
#
|
341
|
-
|
342
|
-
|
343
|
-
|
387
|
+
status: int = Field(default=2)
|
388
|
+
context: DictData = Field(default_factory=dict)
|
389
|
+
|
390
|
+
# NOTE: Ignore this field to compare another result model with __eq__.
|
391
|
+
_parent_run_id: Optional[str] = PrivateAttr(default=None)
|
392
|
+
_run_id: Optional[str] = PrivateAttr(default=None)
|
393
|
+
|
394
|
+
@model_validator(mode="after")
|
395
|
+
def __prepare_run_id(self):
|
396
|
+
if self._run_id is None:
|
397
|
+
self._run_id = gen_id("manual", unique=True)
|
398
|
+
return self
|
399
|
+
|
400
|
+
def set_run_id(self, running_id: str) -> Self:
|
401
|
+
self._run_id = running_id
|
402
|
+
return self
|
403
|
+
|
404
|
+
def set_parent_run_id(self, running_id: str) -> Self:
|
405
|
+
self._parent_run_id = running_id
|
406
|
+
return self
|
407
|
+
|
408
|
+
@property
|
409
|
+
def parent_run_id(self):
|
410
|
+
return self._parent_run_id
|
411
|
+
|
412
|
+
@property
|
413
|
+
def run_id(self):
|
414
|
+
return self._run_id
|
415
|
+
|
416
|
+
def receive(self, result: Result) -> Result:
|
417
|
+
self.__dict__["status"] = result.status
|
418
|
+
self.__dict__["context"].update(result.context)
|
419
|
+
self._parent_run_id = result.parent_run_id
|
420
|
+
self._run_id = result.run_id
|
421
|
+
return self
|
422
|
+
|
423
|
+
def receive_jobs(self, result: Result) -> Result:
|
424
|
+
self.__dict__["status"] = result.status
|
425
|
+
|
426
|
+
# NOTE: Check the context has jobs key.
|
427
|
+
if "jobs" not in self.__dict__["context"]:
|
428
|
+
self.__dict__["context"]["jobs"] = {}
|
429
|
+
|
430
|
+
self.__dict__["context"]["jobs"].update(result.context)
|
431
|
+
self._parent_run_id = result.parent_run_id
|
432
|
+
self._run_id = result.run_id
|
433
|
+
return self
|
344
434
|
|
345
435
|
|
346
436
|
def make_exec(path: str | Path):
|
@@ -367,7 +457,7 @@ class FilterFunc(Protocol):
|
|
367
457
|
def __call__(self, *args, **kwargs): ...
|
368
458
|
|
369
459
|
|
370
|
-
def custom_filter(name: str):
|
460
|
+
def custom_filter(name: str) -> Callable[P, TagFunc]:
|
371
461
|
"""Custom filter decorator function that set function attributes, ``filter``
|
372
462
|
for making filter registries variable.
|
373
463
|
|
@@ -452,7 +542,11 @@ def get_args_const(
|
|
452
542
|
|
453
543
|
@custom_filter("fmt")
|
454
544
|
def datetime_format(value: datetime, fmt: str = "%Y-%m-%d %H:%M:%S") -> str:
|
455
|
-
|
545
|
+
if isinstance(value, datetime):
|
546
|
+
return value.strftime(fmt)
|
547
|
+
raise UtilException(
|
548
|
+
"This custom function should pass input value with datetime type."
|
549
|
+
)
|
456
550
|
|
457
551
|
|
458
552
|
def map_post_filter(
|
@@ -497,6 +591,40 @@ def map_post_filter(
|
|
497
591
|
return value
|
498
592
|
|
499
593
|
|
594
|
+
def not_in_template(value: Any, *, not_in: str = "matrix.") -> bool:
|
595
|
+
"""Check value should not pass template with not_in value prefix.
|
596
|
+
|
597
|
+
:param value:
|
598
|
+
:param not_in:
|
599
|
+
:rtype: bool
|
600
|
+
"""
|
601
|
+
if isinstance(value, dict):
|
602
|
+
return any(not_in_template(value[k], not_in=not_in) for k in value)
|
603
|
+
elif isinstance(value, (list, tuple, set)):
|
604
|
+
return any(not_in_template(i, not_in=not_in) for i in value)
|
605
|
+
elif not isinstance(value, str):
|
606
|
+
return False
|
607
|
+
return any(
|
608
|
+
(not found.group("caller").strip().startswith(not_in))
|
609
|
+
for found in Re.RE_CALLER.finditer(value.strip())
|
610
|
+
)
|
611
|
+
|
612
|
+
|
613
|
+
def has_template(value: Any) -> bool:
|
614
|
+
"""Check value include templating string.
|
615
|
+
|
616
|
+
:param value:
|
617
|
+
:rtype: bool
|
618
|
+
"""
|
619
|
+
if isinstance(value, dict):
|
620
|
+
return any(has_template(value[k]) for k in value)
|
621
|
+
elif isinstance(value, (list, tuple, set)):
|
622
|
+
return any(has_template(i) for i in value)
|
623
|
+
elif not isinstance(value, str):
|
624
|
+
return False
|
625
|
+
return bool(Re.RE_CALLER.findall(value.strip()))
|
626
|
+
|
627
|
+
|
500
628
|
def str2template(
|
501
629
|
value: str,
|
502
630
|
params: DictData,
|
@@ -580,6 +708,25 @@ def param2template(
|
|
580
708
|
return str2template(value, params, filters=filters)
|
581
709
|
|
582
710
|
|
711
|
+
def filter_func(value: Any) -> Any:
|
712
|
+
"""Filter own created function out of any value with replace it to its
|
713
|
+
function name. If it is built-in function, it does not have any changing.
|
714
|
+
"""
|
715
|
+
if isinstance(value, dict):
|
716
|
+
return {k: filter_func(value[k]) for k in value}
|
717
|
+
elif isinstance(value, (list, tuple, set)):
|
718
|
+
return type(value)([filter_func(i) for i in value])
|
719
|
+
|
720
|
+
if isfunction(value):
|
721
|
+
# NOTE: If it want to improve to get this function, it able to save to
|
722
|
+
# some global memory storage.
|
723
|
+
# ---
|
724
|
+
# >>> GLOBAL_DICT[value.__name__] = value
|
725
|
+
#
|
726
|
+
return value.__name__
|
727
|
+
return value
|
728
|
+
|
729
|
+
|
583
730
|
def dash2underscore(
|
584
731
|
key: str,
|
585
732
|
values: DictData,
|
@@ -600,3 +747,25 @@ def cross_product(matrix: Matrix) -> Iterator[DictData]:
|
|
600
747
|
*[[{k: v} for v in vs] for k, vs in matrix.items()]
|
601
748
|
)
|
602
749
|
)
|
750
|
+
|
751
|
+
|
752
|
+
def batch(iterable: Iterator[Any], n: int) -> Iterator[Any]:
|
753
|
+
"""Batch data into iterators of length n. The last batch may be shorter.
|
754
|
+
|
755
|
+
Example:
|
756
|
+
>>> for b in batch('ABCDEFG', 3):
|
757
|
+
... print(list(b))
|
758
|
+
['A', 'B', 'C']
|
759
|
+
['D', 'E', 'F']
|
760
|
+
['G']
|
761
|
+
"""
|
762
|
+
if n < 1:
|
763
|
+
raise ValueError("n must be at least one")
|
764
|
+
it = iter(iterable)
|
765
|
+
while True:
|
766
|
+
chunk_it = islice(it, n)
|
767
|
+
try:
|
768
|
+
first_el = next(chunk_it)
|
769
|
+
except StopIteration:
|
770
|
+
return
|
771
|
+
yield chain((first_el,), chunk_it)
|
@@ -0,0 +1,273 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: ddeutil-workflow
|
3
|
+
Version: 0.0.9
|
4
|
+
Summary: Lightweight workflow orchestration with less dependencies
|
5
|
+
Author-email: ddeutils <korawich.anu@gmail.com>
|
6
|
+
License: MIT
|
7
|
+
Project-URL: Homepage, https://github.com/ddeutils/ddeutil-workflow/
|
8
|
+
Project-URL: Source Code, https://github.com/ddeutils/ddeutil-workflow/
|
9
|
+
Keywords: orchestration,workflow
|
10
|
+
Classifier: Topic :: Utilities
|
11
|
+
Classifier: Natural Language :: English
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
13
|
+
Classifier: Intended Audience :: Developers
|
14
|
+
Classifier: Operating System :: OS Independent
|
15
|
+
Classifier: Programming Language :: Python
|
16
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
21
|
+
Requires-Python: >=3.9.13
|
22
|
+
Description-Content-Type: text/markdown
|
23
|
+
License-File: LICENSE
|
24
|
+
Requires-Dist: fmtutil
|
25
|
+
Requires-Dist: ddeutil-io
|
26
|
+
Requires-Dist: python-dotenv ==1.0.1
|
27
|
+
Requires-Dist: typer ==0.12.4
|
28
|
+
Provides-Extra: api
|
29
|
+
Requires-Dist: fastapi[standard] ==0.112.1 ; extra == 'api'
|
30
|
+
Requires-Dist: croniter ==3.0.3 ; extra == 'api'
|
31
|
+
Provides-Extra: schedule
|
32
|
+
Requires-Dist: schedule <2.0.0,==1.2.2 ; extra == 'schedule'
|
33
|
+
|
34
|
+
# Workflow
|
35
|
+
|
36
|
+
[](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
|
37
|
+
[](https://pypi.org/project/ddeutil-workflow/)
|
38
|
+
[](https://github.com/ddeutils/ddeutil-workflow)
|
39
|
+
[](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
|
40
|
+
[](https://github.com/psf/black)
|
41
|
+
|
42
|
+
**Table of Contents**:
|
43
|
+
|
44
|
+
- [Installation](#installation)
|
45
|
+
- [Getting Started](#getting-started)
|
46
|
+
- [On](#on)
|
47
|
+
- [Pipeline](#pipeline)
|
48
|
+
- [Usage](#usage)
|
49
|
+
- [Configuration](#configuration)
|
50
|
+
- [Future](#future)
|
51
|
+
- [Deployment](#deployment)
|
52
|
+
|
53
|
+
The **Lightweight workflow orchestration** package, with few dependencies, was created
|
54
|
+
to make it easy to build simple metadata-driven data pipeline orchestration.
|
55
|
+
It can be used as a data operator through a `.yaml` template.
|
56
|
+
|
57
|
+
> [!WARNING]
|
58
|
+
> This package provides only an orchestration workload. That means you should not use
|
59
|
+
> a workflow stage to process large data that requires a lot of compute.
|
60
|
+
|
61
|
+
In my opinion, I should not create duplicate pipeline code if I can write one
|
62
|
+
template pipeline with dynamic input parameters and just change the input
|
63
|
+
parameters per use-case instead.
|
64
|
+
This way I can handle a lot of logical pipelines in our orgs with only metadata
|
65
|
+
configuration. This is called a **Metadata Driven Data Pipeline**.
|
66
|
+
|
67
|
+
Next, we should get some monitoring tools to manage the logging that is returned from
|
68
|
+
pipeline runs, because it does not show us which use-case is running the data
|
69
|
+
pipeline.
|
70
|
+
|
71
|
+
> [!NOTE]
|
72
|
+
> _Disclaimer_: I was inspired by the dynamic statements of GitHub Actions `.yml` files
|
73
|
+
> and by the config files of several data orchestration framework tools from my
|
74
|
+
> experience as a Data Engineer.
|
75
|
+
|
76
|
+
**Rules of This Workflow engine**:
|
77
|
+
|
78
|
+
1. Minimum unit of scheduling is 1 minute
|
79
|
+
2. Cannot re-run only failed stage and its pending downstream
|
80
|
+
3. All parallel tasks inside workflow engine use Threading
|
81
|
+
(because Python 3.13 offers a free-threaded build that can run without the GIL)
|
82
|
+
|
83
|
+
## Installation
|
84
|
+
|
85
|
+
This project needs the `ddeutil-io` extension namespace package. If you want to install
|
86
|
+
this package with application add-ons, you should add the matching extra (`schedule` or `api`) at installation:
|
87
|
+
|
88
|
+
| Usecase | Install Optional | Support |
|
89
|
+
|-------------------|------------------------------------------|--------------------|
|
90
|
+
| Python & CLI | `pip install ddeutil-workflow` | :heavy_check_mark: |
|
91
|
+
| Scheduler Service | `pip install ddeutil-workflow[schedule]` | :x: |
|
92
|
+
| FastAPI Server | `pip install ddeutil-workflow[api]` | :x: |
|
93
|
+
|
94
|
+
|
95
|
+
> I added this feature to the main milestone.
|
96
|
+
>
|
97
|
+
> **Docker Images** supported:
|
98
|
+
>
|
99
|
+
> | Docker Image | Python Version | Support |
|
100
|
+
> |-----------------------------|----------------|---------|
|
101
|
+
> | ddeutil-workflow:latest | `3.9` | :x: |
|
102
|
+
> | ddeutil-workflow:python3.10 | `3.10` | :x: |
|
103
|
+
> | ddeutil-workflow:python3.11 | `3.11` | :x: |
|
104
|
+
> | ddeutil-workflow:python3.12 | `3.12` | :x: |
|
105
|
+
|
106
|
+
## Getting Started
|
107
|
+
|
108
|
+
The main feature of this project is the `Pipeline` object, which can call any
|
109
|
+
registered function. The pipeline can handle everything that you want to do: it
|
110
|
+
passes parameters and catches the output so it can be re-used in the next step.
|
111
|
+
|
112
|
+
### On
|
113
|
+
|
114
|
+
The **On** is a schedule object that receives a crontab value and is able to generate
|
115
|
+
the next or previous datetime value from any starting point given as an input datetime.
|
116
|
+
|
117
|
+
```yaml
|
118
|
+
# This file should keep under this path: `./root-path/conf-path/*`
|
119
|
+
on_every_5_min:
|
120
|
+
type: on.On
|
121
|
+
cron: "*/5 * * * *"
|
122
|
+
```
|
123
|
+
|
124
|
+
```python
|
125
|
+
from ddeutil.workflow.on import On
|
126
|
+
|
127
|
+
# NOTE: Start load the on data from `.yaml` template file with this key.
|
128
|
+
schedule = On.from_loader(name='on_every_5_min', externals={})
|
129
|
+
|
130
|
+
assert '*/5 * * * *' == str(schedule.cronjob)
|
131
|
+
|
132
|
+
cron_iter = schedule.generate('2022-01-01 00:00:00')
|
133
|
+
|
134
|
+
assert "2022-01-01 00:05:00" == f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
|
135
|
+
assert "2022-01-01 00:10:00" == f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
|
136
|
+
assert "2022-01-01 00:15:00" == f"{cron_iter.next:%Y-%m-%d %H:%M:%S}"
|
137
|
+
```
|
138
|
+
|
139
|
+
### Pipeline
|
140
|
+
|
141
|
+
The **Pipeline** object is the core feature of this project.
|
142
|
+
|
143
|
+
```yaml
|
144
|
+
# This file should keep under this path: `./root-path/conf-path/*`
|
145
|
+
pipeline-name:
|
146
|
+
type: ddeutil.workflow.pipeline.Pipeline
|
147
|
+
on: 'on_every_5_min'
|
148
|
+
params:
|
149
|
+
author-run:
|
150
|
+
type: str
|
151
|
+
run-date:
|
152
|
+
type: datetime
|
153
|
+
jobs:
|
154
|
+
first-job:
|
155
|
+
stages:
|
156
|
+
- name: "Empty stage do logging to console only!!"
|
157
|
+
```
|
158
|
+
|
159
|
+
```python
|
160
|
+
from ddeutil.workflow.pipeline import Pipeline
|
161
|
+
|
162
|
+
pipe = Pipeline.from_loader(name='pipeline-name', externals={})
|
163
|
+
pipe.execute(params={'author-run': 'Local Workflow', 'run-date': '2024-01-01'})
|
164
|
+
```
|
165
|
+
|
166
|
+
> [!NOTE]
|
167
|
+
> The above parameters can use a short declarative statement. You can pass a parameter
|
168
|
+
> type as the value of a parameter name, but it does not handle a default value if you
|
169
|
+
> run this pipeline workflow with a schedule.
|
170
|
+
>
|
171
|
+
> ```yaml
|
172
|
+
> ...
|
173
|
+
> params:
|
174
|
+
> author-run: str
|
175
|
+
> run-date: datetime
|
176
|
+
> ...
|
177
|
+
> ```
|
178
|
+
>
|
179
|
+
> And for the type, you can remove `ddeutil.workflow` prefix because we can find
|
180
|
+
> it by looping search from `WORKFLOW_CORE_REGISTRY` value.
|
181
|
+
|
182
|
+
## Usage
|
183
|
+
|
184
|
+
These are examples that use a workflow file for running a common Data Engineering
|
185
|
+
use-case.
|
186
|
+
|
187
|
+
> [!IMPORTANT]
|
188
|
+
> I recommend that you use the `hook` stage for all actions that you want to do
|
189
|
+
> with the pipeline activity that you want to orchestrate, because it can pass dynamic
|
190
|
+
> input arguments to the same hook function, so you spend less time
|
191
|
+
> maintaining your data pipelines.
|
192
|
+
|
193
|
+
```yaml
|
194
|
+
run_py_local:
|
195
|
+
type: pipeline.Pipeline
|
196
|
+
on:
|
197
|
+
- cronjob: '*/5 * * * *'
|
198
|
+
timezone: "Asia/Bangkok"
|
199
|
+
params:
|
200
|
+
author-run: str
|
201
|
+
run-date: datetime
|
202
|
+
jobs:
|
203
|
+
getting-api-data:
|
204
|
+
stages:
|
205
|
+
- name: "Retrieve API Data"
|
206
|
+
id: retrieve-api
|
207
|
+
uses: tasks/get-api-with-oauth-to-s3@requests
|
208
|
+
with:
|
209
|
+
url: https://open-data/
|
210
|
+
auth: ${API_ACCESS_REFRESH_TOKEN}
|
211
|
+
aws_s3_path: my-data/open-data/
|
212
|
+
# This Authentication code should implement with your custom hook function
|
213
|
+
aws_access_client_id: ${AWS_ACCESS_CLIENT_ID}
|
214
|
+
aws_access_client_secret: ${AWS_ACCESS_CLIENT_SECRET}
|
215
|
+
```
|
216
|
+
|
217
|
+
## Configuration
|
218
|
+
|
219
|
+
| Environment | Component | Default | Description |
|
220
|
+
|-------------------------------------|-----------|------------------------------|----------------------------------------------------------------------------|
|
221
|
+
| `WORKFLOW_ROOT_PATH` | Core | . | The root path of the workflow application |
|
222
|
+
| `WORKFLOW_CORE_REGISTRY` | Core | ddeutil.workflow,tests.utils | List of importable string for the hook stage |
|
223
|
+
| `WORKFLOW_CORE_REGISTRY_FILTER` | Core | ddeutil.workflow.utils | List of importable string for the filter template |
|
224
|
+
| `WORKFLOW_CORE_PATH_CONF` | Core | conf | The config path that keep all template `.yaml` files |
|
225
|
+
| `WORKFLOW_CORE_TIMEZONE` | Core | Asia/Bangkok | A Timezone string value that will pass to `ZoneInfo` object |
|
226
|
+
| `WORKFLOW_CORE_STAGE_DEFAULT_ID` | Core | true | A flag that enable default stage ID that use for catch an execution output |
|
227
|
+
| `WORKFLOW_CORE_STAGE_RAISE_ERROR` | Core | true | A flag that all stage raise StageException from stage execution |
|
228
|
+
| `WORKFLOW_CORE_MAX_PIPELINE_POKING` | Core | 4 | |
|
229
|
+
| `WORKFLOW_CORE_MAX_JOB_PARALLEL` | Core | 2 | The maximum job number that able to run parallel in pipeline executor |
|
230
|
+
| `WORKFLOW_LOG_ENABLE_WRITE` | Log | true | A flag that enable logging object saving log to its destination |
|
231
|
+
|
232
|
+
|
233
|
+
**Application**:
|
234
|
+
|
235
|
+
| Environment | Default | Description |
|
236
|
+
|-------------------------------------|---------|-------------|
|
237
|
+
| `WORKFLOW_APP_PROCESS_WORKER` | 2 | |
|
238
|
+
| `WORKFLOW_APP_PIPELINE_PER_PROCESS` | 100 | |
|
239
|
+
|
240
|
+
**API server**:
|
241
|
+
|
242
|
+
| Environment | Default | Description |
|
243
|
+
|-----------------------|--------------------------------------------------------|--------------------------------------------------------------------|
|
244
|
+
| `WORKFLOW_API_DB_URL` | postgresql+asyncpg://user:pass@localhost:5432/schedule | A Database URL that will pass to SQLAlchemy create_engine function |
|
245
|
+
|
246
|
+
## Future
|
247
|
+
|
248
|
+
The current milestone to be developed, and the necessary features that should be
|
249
|
+
implemented in this project.
|
250
|
+
|
251
|
+
- ...
|
252
|
+
|
253
|
+
## Deployment
|
254
|
+
|
255
|
+
This package is able to run as an application service that receives manual triggers
|
256
|
+
from the master node via a REST API, or as a Scheduler background service
|
257
|
+
like a crontab job but via the Python API.
|
258
|
+
|
259
|
+
### Schedule Service
|
260
|
+
|
261
|
+
```shell
|
262
|
+
(venv) $ python src.ddeutil.workflow.app
|
263
|
+
```
|
264
|
+
|
265
|
+
### API Server
|
266
|
+
|
267
|
+
```shell
|
268
|
+
(venv) $ uvicorn src.ddeutil.workflow.api:app --host 0.0.0.0 --port 80 --reload
|
269
|
+
```
|
270
|
+
|
271
|
+
> [!NOTE]
|
272
|
+
> If this package is already deployed, you can use
|
273
|
+
> `uvicorn ddeutil.workflow.api:app --host 0.0.0.0 --port 80`
|
@@ -0,0 +1,22 @@
|
|
1
|
+
ddeutil/workflow/__about__.py,sha256=gh9CIut-EzZx1bHdgqILjssQNzzmuo1z_7iXAotDuKk,27
|
2
|
+
ddeutil/workflow/__init__.py,sha256=oGvg_BpKKb_FG76DlMvXTKD7BsYhqF9wB1r4x5Q_lQI,647
|
3
|
+
ddeutil/workflow/__types.py,sha256=SYMoxbENQX8uPsiCZkjtpHAqqHOh8rUrarAFicAJd0E,1773
|
4
|
+
ddeutil/workflow/api.py,sha256=GxjGTLnohbsLsQbcJ0CL00d2LHpuw6J7PN6NqJ3oyRw,2502
|
5
|
+
ddeutil/workflow/cli.py,sha256=RsP7evb3HCkzzO89ODjX6VEemQsSv9I-XOdWUJsiLfg,1180
|
6
|
+
ddeutil/workflow/cron.py,sha256=FqmkvWCqwJ4eRf8aDn5Ce4FcNWqmcvu2aTTfL34lfgs,22184
|
7
|
+
ddeutil/workflow/exceptions.py,sha256=zuCcsfJ1hFivubXz6lXCpGYXk07d_PkRaUD5ew3_LC0,632
|
8
|
+
ddeutil/workflow/loader.py,sha256=uMMDc7hzPHqcmIoX2tF91KF1R9AerSC-TScrWmKLlNU,4490
|
9
|
+
ddeutil/workflow/log.py,sha256=MxRZMnpq_p0khgZQXffJ7mlGPeVPeY6ABYBBauxUapc,5192
|
10
|
+
ddeutil/workflow/on.py,sha256=6E8P4Cbc5y-nywF7xk0KDCJFEG8GhUVGnbjAQnQN2Dg,6892
|
11
|
+
ddeutil/workflow/pipeline.py,sha256=uSX5qtDvBXjTDZheQPPafb704R9C0upFPCNIDnoIFOE,39219
|
12
|
+
ddeutil/workflow/repeat.py,sha256=e127Z-Fl5Ft2CZSQwLOhInU21IBio0XAyk00B2TLQmU,4730
|
13
|
+
ddeutil/workflow/route.py,sha256=w095eB4zMQsqszVgll-M15ky1mxmLKCbwfcTXc9xOPE,1933
|
14
|
+
ddeutil/workflow/scheduler.py,sha256=06p0BAHehdP-23rUfrswZi1mF7Kgolqf4OLMtFVsVX4,14875
|
15
|
+
ddeutil/workflow/stage.py,sha256=4Xtjl0GQUceqe8VGV8DsqmvfuX6lq8C0ne-Ls9qtLMs,20589
|
16
|
+
ddeutil/workflow/utils.py,sha256=HY3tEARQHJrm4WTQX9jmeHUBwQaFmJFIrtzttYvaCRA,23963
|
17
|
+
ddeutil_workflow-0.0.9.dist-info/LICENSE,sha256=nGFZ1QEhhhWeMHf9n99_fdt4vQaXS29xWKxt-OcLywk,1085
|
18
|
+
ddeutil_workflow-0.0.9.dist-info/METADATA,sha256=VSDq5YFeEJ6Ni0e-I32B4M9Anwh18Pd7q2CCp_igTMY,11148
|
19
|
+
ddeutil_workflow-0.0.9.dist-info/WHEEL,sha256=Mdi9PDNwEZptOjTlUcAth7XJDFtKrHYaQMPulZeBCiQ,91
|
20
|
+
ddeutil_workflow-0.0.9.dist-info/entry_points.txt,sha256=gLS1mgLig424zJql6CYYz4TxjKzoOwsS_Ez_NkEw0DA,54
|
21
|
+
ddeutil_workflow-0.0.9.dist-info/top_level.txt,sha256=m9M6XeSWDwt_yMsmH6gcOjHZVK5O0-vgtNBuncHjzW4,8
|
22
|
+
ddeutil_workflow-0.0.9.dist-info/RECORD,,
|