ddeutil-workflow 0.0.8__py3-none-any.whl → 0.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +3 -14
- ddeutil/workflow/api.py +44 -75
- ddeutil/workflow/cli.py +134 -0
- ddeutil/workflow/cron.py +803 -0
- ddeutil/workflow/exceptions.py +3 -0
- ddeutil/workflow/log.py +152 -47
- ddeutil/workflow/on.py +27 -18
- ddeutil/workflow/pipeline.py +527 -234
- ddeutil/workflow/repeat.py +71 -40
- ddeutil/workflow/route.py +77 -63
- ddeutil/workflow/scheduler.py +523 -616
- ddeutil/workflow/stage.py +158 -82
- ddeutil/workflow/utils.py +273 -46
- ddeutil_workflow-0.0.10.dist-info/METADATA +182 -0
- ddeutil_workflow-0.0.10.dist-info/RECORD +21 -0
- {ddeutil_workflow-0.0.8.dist-info → ddeutil_workflow-0.0.10.dist-info}/WHEEL +1 -1
- ddeutil_workflow-0.0.10.dist-info/entry_points.txt +2 -0
- ddeutil/workflow/app.py +0 -45
- ddeutil/workflow/loader.py +0 -80
- ddeutil_workflow-0.0.8.dist-info/METADATA +0 -266
- ddeutil_workflow-0.0.8.dist-info/RECORD +0 -20
- {ddeutil_workflow-0.0.8.dist-info → ddeutil_workflow-0.0.10.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.8.dist-info → ddeutil_workflow-0.0.10.dist-info}/top_level.txt +0 -0
ddeutil/workflow/utils.py
CHANGED
@@ -9,29 +9,42 @@ import inspect
|
|
9
9
|
import logging
|
10
10
|
import os
|
11
11
|
import stat
|
12
|
+
import time
|
12
13
|
from abc import ABC, abstractmethod
|
13
14
|
from ast import Call, Constant, Expr, Module, Name, parse
|
14
15
|
from collections.abc import Iterator
|
15
16
|
from datetime import date, datetime
|
16
|
-
from functools import wraps
|
17
|
+
from functools import cached_property, wraps
|
17
18
|
from hashlib import md5
|
18
19
|
from importlib import import_module
|
19
20
|
from inspect import isfunction
|
20
|
-
from itertools import product
|
21
|
+
from itertools import chain, islice, product
|
21
22
|
from pathlib import Path
|
22
|
-
from
|
23
|
+
from random import randrange
|
24
|
+
from typing import Any, Callable, Literal, Optional, Protocol, TypeVar, Union
|
23
25
|
from zoneinfo import ZoneInfo
|
24
26
|
|
27
|
+
try:
|
28
|
+
from typing import ParamSpec
|
29
|
+
except ImportError:
|
30
|
+
from typing_extensions import ParamSpec
|
31
|
+
|
25
32
|
from ddeutil.core import getdot, hasdot, hash_str, import_string, lazy, str2bool
|
26
|
-
from ddeutil.io import PathData, search_env_replace
|
33
|
+
from ddeutil.io import PathData, PathSearch, YamlFlResolve, search_env_replace
|
27
34
|
from ddeutil.io.models.lineage import dt_now
|
28
|
-
from pydantic import BaseModel, ConfigDict, Field
|
35
|
+
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
|
36
|
+
from pydantic.functional_serializers import field_serializer
|
29
37
|
from pydantic.functional_validators import model_validator
|
30
38
|
from typing_extensions import Self
|
31
39
|
|
32
40
|
from .__types import DictData, Matrix, Re
|
33
41
|
from .exceptions import ParamValueException, UtilException
|
34
42
|
|
43
|
+
logger = logging.getLogger("ddeutil.workflow")
|
44
|
+
P = ParamSpec("P")
|
45
|
+
AnyModel = TypeVar("AnyModel", bound=BaseModel)
|
46
|
+
AnyModelType = type[AnyModel]
|
47
|
+
|
35
48
|
|
36
49
|
def get_diff_sec(dt: datetime, tz: ZoneInfo | None = None) -> int:
|
37
50
|
"""Return second value that come from diff of an input datetime and the
|
@@ -42,6 +55,15 @@ def get_diff_sec(dt: datetime, tz: ZoneInfo | None = None) -> int:
|
|
42
55
|
)
|
43
56
|
|
44
57
|
|
58
|
+
def delay(second: float = 0) -> None:
    """Sleep for ``second`` seconds plus a small random jitter.

    The jitter is ``randrange(0, 99, 10) / 100``, so it is always one of the
    ten discrete values 0.0, 0.1, ..., 0.9 seconds (it is not a continuous
    0.00 - 0.99 range).

    :param second: A base number of seconds that the random jitter adds on to.
    """
    # NOTE: The ``random.randrange`` documentation advises against keyword
    #   arguments, so the step is passed positionally.
    time.sleep(second + randrange(0, 99, 10) / 100)
|
65
|
+
|
66
|
+
|
45
67
|
class Engine(BaseModel):
|
46
68
|
"""Engine Model"""
|
47
69
|
|
@@ -127,6 +149,112 @@ def config() -> ConfParams:
|
|
127
149
|
)
|
128
150
|
|
129
151
|
|
152
|
+
class SimLoad:
    """Simple Load Object that searches the config path for the config data
    that is registered under an identity name, like the name of a pipeline or
    an on.

    :param name: A name of config data that will be read from the YAML files.
    :param params: A config parameters object; ``params.engine.paths.conf`` is
        the path that is searched.
    :param externals: External parameters that are merged on top of the
        loaded config data.

    Noted:
    ---
        The config data should have a ``type`` key, which the ``type``
    property resolves to an importable object.

        ... <identity-key>:
        ...     type: <importable-object>
        ...     <key-data>: <value-data>
        ...     ...

    """

    def __init__(
        self,
        name: str,
        params: ConfParams,
        externals: DictData | None = None,
    ) -> None:
        self.data: DictData = {}
        for file in PathSearch(params.engine.paths.conf).files:
            # NOTE: Every YAML file is scanned without an early exit, so the
            #   last file that holds a non-empty entry for ``name`` wins.
            if file.suffix.endswith((".yml", ".yaml")) and (
                data := YamlFlResolve(file).read().get(name, {})
            ):
                self.data = data

        # VALIDATE: check the data that reading should not empty.
        if not self.data:
            raise ValueError(f"Config {name!r} does not found on conf path")

        self.conf_params: ConfParams = params
        self.externals: DictData = externals or {}
        self.data.update(self.externals)

    @classmethod
    def finds(
        cls,
        obj: object,
        params: ConfParams,
        *,
        include: list[str] | None = None,
        exclude: list[str] | None = None,
    ) -> Iterator[tuple[str, DictData]]:
        """Find all config data in the config path whose ``type`` resolves to
        a subclass of the given object.

        :param obj: A class that the resolved ``type`` must be a subclass of.
        :param params: A config parameters object that provides the config
            path and the registry for resolving the ``type`` value.
        :param include: A list of keys that must all exist in the config data.
        :param exclude: A list of identity names that should be skipped.
        :rtype: Iterator[tuple[str, DictData]]
        """
        skip_names: list[str] = exclude or []
        for file in PathSearch(params.engine.paths.conf).files:
            if not file.suffix.endswith((".yml", ".yaml")):
                continue
            if not (values := YamlFlResolve(file).read()):
                continue
            for key, data in values.items():
                if key in skip_names:
                    continue

                # NOTE: An entry that is not a mapping or does not declare a
                #   ``type`` key cannot match any object type, so skip it
                #   instead of aborting the whole search with an error.
                if not isinstance(data, dict) or "type" not in data:
                    continue

                if issubclass(get_type(data["type"], params), obj) and (
                    include is None or all(i in data for i in include)
                ):
                    yield key, data

    @cached_property
    def type(self) -> AnyModelType:
        """Return the object that the ``type`` value of the config data
        resolves to via ``get_type``.

        :raises ValueError: If the ``type`` key is missing or empty.
        :rtype: AnyModelType
        """
        if not (_typ := self.data.get("type")):
            raise ValueError(
                f"the 'type' value: {_typ} does not exists in config data."
            )
        return get_type(_typ, self.conf_params)
|
231
|
+
|
232
|
+
|
233
|
+
class Loader(SimLoad):
    """Loader Object that reads the config data like ``SimLoad`` but always
    takes its config parameters from the ``config()`` function.

    :param name: A name of config data that will be read from the YAML files.
    :param externals: External parameters that are merged on top of the
        loaded config data.
    """

    @classmethod
    def finds(
        cls,
        obj: object,
        *,
        include: list[str] | None = None,
        exclude: list[str] | None = None,
        **kwargs,
    ) -> Iterator[tuple[str, DictData]]:
        """Override the find class method from the Simple Loader object and
        pass the ``config()`` result as the config parameters.

        :param obj: A class that the resolved ``type`` must be a subclass of.
        :param include: A list of keys that must all exist in the config data.
        :param exclude: A list of identity names that should be skipped.
        :param kwargs: Extra keyword arguments; they are accepted but not
            used.
        :rtype: Iterator[tuple[str, DictData]]
        """
        return super().finds(
            obj=obj, params=config(), include=include, exclude=exclude
        )

    def __init__(self, name: str, externals: DictData | None = None) -> None:
        # NOTE: ``externals`` is optional here because the parent constructor
        #   already treats ``None`` as an empty mapping.
        super().__init__(name, config(), externals)
|
256
|
+
|
257
|
+
|
130
258
|
def gen_id(
|
131
259
|
value: Any,
|
132
260
|
*,
|
@@ -160,6 +288,26 @@ def gen_id(
|
|
160
288
|
).hexdigest()
|
161
289
|
|
162
290
|
|
291
|
+
def get_type(t: str, params: ConfParams) -> AnyModelType:
    """Import and return the object that the ``type`` string points to.

    The lookup tries ``ddeutil.workflow.<t>`` first, then ``<registry>.<t>``
    for each registry in ``params.engine.registry``, and finally ``<t>``
    itself. Only the last attempt is allowed to raise.

    :param t: A importable type string.
    :param params: A config parameters that use registry to search this
        type.
    :rtype: AnyModelType
    """
    # NOTE: Auto adding module prefix if it does not set
    try:
        return import_string(f"ddeutil.workflow.{t}")
    except ModuleNotFoundError:
        pass

    for prefix in params.engine.registry:
        try:
            return import_string(f"{prefix}.{t}")
        except ModuleNotFoundError:
            pass

    # NOTE: Last resort, treat the value as a full importable path.
    return import_string(f"{t}")
|
309
|
+
|
310
|
+
|
163
311
|
class TagFunc(Protocol):
|
164
312
|
"""Tag Function Protocol"""
|
165
313
|
|
@@ -244,6 +392,10 @@ class BaseParam(BaseModel, ABC):
|
|
244
392
|
"Receive value and validate typing before return valid value."
|
245
393
|
)
|
246
394
|
|
395
|
+
@field_serializer("type")
|
396
|
+
def __serializer_type(self, value: str) -> str:
|
397
|
+
return value
|
398
|
+
|
247
399
|
|
248
400
|
class DefaultParam(BaseParam):
|
249
401
|
"""Default Parameter that will check default if it required"""
|
@@ -257,7 +409,8 @@ class DefaultParam(BaseParam):
|
|
257
409
|
)
|
258
410
|
|
259
411
|
@model_validator(mode="after")
|
260
|
-
def
|
412
|
+
def __check_default(self) -> Self:
|
413
|
+
"""Check default value should pass when it set required."""
|
261
414
|
if not self.required and self.default is None:
|
262
415
|
raise ParamValueException(
|
263
416
|
"Default should set when this parameter does not required."
|
@@ -273,7 +426,13 @@ class DatetimeParam(DefaultParam):
|
|
273
426
|
default: datetime = Field(default_factory=dt_now)
|
274
427
|
|
275
428
|
def receive(self, value: str | datetime | date | None = None) -> datetime:
|
276
|
-
"""Receive value that match with datetime.
|
429
|
+
"""Receive value that match with datetime. If a input value pass with
|
430
|
+
None, it will use default value instead.
|
431
|
+
|
432
|
+
:param value: A value that want to validate with datetime parameter
|
433
|
+
type.
|
434
|
+
:rtype: datetime
|
435
|
+
"""
|
277
436
|
if value is None:
|
278
437
|
return self.default
|
279
438
|
|
@@ -295,7 +454,11 @@ class StrParam(DefaultParam):
|
|
295
454
|
type: Literal["str"] = "str"
|
296
455
|
|
297
456
|
def receive(self, value: Optional[str] = None) -> str | None:
|
298
|
-
"""Receive value that match with str.
|
457
|
+
"""Receive value that match with str.
|
458
|
+
|
459
|
+
:param value: A value that want to validate with string parameter type.
|
460
|
+
:rtype: str | None
|
461
|
+
"""
|
299
462
|
if value is None:
|
300
463
|
return self.default
|
301
464
|
return str(value)
|
@@ -307,7 +470,11 @@ class IntParam(DefaultParam):
|
|
307
470
|
type: Literal["int"] = "int"
|
308
471
|
|
309
472
|
def receive(self, value: Optional[int] = None) -> int | None:
|
310
|
-
"""Receive value that match with int.
|
473
|
+
"""Receive value that match with int.
|
474
|
+
|
475
|
+
:param value: A value that want to validate with integer parameter type.
|
476
|
+
:rtype: int | None
|
477
|
+
"""
|
311
478
|
if value is None:
|
312
479
|
return self.default
|
313
480
|
if not isinstance(value, int):
|
@@ -348,57 +515,57 @@ Param = Union[
|
|
348
515
|
]
|
349
516
|
|
350
517
|
|
351
|
-
class Context(BaseModel):
|
352
|
-
"""Context Pydantic Model"""
|
353
|
-
|
354
|
-
params: dict = Field(default_factory=dict)
|
355
|
-
jobs: dict = Field(default_factory=dict)
|
356
|
-
error: dict = Field(default_factory=dict)
|
357
|
-
|
358
|
-
|
359
518
|
class Result(BaseModel):
|
360
519
|
"""Result Pydantic Model for passing parameter and receiving output from
|
361
520
|
the pipeline execution.
|
362
521
|
"""
|
363
522
|
|
364
|
-
# TODO: Add running ID to this result dataclass.
|
365
|
-
# ---
|
366
|
-
# parent_run_id: str
|
367
|
-
# run_id: str
|
368
|
-
#
|
369
523
|
status: int = Field(default=2)
|
370
524
|
context: DictData = Field(default_factory=dict)
|
371
525
|
|
526
|
+
# NOTE: Ignore this field to compare another result model with __eq__.
|
527
|
+
_parent_run_id: Optional[str] = PrivateAttr(default=None)
|
528
|
+
_run_id: Optional[str] = PrivateAttr(default=None)
|
529
|
+
|
530
|
+
@model_validator(mode="after")
|
531
|
+
def __prepare_run_id(self):
|
532
|
+
if self._run_id is None:
|
533
|
+
self._run_id = gen_id("manual", unique=True)
|
534
|
+
return self
|
535
|
+
|
536
|
+
def set_run_id(self, running_id: str) -> Self:
|
537
|
+
self._run_id = running_id
|
538
|
+
return self
|
539
|
+
|
540
|
+
def set_parent_run_id(self, running_id: str) -> Self:
|
541
|
+
self._parent_run_id = running_id
|
542
|
+
return self
|
543
|
+
|
544
|
+
@property
|
545
|
+
def parent_run_id(self):
|
546
|
+
return self._parent_run_id
|
547
|
+
|
548
|
+
@property
|
549
|
+
def run_id(self):
|
550
|
+
return self._run_id
|
551
|
+
|
372
552
|
def receive(self, result: Result) -> Result:
|
373
553
|
self.__dict__["status"] = result.status
|
374
554
|
self.__dict__["context"].update(result.context)
|
555
|
+
self._parent_run_id = result.parent_run_id
|
556
|
+
self._run_id = result.run_id
|
375
557
|
return self
|
376
558
|
|
377
559
|
def receive_jobs(self, result: Result) -> Result:
|
378
560
|
self.__dict__["status"] = result.status
|
561
|
+
|
562
|
+
# NOTE: Check the context has jobs key.
|
379
563
|
if "jobs" not in self.__dict__["context"]:
|
380
564
|
self.__dict__["context"]["jobs"] = {}
|
381
|
-
self.__dict__["context"]["jobs"].update(result.context)
|
382
|
-
return self
|
383
|
-
|
384
565
|
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
"""
|
389
|
-
|
390
|
-
# TODO: Add running ID to this result dataclass.
|
391
|
-
# ---
|
392
|
-
# parent_run_id: str
|
393
|
-
# run_id: str
|
394
|
-
#
|
395
|
-
status: int = Field(default=2)
|
396
|
-
context: Context = Field(default_factory=Context)
|
397
|
-
|
398
|
-
def receive(self, result: ReResult) -> ReResult:
|
399
|
-
self.__dict__["status"] = result.status
|
400
|
-
self.__dict__["context"].__dict__["jobs"].update(result.context.jobs)
|
401
|
-
self.__dict__["context"].__dict__["error"].update(result.context.error)
|
566
|
+
self.__dict__["context"]["jobs"].update(result.context)
|
567
|
+
self._parent_run_id = result.parent_run_id
|
568
|
+
self._run_id = result.run_id
|
402
569
|
return self
|
403
570
|
|
404
571
|
|
@@ -426,7 +593,7 @@ class FilterFunc(Protocol):
|
|
426
593
|
def __call__(self, *args, **kwargs): ...
|
427
594
|
|
428
595
|
|
429
|
-
def custom_filter(name: str):
|
596
|
+
def custom_filter(name: str) -> Callable[P, TagFunc]:
|
430
597
|
"""Custom filter decorator function that set function attributes, ``filter``
|
431
598
|
for making filter registries variable.
|
432
599
|
|
@@ -511,7 +678,11 @@ def get_args_const(
|
|
511
678
|
|
512
679
|
@custom_filter("fmt")
|
513
680
|
def datetime_format(value: datetime, fmt: str = "%Y-%m-%d %H:%M:%S") -> str:
|
514
|
-
|
681
|
+
if isinstance(value, datetime):
|
682
|
+
return value.strftime(fmt)
|
683
|
+
raise UtilException(
|
684
|
+
"This custom function should pass input value with datetime type."
|
685
|
+
)
|
515
686
|
|
516
687
|
|
517
688
|
def map_post_filter(
|
@@ -548,7 +719,7 @@ def map_post_filter(
|
|
548
719
|
else:
|
549
720
|
value: Any = f_func(value, *args, **kwargs)
|
550
721
|
except Exception as err:
|
551
|
-
|
722
|
+
logger.warning(str(err))
|
552
723
|
raise UtilException(
|
553
724
|
f"The post-filter function: {func_name} does not fit with "
|
554
725
|
f"{value} (type: {type(value).__name__})."
|
@@ -556,6 +727,40 @@ def map_post_filter(
|
|
556
727
|
return value
|
557
728
|
|
558
729
|
|
730
|
+
def not_in_template(value: Any, *, not_in: str = "matrix.") -> bool:
    """Return True if the value holds any template caller that does not
    start with the ``not_in`` prefix.

    Dict values and the items of a list, tuple, or set are checked
    recursively. Any other non-string value returns False.

    :param value: A value that will be searched for template callers.
    :param not_in: A prefix that a template caller is allowed to start with.
    :rtype: bool
    """
    if isinstance(value, str):
        for match in Re.RE_CALLER.finditer(value.strip()):
            if not match.group("caller").strip().startswith(not_in):
                return True
        return False

    if isinstance(value, dict):
        children = value.values()
    elif isinstance(value, (list, tuple, set)):
        children = value
    else:
        return False
    return any(not_in_template(child, not_in=not_in) for child in children)
|
747
|
+
|
748
|
+
|
749
|
+
def has_template(value: Any) -> bool:
    """Return True if the value holds at least one templating string.

    Dict values and the items of a list, tuple, or set are checked
    recursively. Any other non-string value returns False.

    :param value: A value that will be searched for template callers.
    :rtype: bool
    """
    if isinstance(value, str):
        return Re.RE_CALLER.search(value.strip()) is not None

    if isinstance(value, dict):
        children = value.values()
    elif isinstance(value, (list, tuple, set)):
        children = value
    else:
        return False
    return any(has_template(child) for child in children)
|
762
|
+
|
763
|
+
|
559
764
|
def str2template(
|
560
765
|
value: str,
|
561
766
|
params: DictData,
|
@@ -639,7 +844,7 @@ def param2template(
|
|
639
844
|
return str2template(value, params, filters=filters)
|
640
845
|
|
641
846
|
|
642
|
-
def filter_func(value: Any):
|
847
|
+
def filter_func(value: Any) -> Any:
|
643
848
|
"""Filter own created function out of any value with replace it to its
|
644
849
|
function name. If it is built-in function, it does not have any changing.
|
645
850
|
"""
|
@@ -678,3 +883,25 @@ def cross_product(matrix: Matrix) -> Iterator[DictData]:
|
|
678
883
|
*[[{k: v} for v in vs] for k, vs in matrix.items()]
|
679
884
|
)
|
680
885
|
)
|
886
|
+
|
887
|
+
|
888
|
+
def batch(iterable: Iterator[Any], n: int) -> Iterator[Any]:
    """Split an iterable into lazy chunk iterators that hold at most n items
    each. Only the last chunk may hold fewer than n items.

    Example:
        >>> for b in batch('ABCDEFG', 3):
        ...     print(list(b))
        ['A', 'B', 'C']
        ['D', 'E', 'F']
        ['G']

    :param iterable: A source of items that will be chunked.
    :param n: A maximum size of each chunk; it must be at least one.
    :raises ValueError: If n is less than one.
    """
    if n < 1:
        raise ValueError("n must be at least one")
    source = iter(iterable)
    # NOTE: Pulling the head item first means an exhausted source ends the
    #   generator instead of yielding an empty chunk.
    for head in source:
        yield chain((head,), islice(source, n - 1))
|
@@ -0,0 +1,182 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: ddeutil-workflow
|
3
|
+
Version: 0.0.10
|
4
|
+
Summary: Lightweight workflow orchestration with less dependencies
|
5
|
+
Author-email: ddeutils <korawich.anu@gmail.com>
|
6
|
+
License: MIT
|
7
|
+
Project-URL: Homepage, https://github.com/ddeutils/ddeutil-workflow/
|
8
|
+
Project-URL: Source Code, https://github.com/ddeutils/ddeutil-workflow/
|
9
|
+
Keywords: orchestration,workflow
|
10
|
+
Classifier: Topic :: Utilities
|
11
|
+
Classifier: Natural Language :: English
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
13
|
+
Classifier: Intended Audience :: Developers
|
14
|
+
Classifier: Operating System :: OS Independent
|
15
|
+
Classifier: Programming Language :: Python
|
16
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
21
|
+
Requires-Python: >=3.9.13
|
22
|
+
Description-Content-Type: text/markdown
|
23
|
+
License-File: LICENSE
|
24
|
+
Requires-Dist: ddeutil-io
|
25
|
+
Requires-Dist: python-dotenv ==1.0.1
|
26
|
+
Requires-Dist: typer <1.0.0,==0.12.5
|
27
|
+
Provides-Extra: api
|
28
|
+
Requires-Dist: fastapi[standard] <1.0.0,==0.112.2 ; extra == 'api'
|
29
|
+
Provides-Extra: schedule
|
30
|
+
Requires-Dist: schedule <2.0.0,==1.2.2 ; extra == 'schedule'
|
31
|
+
|
32
|
+
# Workflow
|
33
|
+
|
34
|
+
[](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
|
35
|
+
[](https://pypi.org/project/ddeutil-workflow/)
|
36
|
+
[](https://github.com/ddeutils/ddeutil-workflow)
|
37
|
+
[](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
|
38
|
+
[](https://github.com/psf/black)
|
39
|
+
|
40
|
+
The **Lightweight workflow orchestration** with fewer dependencies that was created
|
41
|
+
to make it easy to build simple metadata-driven data pipeline orchestration.
|
42
|
+
It can be used as a data operator driven by a `.yaml` template.
|
43
|
+
|
44
|
+
> [!WARNING]
|
45
|
+
> This package provides only orchestration workload. That means you should not use
|
46
|
+
> a workflow stage to process large data that needs a lot of compute.
|
47
|
+
|
48
|
+
In my opinion, I think it should not create duplicate pipeline codes if I can
|
49
|
+
write with dynamic input parameters on the one template pipeline that just change
|
50
|
+
the input parameters per use-case instead.
|
51
|
+
This way I can handle a lot of logical pipelines in our orgs with only metadata
|
52
|
+
configuration. It called **Metadata Driven Data Pipeline**.
|
53
|
+
|
54
|
+
Next, we should get some monitoring tools to manage the logging that is returned from
|
55
|
+
pipeline runs, because otherwise nothing shows us which use-case is running a data
|
56
|
+
pipeline.
|
57
|
+
|
58
|
+
> [!NOTE]
|
59
|
+
> _Disclaimer_: I was inspired by the dynamic statements in the GitHub Action `.yml` files
|
60
|
+
> and by the config files of several data orchestration framework tools from my
|
61
|
+
> experience as a Data Engineer.
|
62
|
+
|
63
|
+
**Rules of This Workflow engine**:
|
64
|
+
|
65
|
+
1. Minimum unit of scheduling is 1 minute
|
66
|
+
2. Cannot re-run only failed stage and its pending downstream
|
67
|
+
3. All parallel tasks inside workflow engine use Threading
|
68
|
+
(because Python 3.13 can unlock the GIL)
|
69
|
+
|
70
|
+
## Installation
|
71
|
+
|
72
|
+
This project needs the `ddeutil-io` extension namespace packages. If you want to install
|
73
|
+
this package with application add-ons, you should add the matching extra (`schedule` or `api`) at installation:
|
74
|
+
|
75
|
+
| Usecase | Install Optional | Support |
|
76
|
+
|-------------------|------------------------------------------|--------------------|
|
77
|
+
| Python & CLI | `pip install ddeutil-workflow` | :heavy_check_mark: |
|
78
|
+
| Scheduler Service | `pip install ddeutil-workflow[schedule]` | :x: |
|
79
|
+
| FastAPI Server | `pip install ddeutil-workflow[api]` | :x: |
|
80
|
+
|
81
|
+
|
82
|
+
> I added this feature to the main milestone.
|
83
|
+
>
|
84
|
+
> **Docker Images** supported:
|
85
|
+
>
|
86
|
+
> | Docker Image | Python Version | Support |
|
87
|
+
> |-----------------------------|----------------|---------|
|
88
|
+
> | ddeutil-workflow:latest | `3.9` | :x: |
|
89
|
+
> | ddeutil-workflow:python3.10 | `3.10` | :x: |
|
90
|
+
> | ddeutil-workflow:python3.11 | `3.11` | :x: |
|
91
|
+
> | ddeutil-workflow:python3.12 | `3.12` | :x: |
|
92
|
+
|
93
|
+
## Usage
|
94
|
+
|
95
|
+
These are examples that use a workflow file to run a common Data Engineering
|
96
|
+
use-case.
|
97
|
+
|
98
|
+
> [!IMPORTANT]
|
99
|
+
> I recommend using the `hook` stage for all actions that you want to do
|
100
|
+
> with pipeline activity that you want to orchestrate, because it can dynamically change
|
101
|
+
> an input argument with the same hook function, so that you need less time to
|
102
|
+
> maintain your data pipelines.
|
103
|
+
|
104
|
+
```yaml
|
105
|
+
run_py_local:
|
106
|
+
type: pipeline.Pipeline
|
107
|
+
on:
|
108
|
+
- cronjob: '*/5 * * * *'
|
109
|
+
timezone: "Asia/Bangkok"
|
110
|
+
params:
|
111
|
+
author-run: str
|
112
|
+
run-date: datetime
|
113
|
+
jobs:
|
114
|
+
getting-api-data:
|
115
|
+
stages:
|
116
|
+
- name: "Retrieve API Data"
|
117
|
+
id: retrieve-api
|
118
|
+
uses: tasks/get-api-with-oauth-to-s3@requests
|
119
|
+
with:
|
120
|
+
url: https://open-data/
|
121
|
+
auth: ${API_ACCESS_REFRESH_TOKEN}
|
122
|
+
aws_s3_path: my-data/open-data/
|
123
|
+
|
124
|
+
# This Authentication code should implement with your custom hook function.
|
125
|
+
# The template allow you to use environment variable.
|
126
|
+
aws_access_client_id: ${AWS_ACCESS_CLIENT_ID}
|
127
|
+
aws_access_client_secret: ${AWS_ACCESS_CLIENT_SECRET}
|
128
|
+
```
|
129
|
+
|
130
|
+
## Configuration
|
131
|
+
|
132
|
+
| Environment | Component | Default | Description |
|
133
|
+
|-------------------------------------|-----------|------------------------------|----------------------------------------------------------------------------|
|
134
|
+
| `WORKFLOW_ROOT_PATH` | Core | . | The root path of the workflow application |
|
135
|
+
| `WORKFLOW_CORE_REGISTRY` | Core | ddeutil.workflow,tests.utils | List of importable string for the hook stage |
|
136
|
+
| `WORKFLOW_CORE_REGISTRY_FILTER` | Core | ddeutil.workflow.utils | List of importable string for the filter template |
|
137
|
+
| `WORKFLOW_CORE_PATH_CONF` | Core | conf | The config path that keep all template `.yaml` files |
|
138
|
+
| `WORKFLOW_CORE_TIMEZONE` | Core | Asia/Bangkok | A Timezone string value that will pass to `ZoneInfo` object |
|
139
|
+
| `WORKFLOW_CORE_STAGE_DEFAULT_ID` | Core | true | A flag that enable default stage ID that use for catch an execution output |
|
140
|
+
| `WORKFLOW_CORE_STAGE_RAISE_ERROR` | Core | true | A flag that all stage raise StageException from stage execution |
|
141
|
+
| `WORKFLOW_CORE_MAX_PIPELINE_POKING` | Core | 4 | |
|
142
|
+
| `WORKFLOW_CORE_MAX_JOB_PARALLEL` | Core | 2 | The maximum job number that able to run parallel in pipeline executor |
|
143
|
+
| `WORKFLOW_LOG_DEBUG_MODE` | Log | true | A flag that enable logging with debug level mode |
|
144
|
+
| `WORKFLOW_LOG_ENABLE_WRITE` | Log | true | A flag that enable logging object saving log to its destination |
|
145
|
+
|
146
|
+
|
147
|
+
**Application**:
|
148
|
+
|
149
|
+
| Environment | Default | Description |
|
150
|
+
|-------------------------------------|----------------------------------|-------------------------------------------------------------------------|
|
151
|
+
| `WORKFLOW_APP_PROCESS_WORKER` | 2 | The maximum process worker number that run in scheduler app module |
|
152
|
+
| `WORKFLOW_APP_SCHEDULE_PER_PROCESS` | 100 | A schedule per process that run parallel |
|
153
|
+
| `WORKFLOW_APP_STOP_BOUNDARY_DELTA` | '{"minutes": 5, "seconds": 20}' | A time delta value that use to stop scheduler app in json string format |
|
154
|
+
|
155
|
+
**API server**:
|
156
|
+
|
157
|
+
| Environment | Default | Description |
|
158
|
+
|--------------------------------------|---------|-----------------------------------------------------------------------------------|
|
159
|
+
| `WORKFLOW_API_ENABLE_ROUTE_WORKFLOW` | true | A flag that enable workflow route to manage execute manually and workflow logging |
|
160
|
+
| `WORKFLOW_API_ENABLE_ROUTE_SCHEDULE` | true | A flag that enable run scheduler |
|
161
|
+
|
162
|
+
## Deployment
|
163
|
+
|
164
|
+
This package is able to run as an application service to receive manual triggers
|
165
|
+
from the master node via REST API, or as a Scheduler background service
|
166
|
+
like crontab job but via Python API.
|
167
|
+
|
168
|
+
### Schedule Service
|
169
|
+
|
170
|
+
```shell
|
171
|
+
(venv) $ python src.ddeutil.workflow.app
|
172
|
+
```
|
173
|
+
|
174
|
+
### API Server
|
175
|
+
|
176
|
+
```shell
|
177
|
+
(venv) $ uvicorn src.ddeutil.workflow.api:app --host 0.0.0.0 --port 80 --reload
|
178
|
+
```
|
179
|
+
|
180
|
+
> [!NOTE]
|
181
|
+
> If this package is already deployed, it is able to use
|
182
|
+
> `uvicorn ddeutil.workflow.api:app --host 0.0.0.0 --port 80 --workers 4`
|
@@ -0,0 +1,21 @@
|
|
1
|
+
ddeutil/workflow/__about__.py,sha256=KJfEGSDA5LiPvdupul6ulxozLD2x2GhgUvq8t60EXsI,28
|
2
|
+
ddeutil/workflow/__init__.py,sha256=oGvg_BpKKb_FG76DlMvXTKD7BsYhqF9wB1r4x5Q_lQI,647
|
3
|
+
ddeutil/workflow/__types.py,sha256=SYMoxbENQX8uPsiCZkjtpHAqqHOh8rUrarAFicAJd0E,1773
|
4
|
+
ddeutil/workflow/api.py,sha256=WHgmjvnnkM4djwHt4bsAqsQjjcjAITRSrNrYYO6bgn8,2582
|
5
|
+
ddeutil/workflow/cli.py,sha256=snJCM-LAqvWwhkSB-3KRWwcgbHAkHn4cZ_DtmfOL5gs,3360
|
6
|
+
ddeutil/workflow/cron.py,sha256=uhp3E5pl_tX_H88bsDujcwdhZmOE53csyV-ouPpPdK8,25321
|
7
|
+
ddeutil/workflow/exceptions.py,sha256=UHojJQmnG9OVuRhXBAzDW6KZn-uKxvxV034QhUBUzUI,686
|
8
|
+
ddeutil/workflow/log.py,sha256=a5L5KWEGS5oiY_y6jugeAoRyAcnAhlt1HfeTU77YeI4,6036
|
9
|
+
ddeutil/workflow/on.py,sha256=Sxwnu0vPbIrMR_WWvH3_rOvD0tbiJntcB5378WoV19M,7163
|
10
|
+
ddeutil/workflow/pipeline.py,sha256=lPw9R3gOnBcU2eogClG8b4e4rTvpn5EbACLNZDuuR38,40825
|
11
|
+
ddeutil/workflow/repeat.py,sha256=0O8voTRB8lNMWsk1AbOYcio_b2_CW98yrfiEzNBb6gA,4954
|
12
|
+
ddeutil/workflow/route.py,sha256=pcn_oDzc2nl6txFhu_TWAnntLggEOFV9A3EVdnazcHI,2597
|
13
|
+
ddeutil/workflow/scheduler.py,sha256=Vu9FZbiHDnshQ2O1SnkVX686eSfaZzip-1oQohfuH_Y,20140
|
14
|
+
ddeutil/workflow/stage.py,sha256=XZEPImipk83kNX9UHrwu7wWUBigXZpEkWqagOG0oS70,20656
|
15
|
+
ddeutil/workflow/utils.py,sha256=ehIcT_fIQL8N0wU16VJDKAFN9q4h1FyMxyT5uTeMIA0,28561
|
16
|
+
ddeutil_workflow-0.0.10.dist-info/LICENSE,sha256=nGFZ1QEhhhWeMHf9n99_fdt4vQaXS29xWKxt-OcLywk,1085
|
17
|
+
ddeutil_workflow-0.0.10.dist-info/METADATA,sha256=7jdDYS2WtaZFpwUCo4ur8NAFWyi-omELh8YLB3EL9ok,9433
|
18
|
+
ddeutil_workflow-0.0.10.dist-info/WHEEL,sha256=Mdi9PDNwEZptOjTlUcAth7XJDFtKrHYaQMPulZeBCiQ,91
|
19
|
+
ddeutil_workflow-0.0.10.dist-info/entry_points.txt,sha256=0BVOgO3LdUdXVZ-CiHHDKxzEk2c8J30jEwHeKn2YCWI,62
|
20
|
+
ddeutil_workflow-0.0.10.dist-info/top_level.txt,sha256=m9M6XeSWDwt_yMsmH6gcOjHZVK5O0-vgtNBuncHjzW4,8
|
21
|
+
ddeutil_workflow-0.0.10.dist-info/RECORD,,
|