ddeutil-workflow 0.0.9__py3-none-any.whl → 0.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +3 -2
- ddeutil/workflow/api.py +99 -31
- ddeutil/workflow/cli.py +105 -22
- ddeutil/workflow/cron.py +116 -26
- ddeutil/workflow/exceptions.py +8 -5
- ddeutil/workflow/job.py +572 -0
- ddeutil/workflow/log.py +73 -66
- ddeutil/workflow/on.py +10 -4
- ddeutil/workflow/repeat.py +68 -39
- ddeutil/workflow/route.py +194 -44
- ddeutil/workflow/scheduler.py +1020 -229
- ddeutil/workflow/stage.py +27 -23
- ddeutil/workflow/utils.py +145 -9
- ddeutil_workflow-0.0.11.dist-info/METADATA +178 -0
- ddeutil_workflow-0.0.11.dist-info/RECORD +21 -0
- {ddeutil_workflow-0.0.9.dist-info → ddeutil_workflow-0.0.11.dist-info}/WHEEL +1 -1
- ddeutil_workflow-0.0.11.dist-info/entry_points.txt +2 -0
- ddeutil/workflow/loader.py +0 -132
- ddeutil/workflow/pipeline.py +0 -1142
- ddeutil_workflow-0.0.9.dist-info/METADATA +0 -273
- ddeutil_workflow-0.0.9.dist-info/RECORD +0 -22
- ddeutil_workflow-0.0.9.dist-info/entry_points.txt +0 -2
- {ddeutil_workflow-0.0.9.dist-info → ddeutil_workflow-0.0.11.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.9.dist-info → ddeutil_workflow-0.0.11.dist-info}/top_level.txt +0 -0
ddeutil/workflow/stage.py
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
# ------------------------------------------------------------------------------
|
6
6
|
"""Stage Model that use for getting stage data template from Job Model.
|
7
7
|
The stage that handle the minimize task that run in some thread (same thread at
|
8
|
-
its job owner) that mean it is the lowest executor of a
|
8
|
+
its job owner) that mean it is the lowest executor of a workflow that
|
9
9
|
can tracking logs.
|
10
10
|
|
11
11
|
The output of stage execution only return 0 status because I do not want to
|
@@ -19,7 +19,6 @@ from __future__ import annotations
|
|
19
19
|
|
20
20
|
import contextlib
|
21
21
|
import inspect
|
22
|
-
import logging
|
23
22
|
import os
|
24
23
|
import subprocess
|
25
24
|
import sys
|
@@ -46,6 +45,7 @@ from typing_extensions import Self
|
|
46
45
|
|
47
46
|
from .__types import DictData, DictStr, Re, TupleStr
|
48
47
|
from .exceptions import StageException
|
48
|
+
from .log import get_logger
|
49
49
|
from .utils import (
|
50
50
|
Registry,
|
51
51
|
Result,
|
@@ -58,6 +58,9 @@ from .utils import (
|
|
58
58
|
)
|
59
59
|
|
60
60
|
P = ParamSpec("P")
|
61
|
+
logger = get_logger("ddeutil.workflow")
|
62
|
+
|
63
|
+
|
61
64
|
__all__: TupleStr = (
|
62
65
|
"Stage",
|
63
66
|
"EmptyStage",
|
@@ -86,7 +89,7 @@ def handler_result(message: str | None = None) -> Callable[P, Result]:
|
|
86
89
|
return func(self, *args, **kwargs).set_run_id(self.run_id)
|
87
90
|
except Exception as err:
|
88
91
|
# NOTE: Start catching error from the stage execution.
|
89
|
-
|
92
|
+
logger.error(
|
90
93
|
f"({self.run_id}) [STAGE]: {err.__class__.__name__}: {err}"
|
91
94
|
)
|
92
95
|
if str2bool(
|
@@ -141,6 +144,7 @@ class BaseStage(BaseModel, ABC):
|
|
141
144
|
default=None,
|
142
145
|
description="A running stage ID.",
|
143
146
|
repr=False,
|
147
|
+
exclude=True,
|
144
148
|
)
|
145
149
|
|
146
150
|
@model_validator(mode="after")
|
@@ -191,7 +195,7 @@ class BaseStage(BaseModel, ABC):
|
|
191
195
|
self.id
|
192
196
|
or str2bool(os.getenv("WORKFLOW_CORE_STAGE_DEFAULT_ID", "false"))
|
193
197
|
):
|
194
|
-
|
198
|
+
logger.debug(
|
195
199
|
f"({self.run_id}) [STAGE]: Output does not set because this "
|
196
200
|
f"stage does not set ID or default stage ID config flag not be "
|
197
201
|
f"True."
|
@@ -208,7 +212,7 @@ class BaseStage(BaseModel, ABC):
|
|
208
212
|
_id: str = gen_id(param2template(self.name, params=to))
|
209
213
|
|
210
214
|
# NOTE: Set the output to that stage generated ID.
|
211
|
-
|
215
|
+
logger.debug(
|
212
216
|
f"({self.run_id}) [STAGE]: Set output complete with stage ID: {_id}"
|
213
217
|
)
|
214
218
|
to["stages"][_id] = {"outputs": output}
|
@@ -231,7 +235,7 @@ class BaseStage(BaseModel, ABC):
|
|
231
235
|
raise TypeError("Return type of condition does not be boolean")
|
232
236
|
return not rs
|
233
237
|
except Exception as err:
|
234
|
-
|
238
|
+
logger.error(f"({self.run_id}) [STAGE]: {err}")
|
235
239
|
raise StageException(f"{err.__class__.__name__}: {err}") from err
|
236
240
|
|
237
241
|
|
@@ -258,7 +262,7 @@ class EmptyStage(BaseStage):
|
|
258
262
|
:param params: A context data that want to add output result. But this
|
259
263
|
stage does not pass any output.
|
260
264
|
"""
|
261
|
-
|
265
|
+
logger.info(
|
262
266
|
f"({self.run_id}) [STAGE]: Empty-Execute: {self.name!r}: "
|
263
267
|
f"( {param2template(self.echo, params=params) or '...'} )"
|
264
268
|
)
|
@@ -314,7 +318,7 @@ class BashStage(BaseStage):
|
|
314
318
|
# NOTE: Make this .sh file able to executable.
|
315
319
|
make_exec(f"./{f_name}")
|
316
320
|
|
317
|
-
|
321
|
+
logger.debug(
|
318
322
|
f"({self.run_id}) [STAGE]: Start create `.sh` file and running a "
|
319
323
|
f"bash statement."
|
320
324
|
)
|
@@ -336,7 +340,7 @@ class BashStage(BaseStage):
|
|
336
340
|
with self.__prepare_bash(
|
337
341
|
bash=bash, env=param2template(self.env, params)
|
338
342
|
) as sh:
|
339
|
-
|
343
|
+
logger.info(f"({self.run_id}) [STAGE]: Shell-Execute: {sh}")
|
340
344
|
rs: CompletedProcess = subprocess.run(
|
341
345
|
sh,
|
342
346
|
shell=False,
|
@@ -424,7 +428,7 @@ class PyStage(BaseStage):
|
|
424
428
|
_locals: DictData = {}
|
425
429
|
|
426
430
|
# NOTE: Start exec the run statement.
|
427
|
-
|
431
|
+
logger.info(f"({self.run_id}) [STAGE]: Py-Execute: {self.name}")
|
428
432
|
exec(run, _globals, _locals)
|
429
433
|
|
430
434
|
return Result(
|
@@ -531,7 +535,7 @@ class HookStage(BaseStage):
|
|
531
535
|
if k.removeprefix("_") in args:
|
532
536
|
args[k] = args.pop(k.removeprefix("_"))
|
533
537
|
|
534
|
-
|
538
|
+
logger.info(
|
535
539
|
f"({self.run_id}) [STAGE]: Hook-Execute: {t_func.name}@{t_func.tag}"
|
536
540
|
)
|
537
541
|
rs: DictData = t_func(**param2template(args, params))
|
@@ -547,12 +551,12 @@ class HookStage(BaseStage):
|
|
547
551
|
|
548
552
|
|
549
553
|
class TriggerStage(BaseStage):
|
550
|
-
"""Trigger
|
554
|
+
"""Trigger Workflow execution stage that execute another workflow object.
|
551
555
|
|
552
556
|
Data Validate:
|
553
557
|
>>> stage = {
|
554
|
-
... "name": "Trigger
|
555
|
-
... "trigger": '
|
558
|
+
... "name": "Trigger workflow stage execution",
|
559
|
+
... "trigger": 'workflow-name-for-loader',
|
556
560
|
... "params": {
|
557
561
|
... "run-date": "2024-08-01",
|
558
562
|
... "source": "src",
|
@@ -560,31 +564,31 @@ class TriggerStage(BaseStage):
|
|
560
564
|
... }
|
561
565
|
"""
|
562
566
|
|
563
|
-
trigger: str = Field(description="A trigger
|
567
|
+
trigger: str = Field(description="A trigger workflow name.")
|
564
568
|
params: DictData = Field(
|
565
569
|
default_factory=dict,
|
566
|
-
description="A parameter that want to pass to
|
570
|
+
description="A parameter that want to pass to workflow execution.",
|
567
571
|
)
|
568
572
|
|
569
573
|
@handler_result("Raise from TriggerStage")
|
570
574
|
def execute(self, params: DictData) -> Result:
|
571
|
-
"""Trigger
|
575
|
+
"""Trigger workflow execution.
|
572
576
|
|
573
577
|
:param params: A parameter data that want to use in this execution.
|
574
578
|
:rtype: Result
|
575
579
|
"""
|
576
|
-
from .
|
580
|
+
from . import Workflow
|
577
581
|
|
578
|
-
# NOTE: Loading
|
582
|
+
# NOTE: Loading workflow object from trigger name.
|
579
583
|
_trigger: str = param2template(self.trigger, params=params)
|
580
584
|
|
581
|
-
# NOTE: Set running
|
585
|
+
# NOTE: Set running workflow ID from running stage ID to external
|
582
586
|
# params on Loader object.
|
583
|
-
|
587
|
+
wf: Workflow = Workflow.from_loader(
|
584
588
|
name=_trigger, externals={"run_id": self.run_id}
|
585
589
|
)
|
586
|
-
|
587
|
-
return
|
590
|
+
logger.info(f"({self.run_id}) [STAGE]: Trigger-Execute: {_trigger!r}")
|
591
|
+
return wf.execute(params=param2template(self.params, params))
|
588
592
|
|
589
593
|
|
590
594
|
# NOTE: Order of parsing stage data
|
ddeutil/workflow/utils.py
CHANGED
@@ -14,14 +14,14 @@ from abc import ABC, abstractmethod
|
|
14
14
|
from ast import Call, Constant, Expr, Module, Name, parse
|
15
15
|
from collections.abc import Iterator
|
16
16
|
from datetime import date, datetime
|
17
|
-
from functools import wraps
|
17
|
+
from functools import cached_property, wraps
|
18
18
|
from hashlib import md5
|
19
19
|
from importlib import import_module
|
20
20
|
from inspect import isfunction
|
21
21
|
from itertools import chain, islice, product
|
22
22
|
from pathlib import Path
|
23
23
|
from random import randrange
|
24
|
-
from typing import Any, Callable, Literal, Optional, Protocol, Union
|
24
|
+
from typing import Any, Callable, Literal, Optional, Protocol, TypeVar, Union
|
25
25
|
from zoneinfo import ZoneInfo
|
26
26
|
|
27
27
|
try:
|
@@ -30,16 +30,20 @@ except ImportError:
|
|
30
30
|
from typing_extensions import ParamSpec
|
31
31
|
|
32
32
|
from ddeutil.core import getdot, hasdot, hash_str, import_string, lazy, str2bool
|
33
|
-
from ddeutil.io import PathData, search_env_replace
|
33
|
+
from ddeutil.io import PathData, PathSearch, YamlFlResolve, search_env_replace
|
34
34
|
from ddeutil.io.models.lineage import dt_now
|
35
35
|
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
|
36
|
+
from pydantic.functional_serializers import field_serializer
|
36
37
|
from pydantic.functional_validators import model_validator
|
37
38
|
from typing_extensions import Self
|
38
39
|
|
39
40
|
from .__types import DictData, Matrix, Re
|
40
41
|
from .exceptions import ParamValueException, UtilException
|
41
42
|
|
43
|
+
logger = logging.getLogger("ddeutil.workflow")
|
42
44
|
P = ParamSpec("P")
|
45
|
+
AnyModel = TypeVar("AnyModel", bound=BaseModel)
|
46
|
+
AnyModelType = type[AnyModel]
|
43
47
|
|
44
48
|
|
45
49
|
def get_diff_sec(dt: datetime, tz: ZoneInfo | None = None) -> int:
|
@@ -51,11 +55,13 @@ def get_diff_sec(dt: datetime, tz: ZoneInfo | None = None) -> int:
|
|
51
55
|
)
|
52
56
|
|
53
57
|
|
54
|
-
def delay() -> None:
|
58
|
+
def delay(second: float = 0) -> None:
|
55
59
|
"""Delay time that use time.sleep with random second value between
|
56
60
|
0.00 - 0.99 seconds.
|
61
|
+
|
62
|
+
:param second: A second number that want to adds-on random value.
|
57
63
|
"""
|
58
|
-
time.sleep(randrange(0, 99, step=10) / 100)
|
64
|
+
time.sleep(second + randrange(0, 99, step=10) / 100)
|
59
65
|
|
60
66
|
|
61
67
|
class Engine(BaseModel):
|
@@ -143,6 +149,112 @@ def config() -> ConfParams:
|
|
143
149
|
)
|
144
150
|
|
145
151
|
|
152
|
+
class SimLoad:
|
153
|
+
"""Simple Load Object that will search config data by given some identity
|
154
|
+
value like name of workflow or on.
|
155
|
+
|
156
|
+
:param name: A name of config data that will read by Yaml Loader object.
|
157
|
+
:param params: A Params model object.
|
158
|
+
:param externals: An external parameters
|
159
|
+
|
160
|
+
Noted:
|
161
|
+
---
|
162
|
+
The config data should have ``type`` key for modeling validation that
|
163
|
+
make this loader know what is config should to do pass to.
|
164
|
+
|
165
|
+
... <identity-key>:
|
166
|
+
... type: <importable-object>
|
167
|
+
... <key-data>: <value-data>
|
168
|
+
... ...
|
169
|
+
|
170
|
+
"""
|
171
|
+
|
172
|
+
def __init__(
|
173
|
+
self,
|
174
|
+
name: str,
|
175
|
+
params: ConfParams,
|
176
|
+
externals: DictData | None = None,
|
177
|
+
) -> None:
|
178
|
+
self.data: DictData = {}
|
179
|
+
for file in PathSearch(params.engine.paths.conf).files:
|
180
|
+
if any(file.suffix.endswith(s) for s in (".yml", ".yaml")) and (
|
181
|
+
data := YamlFlResolve(file).read().get(name, {})
|
182
|
+
):
|
183
|
+
self.data = data
|
184
|
+
|
185
|
+
# VALIDATE: check the data that reading should not empty.
|
186
|
+
if not self.data:
|
187
|
+
raise ValueError(f"Config {name!r} does not found on conf path")
|
188
|
+
|
189
|
+
self.conf_params: ConfParams = params
|
190
|
+
self.externals: DictData = externals or {}
|
191
|
+
self.data.update(self.externals)
|
192
|
+
|
193
|
+
@classmethod
|
194
|
+
def finds(
|
195
|
+
cls,
|
196
|
+
obj: object,
|
197
|
+
params: ConfParams,
|
198
|
+
*,
|
199
|
+
include: list[str] | None = None,
|
200
|
+
exclude: list[str] | None = None,
|
201
|
+
) -> Iterator[tuple[str, DictData]]:
|
202
|
+
"""Find all data that match with object type in config path. This class
|
203
|
+
method can use include and exclude list of identity name for filter and
|
204
|
+
adds-on.
|
205
|
+
"""
|
206
|
+
exclude: list[str] = exclude or []
|
207
|
+
for file in PathSearch(params.engine.paths.conf).files:
|
208
|
+
if any(file.suffix.endswith(s) for s in (".yml", ".yaml")) and (
|
209
|
+
values := YamlFlResolve(file).read()
|
210
|
+
):
|
211
|
+
for key, data in values.items():
|
212
|
+
if key in exclude:
|
213
|
+
continue
|
214
|
+
if issubclass(get_type(data["type"], params), obj) and (
|
215
|
+
include is None or all(i in data for i in include)
|
216
|
+
):
|
217
|
+
yield key, data
|
218
|
+
|
219
|
+
@cached_property
|
220
|
+
def type(self) -> AnyModelType:
|
221
|
+
"""Return object of string type which implement on any registry. The
|
222
|
+
object type.
|
223
|
+
|
224
|
+
:rtype: AnyModelType
|
225
|
+
"""
|
226
|
+
if not (_typ := self.data.get("type")):
|
227
|
+
raise ValueError(
|
228
|
+
f"the 'type' value: {_typ} does not exists in config data."
|
229
|
+
)
|
230
|
+
return get_type(_typ, self.conf_params)
|
231
|
+
|
232
|
+
|
233
|
+
class Loader(SimLoad):
|
234
|
+
"""Loader Object that get the config `yaml` file from current path.
|
235
|
+
|
236
|
+
:param name: A name of config data that will read by Yaml Loader object.
|
237
|
+
:param externals: An external parameters
|
238
|
+
"""
|
239
|
+
|
240
|
+
@classmethod
|
241
|
+
def finds(
|
242
|
+
cls,
|
243
|
+
obj: object,
|
244
|
+
*,
|
245
|
+
include: list[str] | None = None,
|
246
|
+
exclude: list[str] | None = None,
|
247
|
+
**kwargs,
|
248
|
+
) -> DictData:
|
249
|
+
"""Override the find class method from the Simple Loader object."""
|
250
|
+
return super().finds(
|
251
|
+
obj=obj, params=config(), include=include, exclude=exclude
|
252
|
+
)
|
253
|
+
|
254
|
+
def __init__(self, name: str, externals: DictData) -> None:
|
255
|
+
super().__init__(name, config(), externals)
|
256
|
+
|
257
|
+
|
146
258
|
def gen_id(
|
147
259
|
value: Any,
|
148
260
|
*,
|
@@ -176,6 +288,26 @@ def gen_id(
|
|
176
288
|
).hexdigest()
|
177
289
|
|
178
290
|
|
291
|
+
def get_type(t: str, params: ConfParams) -> AnyModelType:
|
292
|
+
"""Return import type from string importable value in the type key.
|
293
|
+
|
294
|
+
:param t: A importable type string.
|
295
|
+
:param params: A config parameters that use registry to search this
|
296
|
+
type.
|
297
|
+
:rtype: AnyModelType
|
298
|
+
"""
|
299
|
+
try:
|
300
|
+
# NOTE: Auto adding module prefix if it does not set
|
301
|
+
return import_string(f"ddeutil.workflow.{t}")
|
302
|
+
except ModuleNotFoundError:
|
303
|
+
for registry in params.engine.registry:
|
304
|
+
try:
|
305
|
+
return import_string(f"{registry}.{t}")
|
306
|
+
except ModuleNotFoundError:
|
307
|
+
continue
|
308
|
+
return import_string(f"{t}")
|
309
|
+
|
310
|
+
|
179
311
|
class TagFunc(Protocol):
|
180
312
|
"""Tag Function Protocol"""
|
181
313
|
|
@@ -260,6 +392,10 @@ class BaseParam(BaseModel, ABC):
|
|
260
392
|
"Receive value and validate typing before return valid value."
|
261
393
|
)
|
262
394
|
|
395
|
+
@field_serializer("type")
|
396
|
+
def __serializer_type(self, value: str) -> str:
|
397
|
+
return value
|
398
|
+
|
263
399
|
|
264
400
|
class DefaultParam(BaseParam):
|
265
401
|
"""Default Parameter that will check default if it required"""
|
@@ -381,7 +517,7 @@ Param = Union[
|
|
381
517
|
|
382
518
|
class Result(BaseModel):
|
383
519
|
"""Result Pydantic Model for passing parameter and receiving output from
|
384
|
-
the
|
520
|
+
the workflow execution.
|
385
521
|
"""
|
386
522
|
|
387
523
|
status: int = Field(default=2)
|
@@ -519,7 +655,7 @@ def get_args_const(
|
|
519
655
|
|
520
656
|
if len(body) > 1:
|
521
657
|
raise UtilException(
|
522
|
-
"Post-filter function should be only one calling per
|
658
|
+
"Post-filter function should be only one calling per wf"
|
523
659
|
)
|
524
660
|
|
525
661
|
caller: Union[Name, Call]
|
@@ -583,7 +719,7 @@ def map_post_filter(
|
|
583
719
|
else:
|
584
720
|
value: Any = f_func(value, *args, **kwargs)
|
585
721
|
except Exception as err:
|
586
|
-
|
722
|
+
logger.warning(str(err))
|
587
723
|
raise UtilException(
|
588
724
|
f"The post-filter function: {func_name} does not fit with "
|
589
725
|
f"{value} (type: {type(value).__name__})."
|
@@ -635,7 +771,7 @@ def str2template(
|
|
635
771
|
``RE_CALLER`` regular expression.
|
636
772
|
|
637
773
|
The getter value that map a template should have typing support align
|
638
|
-
with the
|
774
|
+
with the workflow parameter types that is `str`, `int`, `datetime`, and
|
639
775
|
`list`.
|
640
776
|
|
641
777
|
:param value: A string value that want to mapped with an params
|
@@ -0,0 +1,178 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: ddeutil-workflow
|
3
|
+
Version: 0.0.11
|
4
|
+
Summary: Lightweight workflow orchestration with less dependencies
|
5
|
+
Author-email: ddeutils <korawich.anu@gmail.com>
|
6
|
+
License: MIT
|
7
|
+
Project-URL: Homepage, https://github.com/ddeutils/ddeutil-workflow/
|
8
|
+
Project-URL: Source Code, https://github.com/ddeutils/ddeutil-workflow/
|
9
|
+
Keywords: orchestration,workflow
|
10
|
+
Classifier: Topic :: Utilities
|
11
|
+
Classifier: Natural Language :: English
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
13
|
+
Classifier: Intended Audience :: Developers
|
14
|
+
Classifier: Operating System :: OS Independent
|
15
|
+
Classifier: Programming Language :: Python
|
16
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
21
|
+
Requires-Python: >=3.9.13
|
22
|
+
Description-Content-Type: text/markdown
|
23
|
+
License-File: LICENSE
|
24
|
+
Requires-Dist: ddeutil-io
|
25
|
+
Requires-Dist: python-dotenv ==1.0.1
|
26
|
+
Requires-Dist: typer <1.0.0,==0.12.5
|
27
|
+
Requires-Dist: schedule <2.0.0,==1.2.2
|
28
|
+
Provides-Extra: api
|
29
|
+
Requires-Dist: fastapi <1.0.0,==0.112.2 ; extra == 'api'
|
30
|
+
|
31
|
+
# Workflow
|
32
|
+
|
33
|
+
[](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
|
34
|
+
[](https://pypi.org/project/ddeutil-workflow/)
|
35
|
+
[](https://github.com/ddeutils/ddeutil-workflow)
|
36
|
+
[](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
|
37
|
+
[](https://github.com/psf/black)
|
38
|
+
|
39
|
+
The **Lightweight workflow orchestration** with less dependencies was created
|
40
|
+
for easy to make a simple metadata driven for data workflow orchestration.
|
41
|
+
It can be used as a data operator through a `.yaml` template.
|
42
|
+
|
43
|
+
> [!WARNING]
|
44
|
+
> This package provides only an orchestration workload. That means you should not use
|
45
|
+
> a workflow stage to process any large data that needs a lot of compute.
|
46
|
+
|
47
|
+
In my opinion, I think it should not create duplicate workflow codes if I can
|
48
|
+
write with dynamic input parameters on the one template workflow that just change
|
49
|
+
the input parameters per use-case instead.
|
50
|
+
This way I can handle a lot of logical workflows in our orgs with only metadata
|
51
|
+
configuration. It called **Metadata Driven Data Workflow**.
|
52
|
+
|
53
|
+
Next, we should get some monitoring tools to manage the logging that returns from
|
54
|
+
workflow running. Because it not show us what is a use-case that running data
|
55
|
+
workflow.
|
56
|
+
|
57
|
+
> [!NOTE]
|
58
|
+
> _Disclaimer_: I was inspired by the dynamic statements of the GitHub Action `.yml` files
|
59
|
+
> and all of config file from several data orchestration framework tools from my
|
60
|
+
> experience on Data Engineer.
|
61
|
+
|
62
|
+
**Rules of This Workflow engine**:
|
63
|
+
|
64
|
+
1. Minimum unit of scheduling is 1 minute
|
65
|
+
2. Cannot re-run only failed stage and its pending downstream
|
66
|
+
3. All parallel tasks inside workflow engine use Threading
|
67
|
+
(Because Python 3.13 unlock GIL)
|
68
|
+
|
69
|
+
## Installation
|
70
|
+
|
71
|
+
This project needs the `ddeutil-io` extension namespace package. If you want to install
|
72
|
+
this package with application add-ons, you should add the `api` extra in installation;
|
73
|
+
|
74
|
+
| Usecase | Install Optional | Support |
|
75
|
+
|-------------------|------------------------------------------|--------------------|
|
76
|
+
| Python & CLI | `pip install ddeutil-workflow` | :heavy_check_mark: |
|
77
|
+
| FastAPI Server | `pip install ddeutil-workflow[api]` | :heavy_check_mark: |
|
78
|
+
|
79
|
+
|
80
|
+
> I added this feature to the main milestone.
|
81
|
+
>
|
82
|
+
> **Docker Images** supported:
|
83
|
+
>
|
84
|
+
> | Docker Image | Python Version | Support |
|
85
|
+
> |-----------------------------|----------------|---------|
|
86
|
+
> | ddeutil-workflow:latest | `3.9` | :x: |
|
87
|
+
> | ddeutil-workflow:python3.10 | `3.10` | :x: |
|
88
|
+
> | ddeutil-workflow:python3.11 | `3.11` | :x: |
|
89
|
+
> | ddeutil-workflow:python3.12 | `3.12` | :x: |
|
90
|
+
|
91
|
+
## Usage
|
92
|
+
|
93
|
+
These are examples that use a workflow file for running a common Data Engineering
|
94
|
+
use-case.
|
95
|
+
|
96
|
+
> [!IMPORTANT]
|
97
|
+
> I recommend you to use the `hook` stage for all actions that you want to do
|
98
|
+
> with workflow activity that you want to orchestrate. Because it able to dynamic
|
99
|
+
> an input argument with the same hook function that make you use less time to
|
100
|
+
> maintain your data workflows.
|
101
|
+
|
102
|
+
```yaml
|
103
|
+
run_py_local:
|
104
|
+
type: Workflow
|
105
|
+
on:
|
106
|
+
# If workflow deploy to schedule, it will running every 5 minutes
|
107
|
+
# with Asia/Bangkok timezone.
|
108
|
+
- cronjob: '*/5 * * * *'
|
109
|
+
timezone: "Asia/Bangkok"
|
110
|
+
params:
|
111
|
+
# Incoming execution parameters will validate with this type. It allow
|
112
|
+
# to set default value or templating.
|
113
|
+
author-run: str
|
114
|
+
run-date: datetime
|
115
|
+
jobs:
|
116
|
+
getting-api-data:
|
117
|
+
stages:
|
118
|
+
- name: "Retrieve API Data"
|
119
|
+
id: retrieve-api
|
120
|
+
uses: tasks/get-api-with-oauth-to-s3@requests
|
121
|
+
with:
|
122
|
+
url: https://open-data/
|
123
|
+
auth: ${API_ACCESS_REFRESH_TOKEN}
|
124
|
+
aws_s3_path: my-data/open-data/
|
125
|
+
|
126
|
+
# This Authentication code should implement with your custom hook function.
|
127
|
+
# The template allow you to use environment variable.
|
128
|
+
aws_access_client_id: ${AWS_ACCESS_CLIENT_ID}
|
129
|
+
aws_access_client_secret: ${AWS_ACCESS_CLIENT_SECRET}
|
130
|
+
```
|
131
|
+
|
132
|
+
## Configuration
|
133
|
+
|
134
|
+
| Environment | Component | Default | Description |
|
135
|
+
|-------------------------------------|-----------|----------------------------------|----------------------------------------------------------------------------|
|
136
|
+
| `WORKFLOW_ROOT_PATH` | Core | . | The root path of the workflow application |
|
137
|
+
| `WORKFLOW_CORE_REGISTRY` | Core | src.ddeutil.workflow,tests.utils | List of importable string for the hook stage |
|
138
|
+
| `WORKFLOW_CORE_REGISTRY_FILTER` | Core | ddeutil.workflow.utils | List of importable string for the filter template |
|
139
|
+
| `WORKFLOW_CORE_PATH_CONF` | Core | conf | The config path that keep all template `.yaml` files |
|
140
|
+
| `WORKFLOW_CORE_TIMEZONE` | Core | Asia/Bangkok | A Timezone string value that will pass to `ZoneInfo` object |
|
141
|
+
| `WORKFLOW_CORE_STAGE_DEFAULT_ID` | Core | true | A flag that enable default stage ID that use for catch an execution output |
|
142
|
+
| `WORKFLOW_CORE_STAGE_RAISE_ERROR` | Core | true | A flag that all stage raise StageException from stage execution |
|
143
|
+
| `WORKFLOW_CORE_MAX_NUM_POKING` | Core | 4 | |
|
144
|
+
| `WORKFLOW_CORE_MAX_JOB_PARALLEL` | Core | 2 | The maximum job number that able to run parallel in workflow executor |
|
145
|
+
| `WORKFLOW_LOG_DEBUG_MODE` | Log | true | A flag that enable logging with debug level mode |
|
146
|
+
| `WORKFLOW_LOG_ENABLE_WRITE` | Log | true | A flag that enable logging object saving log to its destination |
|
147
|
+
| `WORKFLOW_APP_PROCESS_WORKER` | Schedule | 2 | The maximum process worker number that run in scheduler app module |
|
148
|
+
| `WORKFLOW_APP_SCHEDULE_PER_PROCESS` | Schedule | 100 | A schedule per process that run parallel |
|
149
|
+
| `WORKFLOW_APP_STOP_BOUNDARY_DELTA` | Schedule | '{"minutes": 5, "seconds": 20}' | A time delta value that use to stop scheduler app in json string format |
|
150
|
+
|
151
|
+
**API Application**:
|
152
|
+
|
153
|
+
| Environment | Component | Default | Description |
|
154
|
+
|--------------------------------------|-----------|---------|-----------------------------------------------------------------------------------|
|
155
|
+
| `WORKFLOW_API_ENABLE_ROUTE_WORKFLOW` | API | true | A flag that enable workflow route to manage execute manually and workflow logging |
|
156
|
+
| `WORKFLOW_API_ENABLE_ROUTE_SCHEDULE` | API | true | A flag that enable run scheduler |
|
157
|
+
|
158
|
+
## Deployment
|
159
|
+
|
160
|
+
This package is able to run as an application service to receive manual triggers
|
161
|
+
from the master node via RestAPI or use to be Scheduler background service
|
162
|
+
like crontab job but via Python API.
|
163
|
+
|
164
|
+
### Schedule App
|
165
|
+
|
166
|
+
```shell
|
167
|
+
(venv) $ ddeutil-workflow schedule
|
168
|
+
```
|
169
|
+
|
170
|
+
### API Server
|
171
|
+
|
172
|
+
```shell
|
173
|
+
(venv) $ uvicorn src.ddeutil.workflow.api:app --host 127.0.0.1 --port 80
|
174
|
+
```
|
175
|
+
|
176
|
+
> [!NOTE]
|
177
|
+
> If this package is already deployed, it is able to use
|
178
|
+
> `uvicorn ddeutil.workflow.api:app --host 127.0.0.1 --port 80 --workers 4`
|
@@ -0,0 +1,21 @@
|
|
1
|
+
ddeutil/workflow/__about__.py,sha256=i9hlE_wZTG59lDKZ8plCEpWU3EdeUe141ACbjcsAewc,28
|
2
|
+
ddeutil/workflow/__init__.py,sha256=aEQiEWwTPGhfwpzzdb99xXaHchi5ABWUHl2iLIyT18E,664
|
3
|
+
ddeutil/workflow/__types.py,sha256=SYMoxbENQX8uPsiCZkjtpHAqqHOh8rUrarAFicAJd0E,1773
|
4
|
+
ddeutil/workflow/api.py,sha256=xVP8eGu1nnR8HM0ULTwxs9TV9tsxCOjZ68cAffw2f3o,4802
|
5
|
+
ddeutil/workflow/cli.py,sha256=Ikcq526WeIl-737-v55T0PwAZ2pNiZFxlN0Y-DjhDbQ,3374
|
6
|
+
ddeutil/workflow/cron.py,sha256=uhp3E5pl_tX_H88bsDujcwdhZmOE53csyV-ouPpPdK8,25321
|
7
|
+
ddeutil/workflow/exceptions.py,sha256=Uf1-Tn8rAzj0aiVHSqo4fBqO80W0za7UFZgKv24E-tg,706
|
8
|
+
ddeutil/workflow/job.py,sha256=eESvmIbIyYtiKgVLfILtb0lTz-shMhy4Bi7kHrtZSk0,19663
|
9
|
+
ddeutil/workflow/log.py,sha256=bZyyqf3oNBB8oRf8RI0YvII7wHHoj4wC-nmW_pQjQ1c,6036
|
10
|
+
ddeutil/workflow/on.py,sha256=Sxwnu0vPbIrMR_WWvH3_rOvD0tbiJntcB5378WoV19M,7163
|
11
|
+
ddeutil/workflow/repeat.py,sha256=e3dekPTlMlxCCizfBYsZ8dD8Juy4rtfqDZJU3Iky2oA,5011
|
12
|
+
ddeutil/workflow/route.py,sha256=ABEk-WlVo9XGFc7zCPbckX33URCNH7woQFU1keX_8PQ,6970
|
13
|
+
ddeutil/workflow/scheduler.py,sha256=ISiVoKM0puh3XaXZ9NTi-J-vREGxdEa-lk1jR6a4OXk,41639
|
14
|
+
ddeutil/workflow/stage.py,sha256=nYsKKT5ZKelEsFnDScBwaitXOv_aUEFkubDPQVK5isM,20644
|
15
|
+
ddeutil/workflow/utils.py,sha256=TbqgPkDDYBpqCZ7HV2TU3AH1_Mv-zfrJdwVL-l2SPUo,28559
|
16
|
+
ddeutil_workflow-0.0.11.dist-info/LICENSE,sha256=nGFZ1QEhhhWeMHf9n99_fdt4vQaXS29xWKxt-OcLywk,1085
|
17
|
+
ddeutil_workflow-0.0.11.dist-info/METADATA,sha256=QOsSVDJmVdgrXzmtSYmE5WVI7u69EGlKfsALWjE0by8,9395
|
18
|
+
ddeutil_workflow-0.0.11.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
|
19
|
+
ddeutil_workflow-0.0.11.dist-info/entry_points.txt,sha256=0BVOgO3LdUdXVZ-CiHHDKxzEk2c8J30jEwHeKn2YCWI,62
|
20
|
+
ddeutil_workflow-0.0.11.dist-info/top_level.txt,sha256=m9M6XeSWDwt_yMsmH6gcOjHZVK5O0-vgtNBuncHjzW4,8
|
21
|
+
ddeutil_workflow-0.0.11.dist-info/RECORD,,
|