PyPI - ddeutil-workflow - Versions diffs - 0.0.7__py3-none-any.whl → 0.0.8__py3-none-any.whl - Mend

ddeutil-workflow 0.0.7__py3-none-any.whl → 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

ddeutil/workflow/__about__.py +1 -1
ddeutil/workflow/app.py +4 -0
ddeutil/workflow/exceptions.py +1 -4
ddeutil/workflow/log.py +49 -0
ddeutil/workflow/pipeline.py +327 -167
ddeutil/workflow/stage.py +191 -97
ddeutil/workflow/utils.py +94 -16
{ddeutil_workflow-0.0.7.dist-info → ddeutil_workflow-0.0.8.dist-info}/METADATA +17 -92
ddeutil_workflow-0.0.8.dist-info/RECORD +20 -0
ddeutil_workflow-0.0.7.dist-info/RECORD +0 -20
{ddeutil_workflow-0.0.7.dist-info → ddeutil_workflow-0.0.8.dist-info}/LICENSE +0 -0
{ddeutil_workflow-0.0.7.dist-info → ddeutil_workflow-0.0.8.dist-info}/WHEEL +0 -0
{ddeutil_workflow-0.0.7.dist-info → ddeutil_workflow-0.0.8.dist-info}/top_level.txt +0 -0

ddeutil/workflow/stage.py CHANGED Viewed

@@ -3,6 +3,18 @@
 # Licensed under the MIT License. See LICENSE in the project root for
 # license information.
 # ------------------------------------------------------------------------------
+"""Stage Model that use for getting stage data template from Job Model.
+The stage handles the minimal task that runs in some thread (same thread as
+its job owner), which means it is the lowest executor of a pipeline workflow
+that can track logs.
+    The output of stage execution only returns 0 status because I do not want
+to handle stage errors on this stage model. I think the stage model should have
+a lot of use cases, and it should not be a worry when I create a new one.
+    Execution --> Ok    --> Result with 0
+              --> Error --> Raise StageException
+"""
 from __future__ import annotations
 import contextlib
@@ -15,6 +27,7 @@ import uuid
 from abc import ABC, abstractmethod
 from collections.abc import Iterator
 from dataclasses import dataclass
+from functools import wraps
 from inspect import Parameter
 from pathlib import Path
 from subprocess import CompletedProcess
@@ -22,6 +35,7 @@ from typing import Callable, Optional, Union
 from ddeutil.core import str2bool
 from pydantic import BaseModel, Field
+from pydantic.functional_validators import model_validator
 from .__types import DictData, DictStr, Re, TupleStr
 from .exceptions import StageException
@@ -36,6 +50,35 @@ from .utils import (
 )
+def handler_result(message: str | None = None):
+    """Decorator function for handler result from the stage execution."""
+    message: str = message or ""
+    def decorator(func):
+        @wraps(func)
+        def wrapped(self: BaseStage, *args, **kwargs):
+            try:
+                rs: DictData = func(self, *args, **kwargs)
+                return Result(status=0, context=rs)
+            except Exception as err:
+                logging.error(
+                    f"({self.run_id}) [STAGE]: {err.__class__.__name__}: {err}"
+                )
+                if isinstance(err, StageException):
+                    raise StageException(
+                        f"{self.__class__.__name__}: {message}\n---\n\t{err}"
+                    ) from err
+                raise StageException(
+                    f"{self.__class__.__name__}: {message}\n---\n\t"
+                    f"{err.__class__.__name__}: {err}"
+                ) from None
+        return wrapped
+    return decorator
 class BaseStage(BaseModel, ABC):
     """Base Stage Model that keep only id and name fields for the stage
     metadata. If you want to implement any custom stage, you can use this class
@@ -56,6 +99,17 @@ class BaseStage(BaseModel, ABC):
         default=None,
         alias="if",
     )
+    run_id: Optional[str] = Field(
+        default=None,
+        description="A running stage ID.",
+        repr=False,
+    )
+    @model_validator(mode="after")
+    def __prepare_running_id(self):
+        if self.run_id is None:
+            self.run_id = gen_id(self.name + (self.id or ""), unique=True)
+        return self
     @abstractmethod
     def execute(self, params: DictData) -> Result:
@@ -74,24 +128,40 @@ class BaseStage(BaseModel, ABC):
         :param params: A context data that want to add output result.
         :rtype: DictData
         """
-        if self.id:
-            _id: str = param2template(self.id, params)
-        elif str2bool(os.getenv("WORKFLOW_CORE_DEFAULT_STAGE_ID", "false")):
-            _id: str = gen_id(param2template(self.name, params))
-        else:
+        if not (
+            self.id
+            or str2bool(os.getenv("WORKFLOW_CORE_DEFAULT_STAGE_ID", "false"))
+        ):
+            logging.debug(
+                f"({self.run_id}) [STAGE]: Output does not set because this "
+                f"stage does not set ID or default stage ID config flag not be "
+                f"True."
+            )
             return params
         # NOTE: Create stages key to receive an output from the stage execution.
         if "stages" not in params:
             params["stages"] = {}
+        # TODO: Validate that stage id and name are not dynamic with the params
+        #   template. (allow only matrix)
+        if self.id:
+            _id: str = param2template(self.id, params=params)
+        else:
+            _id: str = gen_id(param2template(self.name, params=params))
+        # NOTE: Set the output to that stage generated ID.
         params["stages"][_id] = {"outputs": output}
+        logging.debug(
+            f"({self.run_id}) [STAGE]: Set output complete with stage ID: {_id}"
+        )
         return params
-    def is_skip(self, params: DictData | None = None) -> bool:
+    def is_skipped(self, params: DictData | None = None) -> bool:
         """Return true if condition of this stage do not correct.
         :param params: A parameters that want to pass to condition template.
+        :rtype: bool
         """
         params: DictData = params or {}
         if self.condition is None:
@@ -104,7 +174,7 @@ class BaseStage(BaseModel, ABC):
                 raise TypeError("Return type of condition does not be boolean")
             return not rs
         except Exception as err:
-            logging.error(str(err))
+            logging.error(f"({self.run_id}) [STAGE]: {err}")
             raise StageException(str(err)) from err
@@ -131,8 +201,10 @@ class EmptyStage(BaseStage):
         :param params: A context data that want to add output result. But this
             stage does not pass any output.
         """
-        stm: str = param2template(self.echo, params=params) or "..."
-        logging.info(f"[STAGE]: Empty-Execute: {self.name!r}: " f"( {stm} )")
+        logging.info(
+            f"({self.run_id}) [STAGE]: Empty-Execute: {self.name!r}: "
+            f"( {param2template(self.echo, params=params) or '...'} )"
+        )
         return Result(status=0, context={})
@@ -183,12 +255,17 @@ class BashStage(BaseStage):
             f.write(bash.replace("\r\n", "\n"))
         make_exec(f"./{f_name}")
+        logging.debug(
+            f"({self.run_id}) [STAGE]: Start create `.sh` file and running a "
+            f"bash statement."
+        )
         yield [f_shebang, f_name]
         Path(f"./{f_name}").unlink()
-    def execute(self, params: DictData) -> Result:
+    @handler_result()
+    def execute(self, params: DictData) -> DictData:
         """Execute the Bash statement with the Python build-in ``subprocess``
         package.
@@ -199,7 +276,7 @@ class BashStage(BaseStage):
         with self.__prepare_bash(
             bash=bash, env=param2template(self.env, params)
         ) as sh:
-            logging.info(f"[STAGE]: Shell-Execute: {sh}")
+            logging.info(f"({self.run_id}) [STAGE]: Shell-Execute: {sh}")
             rs: CompletedProcess = subprocess.run(
                 sh,
                 shell=False,
@@ -212,21 +289,32 @@ class BashStage(BaseStage):
                 if "\\x00" in rs.stderr
                 else rs.stderr
             )
-            logging.error(f"{err}\n\n```bash\n{bash}```")
-            raise StageException(f"{err}\n\n```bash\n{bash}```")
-        return Result(
-            status=0,
-            context={
-                "return_code": rs.returncode,
-                "stdout": rs.stdout.rstrip("\n"),
-                "stderr": rs.stderr.rstrip("\n"),
-            },
-        )
+            logging.error(
+                f"({self.run_id}) [STAGE]: {err}\n\n```bash\n{bash}```"
+            )
+            raise StageException(
+                f"{err.__class__.__name__}: {err}\nRunning Statement:"
+                f"\n---\n```bash\n{bash}\n```"
+            )
+        return {
+            "return_code": rs.returncode,
+            "stdout": rs.stdout.rstrip("\n"),
+            "stderr": rs.stderr.rstrip("\n"),
+        }
 class PyStage(BaseStage):
     """Python executor stage that running the Python statement that receive
     globals and additional variables.
+    Data Validate:
+        >>> stage = {
+        ...     "name": "Python stage execution",
+        ...     "run": 'print("Hello {x}")',
+        ...     "vars": {
+        ...         "x": "BAR",
+        ...     },
+        ... }
     """
     run: str = Field(
@@ -259,7 +347,8 @@ class PyStage(BaseStage):
         params.update({k: _globals[k] for k in params if k in _globals})
         return params
-    def execute(self, params: DictData) -> Result:
+    @handler_result()
+    def execute(self, params: DictData) -> DictData:
         """Execute the Python statement that pass all globals and input params
         to globals argument on ``exec`` build-in function.
@@ -271,18 +360,10 @@ class PyStage(BaseStage):
             globals() | params | param2template(self.vars, params)
         )
         _locals: DictData = {}
-        try:
-            logging.info(f"[STAGE]: Py-Execute: {uuid.uuid4()}")
-            exec(param2template(self.run, params), _globals, _locals)
-        except Exception as err:
-            raise StageException(
-                f"{err.__class__.__name__}: {err}\nRunning Statement:\n---\n"
-                f"{self.run}"
-            ) from None
-        return Result(
-            status=0,
-            context={"locals": _locals, "globals": _globals},
-        )
+        run: str = param2template(self.run, params)
+        logging.info(f"({self.run_id}) [STAGE]: Py-Execute: {uuid.uuid4()}")
+        exec(run, _globals, _locals)
+        return {"locals": _locals, "globals": _globals}
 @dataclass
@@ -294,6 +375,34 @@ class HookSearch:
     tag: str
+def extract_hook(hook: str) -> Callable[[], TagFunc]:
+    """Extract Hook string value to hook function.
+    :param hook: A hook value that able to match with Task regex.
+    :rtype: Callable[[], TagFunc]
+    """
+    if not (found := Re.RE_TASK_FMT.search(hook)):
+        raise ValueError("Task does not match with task format regex.")
+    # NOTE: Pass the searching hook string to `path`, `func`, and `tag`.
+    hook: HookSearch = HookSearch(**found.groupdict())
+    # NOTE: Registry object should implement on this package only.
+    rgt: dict[str, Registry] = make_registry(f"{hook.path}")
+    if hook.func not in rgt:
+        raise NotImplementedError(
+            f"``REGISTER-MODULES.{hook.path}.registries`` does not "
+            f"implement registry: {hook.func!r}."
+        )
+    if hook.tag not in rgt[hook.func]:
+        raise NotImplementedError(
+            f"tag: {hook.tag!r} does not found on registry func: "
+            f"``REGISTER-MODULES.{hook.path}.registries.{hook.func}``"
+        )
+    return rgt[hook.func][hook.tag]
 class HookStage(BaseStage):
     """Hook executor that hook the Python function from registry with tag
     decorator function in ``utils`` module and run it with input arguments.
@@ -314,48 +423,27 @@ class HookStage(BaseStage):
     """
     uses: str = Field(
-        description="A pointer that want to load function from registry",
+        description="A pointer that want to load function from registry.",
+    )
+    args: DictData = Field(
+        description="An arguments that want to pass to the hook function.",
+        alias="with",
     )
-    args: DictData = Field(alias="with")
-    @staticmethod
-    def extract_hook(hook: str) -> Callable[[], TagFunc]:
-        """Extract Hook string value to hook function.
-        :param hook: A hook value that able to match with Task regex.
-        """
-        if not (found := Re.RE_TASK_FMT.search(hook)):
-            raise ValueError("Task does not match with task format regex.")
-        # NOTE: Pass the searching hook string to `path`, `func`, and `tag`.
-        hook: HookSearch = HookSearch(**found.groupdict())
-        # NOTE: Registry object should implement on this package only.
-        rgt: dict[str, Registry] = make_registry(f"{hook.path}")
-        if hook.func not in rgt:
-            raise NotImplementedError(
-                f"``REGISTER-MODULES.{hook.path}.registries`` does not "
-                f"implement registry: {hook.func!r}."
-            )
-        if hook.tag not in rgt[hook.func]:
-            raise NotImplementedError(
-                f"tag: {hook.tag!r} does not found on registry func: "
-                f"``REGISTER-MODULES.{hook.path}.registries.{hook.func}``"
-            )
-        return rgt[hook.func][hook.tag]
-    def execute(self, params: DictData) -> Result:
+    @handler_result()
+    def execute(self, params: DictData) -> DictData:
         """Execute the Hook function that already in the hook registry.
         :param params: A parameter that want to pass before run any statement.
         :type params: DictData
         :rtype: Result
         """
-        t_func: TagFunc = self.extract_hook(param2template(self.uses, params))()
+        t_func_hook: str = param2template(self.uses, params)
+        t_func: TagFunc = extract_hook(t_func_hook)()
         if not callable(t_func):
-            raise ImportError("Hook caller function does not callable.")
+            raise ImportError(
+                f"Hook caller {t_func_hook!r} function does not callable."
+            )
         # VALIDATE: check input task caller parameters that exists before
         #   calling.
         args: DictData = param2template(self.args, params)
@@ -369,56 +457,62 @@ class HookStage(BaseStage):
                 f"Necessary params, ({', '.join(ips.parameters.keys())}), "
                 f"does not set to args"
             )
         # NOTE: add '_' prefix if it want to use.
         for k in ips.parameters:
             if k.removeprefix("_") in args:
                 args[k] = args.pop(k.removeprefix("_"))
-        try:
-            logging.info(f"[STAGE]: Hook-Execute: {t_func.name}@{t_func.tag}")
-            rs: DictData = t_func(**param2template(args, params))
-        except Exception as err:
-            raise StageException(f"{err.__class__.__name__}: {err}") from err
+        logging.info(
+            f"({self.run_id}) [STAGE]: Hook-Execute: "
+            f"{t_func.name}@{t_func.tag}"
+        )
+        rs: DictData = t_func(**param2template(args, params))
-        # VALIDATE: Check the result type from hook function, it should be dict.
+        # VALIDATE:
+        #   Check the result type from hook function, it should be dict.
         if not isinstance(rs, dict):
-            raise StageException(
-                f"Return of hook function: {t_func.name}@{t_func.tag} does not "
-                f"serialize to result model, you should fix it to `dict` type."
+            raise TypeError(
+                f"Return of hook function: {t_func.name}@{t_func.tag} does "
+                f"not serialize to result model, you should fix it to "
+                f"`dict` type."
             )
-        return Result(status=0, context=rs)
+        return rs
 class TriggerStage(BaseStage):
-    """Trigger Pipeline execution stage that execute another pipeline object."""
+    """Trigger Pipeline execution stage that execute another pipeline object.
+    Data Validate:
+        >>> stage = {
+        ...     "name": "Trigger pipeline stage execution",
+        ...     "trigger": 'pipeline-name-for-loader',
+        ...     "params": {
+        ...         "run-date": "2024-08-01",
+        ...         "source": "src",
+        ...     },
+        ... }
+    """
     trigger: str = Field(description="A trigger pipeline name.")
-    params: DictData = Field(default_factory=dict)
+    params: DictData = Field(
+        default_factory=dict,
+        description="A parameter that want to pass to pipeline execution.",
+    )
-    def execute(self, params: DictData) -> Result:
-        """Trigger execution.
+    @handler_result("Raise from trigger pipeline")
+    def execute(self, params: DictData) -> DictData:
+        """Trigger pipeline execution.
         :param params: A parameter data that want to use in this execution.
         :rtype: Result
         """
-        from .exceptions import PipelineException
         from .pipeline import Pipeline
-        try:
-            # NOTE: Loading pipeline object from trigger name.
-            pipe: Pipeline = Pipeline.from_loader(
-                name=self.trigger, externals={}
-            )
-            rs: Result = pipe.execute(
-                params=param2template(self.params, params)
-            )
-        except PipelineException as err:
-            _alias_stage: str = self.id or self.name
-            raise StageException(
-                f"Trigger Stage: {_alias_stage} get trigger pipeline exception."
-            ) from err
-        return rs
+        # NOTE: Loading pipeline object from trigger name.
+        _trigger: str = param2template(self.trigger, params=params)
+        pipe: Pipeline = Pipeline.from_loader(name=_trigger, externals={})
+        rs: Result = pipe.execute(params=param2template(self.params, params))
+        return rs.context
 # NOTE: Order of parsing stage data

ddeutil/workflow/utils.py CHANGED Viewed

@@ -12,17 +12,17 @@ import stat
 from abc import ABC, abstractmethod
 from ast import Call, Constant, Expr, Module, Name, parse
 from collections.abc import Iterator
-from dataclasses import dataclass, field
 from datetime import date, datetime
 from functools import wraps
 from hashlib import md5
 from importlib import import_module
+from inspect import isfunction
 from itertools import product
 from pathlib import Path
 from typing import Any, Callable, Literal, Optional, Protocol, Union
 from zoneinfo import ZoneInfo
-from ddeutil.core import getdot, hasdot, import_string, lazy
+from ddeutil.core import getdot, hasdot, hash_str, import_string, lazy, str2bool
 from ddeutil.io import PathData, search_env_replace
 from ddeutil.io.models.lineage import dt_now
 from pydantic import BaseModel, ConfigDict, Field
@@ -47,10 +47,10 @@ class Engine(BaseModel):
     paths: PathData = Field(default_factory=PathData)
     registry: list[str] = Field(
-        default_factory=lambda: ["ddeutil.workflow"],
+        default_factory=lambda: ["ddeutil.workflow"],  # pragma: no cover
     )
     registry_filter: list[str] = Field(
-        default=lambda: ["ddeutil.workflow.utils"]
+        default_factory=lambda: ["ddeutil.workflow.utils"],  # pragma: no cover
     )
     @model_validator(mode="before")
@@ -89,7 +89,15 @@ class ConfParams(BaseModel):
 def config() -> ConfParams:
-    """Load Config data from ``workflows-conf.yaml`` file."""
+    """Load Config data from ``workflows-conf.yaml`` file.
+    Configuration Docs:
+    ---
+    :var engine.registry:
+    :var engine.registry_filter:
+    :var paths.root:
+    :var paths.conf:
+    """
     root_path: str = os.getenv("WORKFLOW_ROOT_PATH", ".")
     regis: list[str] = ["ddeutil.workflow"]
@@ -119,19 +127,31 @@ def config() -> ConfParams:
     )
-def gen_id(value: Any, *, sensitive: bool = True, unique: bool = False) -> str:
+def gen_id(
+    value: Any,
+    *,
+    sensitive: bool = True,
+    unique: bool = False,
+) -> str:
     """Generate running ID for able to tracking. This generate process use `md5`
-    function.
-    :param value:
-    :param sensitive:
-    :param unique:
+    algorithm function if ``WORKFLOW_CORE_PIPELINE_ID_SIMPLE`` is set to false.
+    But it will cut this hashing value length to 10 if the setting value is set
+    to true.
+    :param value: A value that want to add to prefix before hashing with md5.
+    :param sensitive: If false, convert the value to lower case before hashing.
+    :param unique: A flag that add timestamp at microsecond level to value
+        before hashing.
     :rtype: str
     """
     if not isinstance(value, str):
         value: str = str(value)
     tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
+    if str2bool(os.getenv("WORKFLOW_CORE_PIPELINE_ID_SIMPLE", "true")):
+        return hash_str(f"{(value if sensitive else value.lower())}", n=10) + (
+            f"{datetime.now(tz=tz):%Y%m%d%H%M%S%f}" if unique else ""
+        )
     return md5(
         (
             f"{(value if sensitive else value.lower())}"
@@ -328,9 +348,42 @@ Param = Union[
 ]
-@dataclass
-class Result:
-    """Result Dataclass object for passing parameter and receiving output from
+class Context(BaseModel):
+    """Context Pydantic Model"""
+    params: dict = Field(default_factory=dict)
+    jobs: dict = Field(default_factory=dict)
+    error: dict = Field(default_factory=dict)
+class Result(BaseModel):
+    """Result Pydantic Model for passing parameter and receiving output from
+    the pipeline execution.
+    """
+    # TODO: Add running ID to this result dataclass.
+    # ---
+    # parent_run_id: str
+    # run_id: str
+    #
+    status: int = Field(default=2)
+    context: DictData = Field(default_factory=dict)
+    def receive(self, result: Result) -> Result:
+        self.__dict__["status"] = result.status
+        self.__dict__["context"].update(result.context)
+        return self
+    def receive_jobs(self, result: Result) -> Result:
+        self.__dict__["status"] = result.status
+        if "jobs" not in self.__dict__["context"]:
+            self.__dict__["context"]["jobs"] = {}
+        self.__dict__["context"]["jobs"].update(result.context)
+        return self
+class ReResult(BaseModel):
+    """Result Pydantic Model for passing parameter and receiving output from
     the pipeline execution.
     """
@@ -339,8 +392,14 @@ class Result:
     # parent_run_id: str
     # run_id: str
     #
-    status: int = field(default=2)
-    context: DictData = field(default_factory=dict)
+    status: int = Field(default=2)
+    context: Context = Field(default_factory=Context)
+    def receive(self, result: ReResult) -> ReResult:
+        self.__dict__["status"] = result.status
+        self.__dict__["context"].__dict__["jobs"].update(result.context.jobs)
+        self.__dict__["context"].__dict__["error"].update(result.context.error)
+        return self
 def make_exec(path: str | Path):
@@ -580,6 +639,25 @@ def param2template(
     return str2template(value, params, filters=filters)
+def filter_func(value: Any):
+    """Filter own created function out of any value with replace it to its
+    function name. If it is a built-in function, it is left unchanged.
+    """
+    if isinstance(value, dict):
+        return {k: filter_func(value[k]) for k in value}
+    elif isinstance(value, (list, tuple, set)):
+        return type(value)([filter_func(i) for i in value])
+    if isfunction(value):
+        # NOTE: To be able to get this function back later, it could be saved
+        #   to some global memory storage.
+        #   ---
+        #   >>> GLOBAL_DICT[value.__name__] = value
+        #
+        return value.__name__
+    return value
 def dash2underscore(
     key: str,
     values: DictData,

ddeutil-workflow 0.0.7__py3-none-any.whl → 0.0.8__py3-none-any.whl

ddeutil-workflow 0.0.7__py3-none-any.whl → 0.0.8__py3-none-any.whl