ddeutil-workflow 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +26 -4
- ddeutil/workflow/__types.py +11 -1
- ddeutil/workflow/api.py +120 -0
- ddeutil/workflow/app.py +45 -0
- ddeutil/workflow/exceptions.py +3 -3
- ddeutil/workflow/log.py +79 -0
- ddeutil/workflow/pipeline.py +516 -120
- ddeutil/workflow/repeat.py +134 -0
- ddeutil/workflow/route.py +78 -0
- ddeutil/workflow/stage.py +209 -86
- ddeutil/workflow/utils.py +368 -66
- {ddeutil_workflow-0.0.6.dist-info → ddeutil_workflow-0.0.8.dist-info}/METADATA +48 -76
- ddeutil_workflow-0.0.8.dist-info/RECORD +20 -0
- {ddeutil_workflow-0.0.6.dist-info → ddeutil_workflow-0.0.8.dist-info}/WHEEL +1 -1
- ddeutil_workflow-0.0.6.dist-info/RECORD +0 -15
- {ddeutil_workflow-0.0.6.dist-info → ddeutil_workflow-0.0.8.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.6.dist-info → ddeutil_workflow-0.0.8.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,134 @@
|
|
1
|
+
# ------------------------------------------------------------------------------
|
2
|
+
# Copyright (c) 2023 Priyanshu Panwar. All rights reserved.
|
3
|
+
# Licensed under the MIT License.
|
4
|
+
# This code refs from: https://github.com/priyanshu-panwar/fastapi-utilities
|
5
|
+
# ------------------------------------------------------------------------------
|
6
|
+
import asyncio
|
7
|
+
import logging
|
8
|
+
from asyncio import ensure_future
|
9
|
+
from datetime import datetime
|
10
|
+
from functools import wraps
|
11
|
+
|
12
|
+
from croniter import croniter
|
13
|
+
from starlette.concurrency import run_in_threadpool
|
14
|
+
|
15
|
+
|
16
|
+
def get_delta(cron: str) -> float:
    """Return the number of seconds between now and the next cron
    execution time.

    :param cron: A cron-style string, eg. '0 0 * * *' for every midnight.
    :rtype: float
    """
    now: datetime = datetime.now()
    # FIX: bind the croniter object to its own name instead of shadowing
    #   the ``cron`` string parameter.
    itr = croniter(cron, now)
    return (itr.get_next(datetime) - now).total_seconds()
|
23
|
+
|
24
|
+
|
25
|
+
def repeat_at(
    *,
    cron: str,
    logger: logging.Logger = None,
    raise_exceptions: bool = False,
    max_repetitions: int = None,
):
    """This function returns a decorator that makes a function execute
    periodically as per the cron expression provided.

    :param cron: str
        Cron-style string for periodic execution, eg. '0 0 * * *' every midnight
    :param logger: logging.Logger (default None)
        Logger object to log exceptions
    :param raise_exceptions: bool (default False)
        Whether to raise exceptions or log them
    :param max_repetitions: int (default None)
        Maximum number of times to repeat the function. If None, repeat
        indefinitely.
    """

    def decorator(func):
        is_coroutine = asyncio.iscoroutinefunction(func)

        @wraps(func)
        def wrapper(*_args, **_kwargs):
            # FIX: spelled ``repetitions`` correctly (was ``repititions``).
            repetitions = 0
            if not croniter.is_valid(cron):
                raise ValueError("Invalid cron expression")

            async def loop(*args, **kwargs):
                nonlocal repetitions
                while max_repetitions is None or repetitions < max_repetitions:
                    try:
                        # FIX: snake_case local name (was ``sleepTime``).
                        sleep_time = get_delta(cron)
                        await asyncio.sleep(sleep_time)
                        if is_coroutine:
                            await func(*args, **kwargs)
                        else:
                            await run_in_threadpool(func, *args, **kwargs)
                    except Exception as e:
                        if logger is not None:
                            logger.exception(e)
                        if raise_exceptions:
                            raise e
                    repetitions += 1

            # FIX: return the scheduled task so callers can hold a strong
            #   reference; fire-and-forget tasks can otherwise be garbage
            #   collected mid-execution (documented asyncio pitfall).
            #   Backward compatible: previous version returned None, which
            #   callers necessarily ignored.
            return ensure_future(loop(*_args, **_kwargs))

        return wrapper

    return decorator
|
78
|
+
|
79
|
+
|
80
|
+
def repeat_every(
    *,
    seconds: float,
    wait_first: bool = False,
    logger: logging.Logger = None,
    raise_exceptions: bool = False,
    max_repetitions: int = None,
):
    """Decorator factory that schedules the wrapped function to execute
    repeatedly, pausing ``seconds`` seconds between runs.

    :param seconds: float
        The number of seconds to wait before executing the function again.
    :param wait_first: bool (default False)
        Whether to wait ``seconds`` seconds before executing the function
        for the first time.
    :param logger: logging.Logger (default None)
        The logger to use for logging exceptions.
    :param raise_exceptions: bool (default False)
        Whether to raise exceptions instead of logging them.
    :param max_repetitions: int (default None)
        The maximum number of times to repeat the function. If None, the
        function will repeat indefinitely.
    """

    def decorator(func):
        run_as_coroutine: bool = asyncio.iscoroutinefunction(func)

        @wraps(func)
        async def wrapper(*_args, **_kwargs):
            completed: int = 0

            async def periodic(*args, **kwargs):
                nonlocal completed
                # Optionally delay before the very first execution.
                if wait_first:
                    await asyncio.sleep(seconds)
                while max_repetitions is None or completed < max_repetitions:
                    try:
                        if run_as_coroutine:
                            await func(*args, **kwargs)
                        else:
                            await run_in_threadpool(func, *args, **kwargs)
                    except Exception as err:
                        if logger is not None:
                            logger.exception(err)
                        if raise_exceptions:
                            raise err
                    completed += 1
                    await asyncio.sleep(seconds)

            # Schedule the periodic loop in the background.
            ensure_future(periodic(*_args, **_kwargs))

        return wrapper

    return decorator
|
@@ -0,0 +1,78 @@
|
|
1
|
+
from enum import Enum
|
2
|
+
|
3
|
+
from fastapi import APIRouter, Request, status
|
4
|
+
from pydantic import BaseModel, ConfigDict, Field
|
5
|
+
|
6
|
+
from .log import get_logger
|
7
|
+
|
8
|
+
logger = get_logger(__name__)
|
9
|
+
workflow_route = APIRouter(prefix="/workflow")
|
10
|
+
|
11
|
+
|
12
|
+
@workflow_route.get("/{name}")
async def get_pipeline(name: str):
    """Return a placeholder message for the requested pipeline name."""
    message: str = f"getting pipeline {name}"
    return {"message": message}
|
15
|
+
|
16
|
+
|
17
|
+
@workflow_route.get("/{name}/logs")
async def get_pipeline_log(name: str):
    """Return a placeholder message for the requested pipeline's logs."""
    message: str = f"getting pipeline {name} logs"
    return {"message": message}
|
20
|
+
|
21
|
+
|
22
|
+
class JobNotFoundError(Exception):
    """Raised when a scheduler job cannot be found by its ID."""
|
24
|
+
|
25
|
+
|
26
|
+
schedule_route = APIRouter(prefix="/schedule", tags=["schedule"])
|
27
|
+
|
28
|
+
|
29
|
+
class TriggerEnum(str, Enum):
    """Scheduler trigger types accepted by the add-job endpoint."""

    interval = "interval"
    cron = "cron"
|
32
|
+
|
33
|
+
|
34
|
+
class Job(BaseModel):
    """Request payload for registering a job on the app scheduler.

    The ``json_schema_extra`` example mirrors the request body expected by
    the ``scheduler:add_job`` endpoint.
    """

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "func": "example.main:pytest_job",
                "trigger": "interval",
                "seconds": 3,
                "id": "pytest_job",
            },
        },
    )
    # Dotted path to the callable to schedule, e.g. 'example.main:pytest_job'
    # (per the example above).
    func: str = Field()
    trigger: TriggerEnum = Field(title="Trigger type")
    seconds: int = Field(title="Interval in seconds")
    id: str = Field(title="Job ID")
|
49
|
+
|
50
|
+
|
51
|
+
@schedule_route.post(
    "/", name="scheduler:add_job", status_code=status.HTTP_201_CREATED
)
async def add_job(request: Request, job: Job):
    """Register a job on the application scheduler and return its ID.

    :param request: The incoming request; ``request.app`` must expose a
        ``scheduler`` attribute (assumed APScheduler-like — confirm).
    :param job: The job payload to register.
    """
    # FIX: use pydantic v2 ``model_dump()`` instead of the deprecated
    #   ``.dict()`` (the Job model already uses v2 ``ConfigDict``), and
    #   avoid shadowing the ``job`` parameter with the scheduler's job.
    scheduled = request.app.scheduler.add_job(**job.model_dump())
    return {"job": f"{scheduled.id}"}
|
57
|
+
|
58
|
+
|
59
|
+
@schedule_route.get("/", name="scheduler:get_jobs", response_model=list)
async def get_jobs(request: Request):
    """List all scheduled jobs, dropping the non-picklable trigger field."""
    serialized: list = []
    for job in request.app.scheduler.get_jobs():
        state = job.__getstate__()
        serialized.append(
            {key: value for key, value in state.items() if key != "trigger"}
        )
    return serialized
|
67
|
+
|
68
|
+
|
69
|
+
@schedule_route.delete("/{job_id}", name="scheduler:remove_job")
async def remove_job(request: Request, job_id: str):
    """Remove a job from the application scheduler by its ID.

    :param request: The incoming request; ``request.app`` must expose a
        ``scheduler`` attribute.
    :param job_id: The ID of the job to remove.
    :raises JobNotFoundError: If the scheduler's removal call raises
        ``AttributeError`` (treated here as "job not found" — NOTE(review):
        confirm the scheduler actually signals a missing job this way).
    """
    try:
        deleted = request.app.scheduler.remove_job(job_id=job_id)
        logger.debug(f"Job {job_id} deleted: {deleted}")
        return {"job": f"{job_id}"}
    except AttributeError as err:
        raise JobNotFoundError(
            f"No job by the id of {job_id} was found"
        ) from err
|
ddeutil/workflow/stage.py
CHANGED
@@ -3,6 +3,18 @@
|
|
3
3
|
# Licensed under the MIT License. See LICENSE in the project root for
|
4
4
|
# license information.
|
5
5
|
# ------------------------------------------------------------------------------
|
6
|
+
"""Stage Model that use for getting stage data template from Job Model.
|
7
|
+
The stage that handle the minimize task that run in some thread (same thread at
|
8
|
+
its job owner) that mean it is the lowest executor of a pipeline workflow that
|
9
|
+
can tracking logs.
|
10
|
+
|
11
|
+
The output of stage execution only return 0 status because I do not want to
|
12
|
+
handle stage error on this stage model. I think stage model should have a lot of
|
13
|
+
usecase and it does not worry when I want to create a new one.
|
14
|
+
|
15
|
+
Execution --> Ok --> Result with 0
|
16
|
+
--> Error --> Raise StageException
|
17
|
+
"""
|
6
18
|
from __future__ import annotations
|
7
19
|
|
8
20
|
import contextlib
|
@@ -15,6 +27,7 @@ import uuid
|
|
15
27
|
from abc import ABC, abstractmethod
|
16
28
|
from collections.abc import Iterator
|
17
29
|
from dataclasses import dataclass
|
30
|
+
from functools import wraps
|
18
31
|
from inspect import Parameter
|
19
32
|
from pathlib import Path
|
20
33
|
from subprocess import CompletedProcess
|
@@ -22,6 +35,7 @@ from typing import Callable, Optional, Union
|
|
22
35
|
|
23
36
|
from ddeutil.core import str2bool
|
24
37
|
from pydantic import BaseModel, Field
|
38
|
+
from pydantic.functional_validators import model_validator
|
25
39
|
|
26
40
|
from .__types import DictData, DictStr, Re, TupleStr
|
27
41
|
from .exceptions import StageException
|
@@ -36,6 +50,35 @@ from .utils import (
|
|
36
50
|
)
|
37
51
|
|
38
52
|
|
53
|
+
def handler_result(message: str | None = None):
    """Decorator function for handler result from the stage execution.

    Wrap a stage ``execute`` method so that a successful return value is
    packed into a ``Result`` with status 0, and any failure is logged and
    re-raised as a ``StageException`` prefixed with the stage class name.
    """
    message: str = message or ""

    def decorator(func):

        @wraps(func)
        def wrapped(self: BaseStage, *args, **kwargs):
            try:
                context: DictData = func(self, *args, **kwargs)
                return Result(status=0, context=context)
            except Exception as err:
                logging.error(
                    f"({self.run_id}) [STAGE]: {err.__class__.__name__}: {err}"
                )
                prefix: str = f"{self.__class__.__name__}: {message}\n---\n\t"
                # A StageException keeps its chained cause; anything else is
                # wrapped with its class name and the context suppressed.
                if isinstance(err, StageException):
                    raise StageException(f"{prefix}{err}") from err
                raise StageException(
                    f"{prefix}{err.__class__.__name__}: {err}"
                ) from None

        return wrapped

    return decorator
|
80
|
+
|
81
|
+
|
39
82
|
class BaseStage(BaseModel, ABC):
|
40
83
|
"""Base Stage Model that keep only id and name fields for the stage
|
41
84
|
metadata. If you want to implement any custom stage, you can use this class
|
@@ -56,6 +99,17 @@ class BaseStage(BaseModel, ABC):
|
|
56
99
|
default=None,
|
57
100
|
alias="if",
|
58
101
|
)
|
102
|
+
run_id: Optional[str] = Field(
|
103
|
+
default=None,
|
104
|
+
description="A running stage ID.",
|
105
|
+
repr=False,
|
106
|
+
)
|
107
|
+
|
108
|
+
@model_validator(mode="after")
|
109
|
+
def __prepare_running_id(self):
|
110
|
+
if self.run_id is None:
|
111
|
+
self.run_id = gen_id(self.name + (self.id or ""), unique=True)
|
112
|
+
return self
|
59
113
|
|
60
114
|
@abstractmethod
|
61
115
|
def execute(self, params: DictData) -> Result:
|
@@ -74,24 +128,40 @@ class BaseStage(BaseModel, ABC):
|
|
74
128
|
:param params: A context data that want to add output result.
|
75
129
|
:rtype: DictData
|
76
130
|
"""
|
77
|
-
if
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
131
|
+
if not (
|
132
|
+
self.id
|
133
|
+
or str2bool(os.getenv("WORKFLOW_CORE_DEFAULT_STAGE_ID", "false"))
|
134
|
+
):
|
135
|
+
logging.debug(
|
136
|
+
f"({self.run_id}) [STAGE]: Output does not set because this "
|
137
|
+
f"stage does not set ID or default stage ID config flag not be "
|
138
|
+
f"True."
|
139
|
+
)
|
82
140
|
return params
|
83
141
|
|
84
142
|
# NOTE: Create stages key to receive an output from the stage execution.
|
85
143
|
if "stages" not in params:
|
86
144
|
params["stages"] = {}
|
87
145
|
|
146
|
+
# TODO: Validate stage id and name should not dynamic with params
|
147
|
+
# template. (allow only matrix)
|
148
|
+
if self.id:
|
149
|
+
_id: str = param2template(self.id, params=params)
|
150
|
+
else:
|
151
|
+
_id: str = gen_id(param2template(self.name, params=params))
|
152
|
+
|
153
|
+
# NOTE: Set the output to that stage generated ID.
|
88
154
|
params["stages"][_id] = {"outputs": output}
|
155
|
+
logging.debug(
|
156
|
+
f"({self.run_id}) [STAGE]: Set output complete with stage ID: {_id}"
|
157
|
+
)
|
89
158
|
return params
|
90
159
|
|
91
|
-
def
|
160
|
+
def is_skipped(self, params: DictData | None = None) -> bool:
|
92
161
|
"""Return true if condition of this stage do not correct.
|
93
162
|
|
94
163
|
:param params: A parameters that want to pass to condition template.
|
164
|
+
:rtype: bool
|
95
165
|
"""
|
96
166
|
params: DictData = params or {}
|
97
167
|
if self.condition is None:
|
@@ -99,20 +169,24 @@ class BaseStage(BaseModel, ABC):
|
|
99
169
|
|
100
170
|
_g: DictData = globals() | params
|
101
171
|
try:
|
102
|
-
rs: bool = eval(
|
103
|
-
param2template(self.condition, params, repr_flag=True), _g, {}
|
104
|
-
)
|
172
|
+
rs: bool = eval(param2template(self.condition, params), _g, {})
|
105
173
|
if not isinstance(rs, bool):
|
106
174
|
raise TypeError("Return type of condition does not be boolean")
|
107
175
|
return not rs
|
108
176
|
except Exception as err:
|
109
|
-
logging.error(
|
177
|
+
logging.error(f"({self.run_id}) [STAGE]: {err}")
|
110
178
|
raise StageException(str(err)) from err
|
111
179
|
|
112
180
|
|
113
181
|
class EmptyStage(BaseStage):
|
114
182
|
"""Empty stage that do nothing (context equal empty stage) and logging the
|
115
183
|
name of stage only to stdout.
|
184
|
+
|
185
|
+
Data Validate:
|
186
|
+
>>> stage = {
|
187
|
+
... "name": "Empty stage execution",
|
188
|
+
... "echo": "Hello World",
|
189
|
+
... }
|
116
190
|
"""
|
117
191
|
|
118
192
|
echo: Optional[str] = Field(
|
@@ -127,7 +201,10 @@ class EmptyStage(BaseStage):
|
|
127
201
|
:param params: A context data that want to add output result. But this
|
128
202
|
stage does not pass any output.
|
129
203
|
"""
|
130
|
-
logging.info(
|
204
|
+
logging.info(
|
205
|
+
f"({self.run_id}) [STAGE]: Empty-Execute: {self.name!r}: "
|
206
|
+
f"( {param2template(self.echo, params=params) or '...'} )"
|
207
|
+
)
|
131
208
|
return Result(status=0, context={})
|
132
209
|
|
133
210
|
|
@@ -178,12 +255,17 @@ class BashStage(BaseStage):
|
|
178
255
|
f.write(bash.replace("\r\n", "\n"))
|
179
256
|
|
180
257
|
make_exec(f"./{f_name}")
|
258
|
+
logging.debug(
|
259
|
+
f"({self.run_id}) [STAGE]: Start create `.sh` file and running a "
|
260
|
+
f"bash statement."
|
261
|
+
)
|
181
262
|
|
182
263
|
yield [f_shebang, f_name]
|
183
264
|
|
184
265
|
Path(f"./{f_name}").unlink()
|
185
266
|
|
186
|
-
|
267
|
+
@handler_result()
|
268
|
+
def execute(self, params: DictData) -> DictData:
|
187
269
|
"""Execute the Bash statement with the Python build-in ``subprocess``
|
188
270
|
package.
|
189
271
|
|
@@ -194,7 +276,7 @@ class BashStage(BaseStage):
|
|
194
276
|
with self.__prepare_bash(
|
195
277
|
bash=bash, env=param2template(self.env, params)
|
196
278
|
) as sh:
|
197
|
-
logging.info(f"[STAGE]: Shell-Execute: {sh}")
|
279
|
+
logging.info(f"({self.run_id}) [STAGE]: Shell-Execute: {sh}")
|
198
280
|
rs: CompletedProcess = subprocess.run(
|
199
281
|
sh,
|
200
282
|
shell=False,
|
@@ -207,21 +289,32 @@ class BashStage(BaseStage):
|
|
207
289
|
if "\\x00" in rs.stderr
|
208
290
|
else rs.stderr
|
209
291
|
)
|
210
|
-
logging.error(
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
"
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
292
|
+
logging.error(
|
293
|
+
f"({self.run_id}) [STAGE]: {err}\n\n```bash\n{bash}```"
|
294
|
+
)
|
295
|
+
raise StageException(
|
296
|
+
f"{err.__class__.__name__}: {err}\nRunning Statement:"
|
297
|
+
f"\n---\n```bash\n{bash}\n```"
|
298
|
+
)
|
299
|
+
return {
|
300
|
+
"return_code": rs.returncode,
|
301
|
+
"stdout": rs.stdout.rstrip("\n"),
|
302
|
+
"stderr": rs.stderr.rstrip("\n"),
|
303
|
+
}
|
220
304
|
|
221
305
|
|
222
306
|
class PyStage(BaseStage):
|
223
307
|
"""Python executor stage that running the Python statement that receive
|
224
308
|
globals nad additional variables.
|
309
|
+
|
310
|
+
Data Validate:
|
311
|
+
>>> stage = {
|
312
|
+
... "name": "Python stage execution",
|
313
|
+
... "run": 'print("Hello {x}")',
|
314
|
+
... "vars": {
|
315
|
+
... "x": "BAR",
|
316
|
+
... },
|
317
|
+
... }
|
225
318
|
"""
|
226
319
|
|
227
320
|
run: str = Field(
|
@@ -254,7 +347,8 @@ class PyStage(BaseStage):
|
|
254
347
|
params.update({k: _globals[k] for k in params if k in _globals})
|
255
348
|
return params
|
256
349
|
|
257
|
-
|
350
|
+
@handler_result()
|
351
|
+
def execute(self, params: DictData) -> DictData:
|
258
352
|
"""Execute the Python statement that pass all globals and input params
|
259
353
|
to globals argument on ``exec`` build-in function.
|
260
354
|
|
@@ -266,18 +360,10 @@ class PyStage(BaseStage):
|
|
266
360
|
globals() | params | param2template(self.vars, params)
|
267
361
|
)
|
268
362
|
_locals: DictData = {}
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
raise StageException(
|
274
|
-
f"{err.__class__.__name__}: {err}\nRunning Statement:\n---\n"
|
275
|
-
f"{self.run}"
|
276
|
-
) from None
|
277
|
-
return Result(
|
278
|
-
status=0,
|
279
|
-
context={"locals": _locals, "globals": _globals},
|
280
|
-
)
|
363
|
+
run: str = param2template(self.run, params)
|
364
|
+
logging.info(f"({self.run_id}) [STAGE]: Py-Execute: {uuid.uuid4()}")
|
365
|
+
exec(run, _globals, _locals)
|
366
|
+
return {"locals": _locals, "globals": _globals}
|
281
367
|
|
282
368
|
|
283
369
|
@dataclass
|
@@ -289,6 +375,34 @@ class HookSearch:
|
|
289
375
|
tag: str
|
290
376
|
|
291
377
|
|
378
|
+
def extract_hook(hook: str) -> Callable[[], TagFunc]:
    """Extract Hook string value to hook function.

    :param hook: A hook value that able to match with Task regex.
    :raises ValueError: If the hook string does not match the task regex.
    :raises NotImplementedError: If the function or tag is not registered.
    :rtype: Callable[[], TagFunc]
    """
    found = Re.RE_TASK_FMT.search(hook)
    if found is None:
        raise ValueError("Task does not match with task format regex.")

    # NOTE: Pass the searching hook string to `path`, `func`, and `tag`.
    search: HookSearch = HookSearch(**found.groupdict())

    # NOTE: Registry object should implement on this package only.
    rgt: dict[str, Registry] = make_registry(f"{search.path}")
    if search.func not in rgt:
        raise NotImplementedError(
            f"``REGISTER-MODULES.{search.path}.registries`` does not "
            f"implement registry: {search.func!r}."
        )

    if search.tag not in rgt[search.func]:
        raise NotImplementedError(
            f"tag: {search.tag!r} does not found on registry func: "
            f"``REGISTER-MODULES.{search.path}.registries.{search.func}``"
        )
    return rgt[search.func][search.tag]
|
404
|
+
|
405
|
+
|
292
406
|
class HookStage(BaseStage):
|
293
407
|
"""Hook executor that hook the Python function from registry with tag
|
294
408
|
decorator function in ``utils`` module and run it with input arguments.
|
@@ -309,54 +423,33 @@ class HookStage(BaseStage):
|
|
309
423
|
"""
|
310
424
|
|
311
425
|
uses: str = Field(
|
312
|
-
description="A pointer that want to load function from registry",
|
426
|
+
description="A pointer that want to load function from registry.",
|
427
|
+
)
|
428
|
+
args: DictData = Field(
|
429
|
+
description="An arguments that want to pass to the hook function.",
|
430
|
+
alias="with",
|
313
431
|
)
|
314
|
-
args: DictData = Field(alias="with")
|
315
|
-
|
316
|
-
@staticmethod
|
317
|
-
def extract_hook(hook: str) -> Callable[[], TagFunc]:
|
318
|
-
"""Extract Hook string value to hook function.
|
319
|
-
|
320
|
-
:param hook: A hook value that able to match with Task regex.
|
321
|
-
"""
|
322
|
-
if not (found := Re.RE_TASK_FMT.search(hook)):
|
323
|
-
raise ValueError("Task does not match with task format regex.")
|
324
|
-
|
325
|
-
# NOTE: Pass the searching hook string to `path`, `func`, and `tag`.
|
326
|
-
hook: HookSearch = HookSearch(**found.groupdict())
|
327
|
-
|
328
|
-
# NOTE: Registry object should implement on this package only.
|
329
|
-
rgt: dict[str, Registry] = make_registry(f"{hook.path}")
|
330
|
-
if hook.func not in rgt:
|
331
|
-
raise NotImplementedError(
|
332
|
-
f"``REGISTER-MODULES.{hook.path}.registries`` does not "
|
333
|
-
f"implement registry: {hook.func!r}."
|
334
|
-
)
|
335
|
-
|
336
|
-
if hook.tag not in rgt[hook.func]:
|
337
|
-
raise NotImplementedError(
|
338
|
-
f"tag: {hook.tag!r} does not found on registry func: "
|
339
|
-
f"``REGISTER-MODULES.{hook.path}.registries.{hook.func}``"
|
340
|
-
)
|
341
|
-
return rgt[hook.func][hook.tag]
|
342
432
|
|
343
|
-
|
344
|
-
|
433
|
+
@handler_result()
|
434
|
+
def execute(self, params: DictData) -> DictData:
|
435
|
+
"""Execute the Hook function that already in the hook registry.
|
345
436
|
|
346
437
|
:param params: A parameter that want to pass before run any statement.
|
347
438
|
:type params: DictData
|
348
439
|
:rtype: Result
|
349
440
|
"""
|
350
|
-
|
441
|
+
t_func_hook: str = param2template(self.uses, params)
|
442
|
+
t_func: TagFunc = extract_hook(t_func_hook)()
|
351
443
|
if not callable(t_func):
|
352
|
-
raise ImportError(
|
353
|
-
|
354
|
-
|
444
|
+
raise ImportError(
|
445
|
+
f"Hook caller {t_func_hook!r} function does not callable."
|
446
|
+
)
|
355
447
|
# VALIDATE: check input task caller parameters that exists before
|
356
448
|
# calling.
|
449
|
+
args: DictData = param2template(self.args, params)
|
357
450
|
ips = inspect.signature(t_func)
|
358
451
|
if any(
|
359
|
-
k not in args
|
452
|
+
(k.removeprefix("_") not in args and k not in args)
|
360
453
|
for k in ips.parameters
|
361
454
|
if ips.parameters[k].default == Parameter.empty
|
362
455
|
):
|
@@ -364,32 +457,62 @@ class HookStage(BaseStage):
|
|
364
457
|
f"Necessary params, ({', '.join(ips.parameters.keys())}), "
|
365
458
|
f"does not set to args"
|
366
459
|
)
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
460
|
+
# NOTE: add '_' prefix if it want to use.
|
461
|
+
for k in ips.parameters:
|
462
|
+
if k.removeprefix("_") in args:
|
463
|
+
args[k] = args.pop(k.removeprefix("_"))
|
464
|
+
|
465
|
+
logging.info(
|
466
|
+
f"({self.run_id}) [STAGE]: Hook-Execute: "
|
467
|
+
f"{t_func.name}@{t_func.tag}"
|
468
|
+
)
|
469
|
+
rs: DictData = t_func(**param2template(args, params))
|
470
|
+
|
471
|
+
# VALIDATE:
|
472
|
+
# Check the result type from hook function, it should be dict.
|
473
|
+
if not isinstance(rs, dict):
|
474
|
+
raise TypeError(
|
475
|
+
f"Return of hook function: {t_func.name}@{t_func.tag} does "
|
476
|
+
f"not serialize to result model, you should fix it to "
|
477
|
+
f"`dict` type."
|
478
|
+
)
|
479
|
+
return rs
|
374
480
|
|
375
481
|
|
376
482
|
class TriggerStage(BaseStage):
|
377
|
-
"""Trigger Pipeline execution stage that execute another pipeline object.
|
483
|
+
"""Trigger Pipeline execution stage that execute another pipeline object.
|
484
|
+
|
485
|
+
Data Validate:
|
486
|
+
>>> stage = {
|
487
|
+
... "name": "Trigger pipeline stage execution",
|
488
|
+
... "trigger": 'pipeline-name-for-loader',
|
489
|
+
... "params": {
|
490
|
+
... "run-date": "2024-08-01",
|
491
|
+
... "source": "src",
|
492
|
+
... },
|
493
|
+
... }
|
494
|
+
"""
|
378
495
|
|
379
496
|
trigger: str = Field(description="A trigger pipeline name.")
|
380
|
-
params: DictData = Field(
|
497
|
+
params: DictData = Field(
|
498
|
+
default_factory=dict,
|
499
|
+
description="A parameter that want to pass to pipeline execution.",
|
500
|
+
)
|
381
501
|
|
382
|
-
|
383
|
-
|
502
|
+
    @handler_result("Raise from trigger pipeline")
    def execute(self, params: DictData) -> DictData:
        """Trigger pipeline execution.

        Loads the pipeline named by the templated ``self.trigger`` value and
        runs it with the templated ``self.params``, returning its context.

        :param params: A parameter data that want to use in this execution.
        :rtype: Result
        """
        # NOTE(review): imported locally, presumably to avoid a circular
        #   import between the stage and pipeline modules — confirm.
        from .pipeline import Pipeline

        # NOTE: Loading pipeline object from trigger name.
        _trigger: str = param2template(self.trigger, params=params)
        pipe: Pipeline = Pipeline.from_loader(name=_trigger, externals={})
        # Run the target pipeline and surface only its context; the
        # handler_result decorator re-wraps this into a Result.
        rs: Result = pipe.execute(params=param2template(self.params, params))
        return rs.context
|
393
516
|
|
394
517
|
|
395
518
|
# NOTE: Order of parsing stage data
|