ddeutil-workflow 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +3 -2
- ddeutil/workflow/api.py +84 -16
- ddeutil/workflow/cli.py +14 -14
- ddeutil/workflow/exceptions.py +6 -6
- ddeutil/workflow/job.py +572 -0
- ddeutil/workflow/log.py +10 -10
- ddeutil/workflow/repeat.py +4 -2
- ddeutil/workflow/route.py +165 -36
- ddeutil/workflow/scheduler.py +733 -110
- ddeutil/workflow/stage.py +12 -12
- ddeutil/workflow/utils.py +4 -4
- {ddeutil_workflow-0.0.10.dist-info → ddeutil_workflow-0.0.12.dist-info}/METADATA +66 -70
- ddeutil_workflow-0.0.12.dist-info/RECORD +21 -0
- {ddeutil_workflow-0.0.10.dist-info → ddeutil_workflow-0.0.12.dist-info}/WHEEL +1 -1
- ddeutil/workflow/pipeline.py +0 -1186
- ddeutil_workflow-0.0.10.dist-info/RECORD +0 -21
- {ddeutil_workflow-0.0.10.dist-info → ddeutil_workflow-0.0.12.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.10.dist-info → ddeutil_workflow-0.0.12.dist-info}/entry_points.txt +0 -0
- {ddeutil_workflow-0.0.10.dist-info → ddeutil_workflow-0.0.12.dist-info}/top_level.txt +0 -0
ddeutil/workflow/job.py
ADDED
@@ -0,0 +1,572 @@
+# ------------------------------------------------------------------------------
+# Copyright (c) 2022 Korawich Anuttra. All rights reserved.
+# Licensed under the MIT License. See LICENSE in the project root for
+# license information.
+# ------------------------------------------------------------------------------
+from __future__ import annotations
+
+import copy
+import time
+from concurrent.futures import (
+    FIRST_EXCEPTION,
+    Future,
+    ThreadPoolExecutor,
+    as_completed,
+    wait,
+)
+from pickle import PickleError
+from textwrap import dedent
+from threading import Event
+from typing import Optional
+
+from pydantic import BaseModel, Field
+from pydantic.functional_validators import field_validator, model_validator
+from typing_extensions import Self
+
+from .__types import (
+    DictData,
+    DictStr,
+    Matrix,
+    MatrixExclude,
+    MatrixInclude,
+    TupleStr,
+)
+from .exceptions import (
+    JobException,
+    StageException,
+    UtilException,
+)
+from .log import get_logger
+from .stage import Stage
+from .utils import (
+    Result,
+    cross_product,
+    dash2underscore,
+    filter_func,
+    gen_id,
+    has_template,
+)
+
+logger = get_logger("ddeutil.workflow")
+
+
+__all__: TupleStr = (
+    "Strategy",
+    "Job",
+)
+
+
+class Strategy(BaseModel):
+    """Strategy Model that will combine a matrix together for running the
+    special job.
+
+    Data Validate:
+        >>> strategy = {
+        ...     'max-parallel': 1,
+        ...     'fail-fast': False,
+        ...     'matrix': {
+        ...         'first': [1, 2, 3],
+        ...         'second': ['foo', 'bar'],
+        ...     },
+        ...     'include': [{'first': 4, 'second': 'foo'}],
+        ...     'exclude': [{'first': 1, 'second': 'bar'}],
+        ... }
+    """
+
+    fail_fast: bool = Field(
+        default=False,
+        serialization_alias="fail-fast",
+    )
+    max_parallel: int = Field(
+        default=1,
+        gt=0,
+        description=(
+            "The maximum number of executor thread pool that want to run "
+            "parallel"
+        ),
+        serialization_alias="max-parallel",
+    )
+    matrix: Matrix = Field(
+        default_factory=dict,
+        description=(
+            "A matrix values that want to cross product to possible strategies."
+        ),
+    )
+    include: MatrixInclude = Field(
+        default_factory=list,
+        description="A list of additional matrix that want to adds-in.",
+    )
+    exclude: MatrixExclude = Field(
+        default_factory=list,
+        description="A list of exclude matrix that want to filter-out.",
+    )
+
+    @model_validator(mode="before")
+    def __prepare_keys(cls, values: DictData) -> DictData:
+        """Rename key that use dash to underscore because Python does not
+        support this character exist in any variable name.
+        """
+        dash2underscore("max-parallel", values)
+        dash2underscore("fail-fast", values)
+        return values
+
+    def is_set(self) -> bool:
+        """Return True if this strategy was set from yaml template."""
+        return len(self.matrix) > 0
+
+    def make(self) -> list[DictStr]:
+        """Return List of product of matrix values that already filter with
+        exclude and add include.
+
+        :rtype: list[DictStr]
+        """
+        # NOTE: If it does not set matrix, it will return list of an empty dict.
+        if not (mt := self.matrix):
+            return [{}]
+
+        final: list[DictStr] = []
+        for r in cross_product(matrix=mt):
+            if any(
+                all(r[k] == v for k, v in exclude.items())
+                for exclude in self.exclude
+            ):
+                continue
+            final.append(r)
+
+        # NOTE: If it is empty matrix and include, it will return list of an
+        #   empty dict.
+        if not final and not self.include:
+            return [{}]
+
+        # NOTE: Add include to generated matrix with exclude list.
+        add: list[DictStr] = []
+        for include in self.include:
+            # VALIDATE:
+            #   Validate any key in include list should be a subset of some one
+            #   in matrix.
+            if all(not (set(include.keys()) <= set(m.keys())) for m in final):
+                raise ValueError("Include should have the keys equal to matrix")
+
+            # VALIDATE:
+            #   Validate value of include does not duplicate with generated
+            #   matrix.
+            if any(
+                all(include.get(k) == v for k, v in m.items())
+                for m in [*final, *add]
+            ):
+                continue
+            add.append(include)
+        final.extend(add)
+        return final
+
+
+class Job(BaseModel):
+    """Job Model (group of stages).
+
+    This job model allow you to use for-loop that call matrix strategy. If
+    you pass matrix mapping and it able to generate, you will see it running
+    with loop of matrix values.
+
+    Data Validate:
+        >>> job = {
+        ...     "runs-on": None,
+        ...     "strategy": {
+        ...         "max-parallel": 1,
+        ...         "matrix": {
+        ...             "first": [1, 2, 3],
+        ...             "second": ['foo', 'bar'],
+        ...         },
+        ...     },
+        ...     "needs": [],
+        ...     "stages": [
+        ...         {
+        ...             "name": "Some stage",
+        ...             "run": "print('Hello World')",
+        ...         },
+        ...         ...
+        ...     ],
+        ... }
+    """
+
+    id: Optional[str] = Field(
+        default=None,
+        description=(
+            "A job ID, this value will add from workflow after validation "
+            "process."
+        ),
+    )
+    desc: Optional[str] = Field(
+        default=None,
+        description="A job description that can be string of markdown content.",
+    )
+    runs_on: Optional[str] = Field(
+        default=None,
+        description="A target executor node for this job use to execution.",
+        serialization_alias="runs-on",
+    )
+    stages: list[Stage] = Field(
+        default_factory=list,
+        description="A list of Stage of this job.",
+    )
+    needs: list[str] = Field(
+        default_factory=list,
+        description="A list of the job ID that want to run before this job.",
+    )
+    strategy: Strategy = Field(
+        default_factory=Strategy,
+        description="A strategy matrix that want to generate.",
+    )
+    run_id: Optional[str] = Field(
+        default=None,
+        description="A running job ID.",
+        repr=False,
+        exclude=True,
+    )
+
+    @model_validator(mode="before")
+    def __prepare_keys(cls, values: DictData) -> DictData:
+        """Rename key that use dash to underscore because Python does not
+        support this character exist in any variable name.
+        """
+        dash2underscore("runs-on", values)
+        return values
+
+    @field_validator("desc", mode="after")
+    def ___prepare_desc(cls, value: str) -> str:
+        """Prepare description string that was created on a template."""
+        return dedent(value)
+
+    @model_validator(mode="after")
+    def __prepare_running_id(self):
+        if self.run_id is None:
+            self.run_id = gen_id(self.id or "", unique=True)
+
+        # VALIDATE: Validate job id should not dynamic with params template.
+        if has_template(self.id):
+            raise ValueError("Job ID should not has any template.")
+
+        return self
+
+    def get_running_id(self, run_id: str) -> Self:
+        """Return Job model object that changing job running ID with an
+        input running ID.
+
+        :param run_id: A replace job running ID.
+        :rtype: Self
+        """
+        return self.model_copy(update={"run_id": run_id})
+
+    def stage(self, stage_id: str) -> Stage:
+        """Return stage model that match with an input stage ID."""
+        for stage in self.stages:
+            if stage_id == (stage.id or ""):
+                return stage
+        raise ValueError(f"Stage ID {stage_id} does not exists")
+
+    def set_outputs(self, output: DictData) -> DictData:
+        """Setting output of job execution"""
+        if len(output) > 1 and self.strategy.is_set():
+            return {"strategies": output}
+        return output[next(iter(output))]
+
+    def execute_strategy(
+        self,
+        strategy: DictData,
+        params: DictData,
+        *,
+        event: Event | None = None,
+    ) -> Result:
+        """Job Strategy execution with passing dynamic parameters from the
+        workflow execution to strategy matrix.
+
+        This execution is the minimum level execution of job model.
+
+        :param strategy: A metrix strategy value.
+        :param params: A dynamic parameters.
+        :param event: An manger event that pass to the PoolThreadExecutor.
+        :rtype: Result
+
+        :raise JobException: If it has any error from StageException or
+            UtilException.
+        """
+        # NOTE: Force stop this execution if event was set from main execution.
+        if event and event.is_set():
+            return Result(
+                status=1,
+                context={
+                    gen_id(strategy): {
+                        "matrix": strategy,
+                        "stages": {},
+                        "error_message": {
+                            "message": "Process Event stopped before execution"
+                        },
+                    },
+                },
+            )
+
+        # NOTE: Create strategy execution context and update a matrix and copied
+        #   of params. So, the context value will have structure like;
+        #   ---
+        #   {
+        #       "params": { ... },      <== Current input params
+        #       "jobs": { ... },        <== Current input params
+        #       "matrix": { ... }       <== Current strategy value
+        #   }
+        #
+        context: DictData = params
+        context.update({"matrix": strategy})
+
+        # IMPORTANT: The stage execution only run sequentially one-by-one.
+        for stage in self.stages:
+
+            # IMPORTANT: Change any stage running IDs to this job running ID.
+            stage: Stage = stage.get_running_id(self.run_id)
+
+            _st_name: str = stage.id or stage.name
+
+            if stage.is_skipped(params=context):
+                logger.info(
+                    f"({self.run_id}) [JOB]: Skip the stage: {_st_name!r}"
+                )
+                continue
+
+            logger.info(
+                f"({self.run_id}) [JOB]: Start execute the stage: {_st_name!r}"
+            )
+
+            # NOTE: Logging a matrix that pass on this stage execution.
+            if strategy:
+                logger.info(f"({self.run_id}) [JOB]: Matrix: {strategy}")
+
+            # NOTE:
+            #   I do not use below syntax because `params` dict be the
+            #   reference memory pointer and it was changed when I action
+            #   anything like update or re-construct this.
+            #
+            #   ... params |= stage.execute(params=params)
+            #
+            #   This step will add the stage result to ``stages`` key in
+            #   that stage id. It will have structure like;
+            #   ---
+            #   {
+            #       "params": { ... },
+            #       "jobs": { ... },
+            #       "matrix": { ... },
+            #       "stages": { { "stage-id-1": ... }, ... }
+            #   }
+            #
+            if event and event.is_set():
+                return Result(
+                    status=1,
+                    context={
+                        gen_id(strategy): {
+                            "matrix": strategy,
+                            # NOTE: If job strategy executor use multithreading,
+                            #   it will not filter function object from context.
+                            #   ---
+                            #   "stages": filter_func(context.pop("stages", {})),
+                            "stages": context.pop("stages", {}),
+                            "error_message": {
+                                "message": (
+                                    "Process Event stopped before execution"
+                                ),
+                            },
+                        },
+                    },
+                )
+            try:
+                rs: Result = stage.execute(params=context)
+                stage.set_outputs(rs.context, to=context)
+            except (StageException, UtilException) as err:
+                logger.error(
+                    f"({self.run_id}) [JOB]: {err.__class__.__name__}: {err}"
+                )
+                raise JobException(
+                    f"Get stage execution error: {err.__class__.__name__}: "
+                    f"{err}"
+                ) from None
+
+            # NOTE: Remove new stage object that was created from
+            #   ``get_running_id`` method.
+            del stage
+
+        return Result(
+            status=0,
+            context={
+                gen_id(strategy): {
+                    "matrix": strategy,
+                    # NOTE: (WF001) filter own created function from stages
+                    #   value, because it does not dump with pickle when you
+                    #   execute with multiprocess.
+                    #
+                    "stages": filter_func(context.pop("stages", {})),
+                },
+            },
+        )
+
+    def execute(self, params: DictData | None = None) -> Result:
+        """Job execution with passing dynamic parameters from the workflow
+        execution. It will generate matrix values at the first step and for-loop
+        any metrix to all stages dependency.
+
+        :param params: An input parameters that use on job execution.
+        :rtype: Result
+        """
+        context: DictData = {}
+
+        # NOTE: Normal Job execution.
+        if (not self.strategy.is_set()) or self.strategy.max_parallel == 1:
+            for strategy in self.strategy.make():
+                rs: Result = self.execute_strategy(
+                    strategy, params=copy.deepcopy(params)
+                )
+                context.update(rs.context)
+            return Result(
+                status=0,
+                context=context,
+            )
+
+        # # WARNING: (WF001) I got error that raise when use
+        # #     ``ProcessPoolExecutor``;
+        # #     ---
+        # #     _pickle.PicklingError: Can't pickle
+        # #         <function ??? at 0x000001F0BE80F160>: attribute lookup ???
+        # #         on ddeutil.workflow.stage failed
+        # #
+        # # from multiprocessing import Event, Manager
+        # with Manager() as manager:
+        #     event: Event = manager.Event()
+        #
+        #     # NOTE: Start process pool executor for running strategy executor
+        #     #   in parallel mode.
+        #     with ProcessPoolExecutor(
+        #         max_workers=self.strategy.max_parallel
+        #     ) as executor:
+        #         futures: list[Future] = [
+        #             executor.submit(
+        #                 self.execute_strategy,
+        #                 strategy,
+        #                 params=copy.deepcopy(params),
+        #                 event=event,
+        #             )
+        #             for strategy in self.strategy.make()
+        #         ]
+        #         if self.strategy.fail_fast:
+        #             rs = self.__catch_fail_fast(event, futures)
+        #         else:
+        #             rs = self.__catch_all_completed(futures)
+
+        # NOTE: Create event for cancel executor stop running.
+        event: Event = Event()
+
+        with ThreadPoolExecutor(
+            max_workers=self.strategy.max_parallel
+        ) as executor:
+            futures: list[Future] = [
+                executor.submit(
+                    self.execute_strategy,
+                    strategy,
+                    params=copy.deepcopy(params),
+                    event=event,
+                )
+                for strategy in self.strategy.make()
+            ]
+
+            # NOTE: Dynamic catching futures object with fail-fast flag.
+            if self.strategy.fail_fast:
+                rs: Result = self.__catch_fail_fast(event, futures)
+            else:
+                rs: Result = self.__catch_all_completed(futures)
+        return Result(
+            status=0,
+            context=rs.context,
+        )
+
+    def __catch_fail_fast(self, event: Event, futures: list[Future]) -> Result:
+        """Job parallel pool futures catching with fail-fast mode. That will
+        stop all not done futures if it receive the first exception from all
+        running futures.
+
+        :param event:
+        :param futures: A list of futures.
+        :rtype: Result
+        """
+        context: DictData = {}
+        # NOTE: Get results from a collection of tasks with a
+        #   timeout that has the first exception.
+        done, not_done = wait(
+            futures, timeout=1800, return_when=FIRST_EXCEPTION
+        )
+        nd: str = (
+            f", the strategies do not run is {not_done}" if not_done else ""
+        )
+        logger.debug(f"({self.run_id}) [JOB]: Strategy is set Fail Fast{nd}")
+
+        if len(done) != len(futures):
+
+            # NOTE: Stop all running tasks
+            event.set()
+
+            # NOTE: Cancel any scheduled tasks
+            for future in futures:
+                future.cancel()
+
+        status: int = 0
+        for future in done:
+            if future.exception():
+                status = 1
+                logger.error(
+                    f"({self.run_id}) [JOB]: One stage failed with: "
+                    f"{future.exception()}, shutting down this future."
+                )
+            elif future.cancelled():
+                continue
+            else:
+                rs: Result = future.result(timeout=60)
+                context.update(rs.context)
+        return Result(status=status, context=context)
+
+    def __catch_all_completed(self, futures: list[Future]) -> Result:
+        """Job parallel pool futures catching with all-completed mode.
+
+        :param futures: A list of futures.
+        :rtype: Result
+        """
+        context: DictData = {}
+        status: int = 0
+        for future in as_completed(futures):
+            try:
+                rs: Result = future.result(timeout=60)
+                context.update(rs.context)
+            except PickleError as err:
+                # NOTE: (WF001) I do not want to fix this issue because
+                #   it does not make sense and over-engineering with
+                #   this bug fix process.
+                raise JobException(
+                    f"PyStage that create object on locals does use "
+                    f"parallel in strategy execution;\n\t{err}"
+                ) from None
+            except TimeoutError:
+                status = 1
+                logger.warning(
+                    f"({self.run_id}) [JOB]: Task is hanging. Attempting to "
+                    f"kill."
+                )
+                future.cancel()
+                time.sleep(0.1)
+                if not future.cancelled():
+                    logger.warning(
+                        f"({self.run_id}) [JOB]: Failed to cancel the task."
+                    )
+                else:
+                    logger.warning(
+                        f"({self.run_id}) [JOB]: Task canceled successfully."
+                    )
+            except JobException as err:
+                status = 1
+                logger.error(
+                    f"({self.run_id}) [JOB]: Get stage exception with "
+                    f"fail-fast does not set;\n{err.__class__.__name__}:\n\t"
+                    f"{err}"
+                )
+        return Result(status=status, context=context)
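To make the matrix mechanics above concrete, here is a minimal sketch of `Strategy.make()` driven by the doctest data in the class docstring. It assumes ddeutil-workflow 0.0.12 is installed so that `Strategy` imports from `ddeutil.workflow.job` as shown in this diff; the element order of the result depends on `cross_product`, so treat the printed order as illustrative.

from ddeutil.workflow.job import Strategy

# Dash-style keys are accepted because the __prepare_keys validator
# renames them to underscore form before field validation.
strategy = Strategy.model_validate(
    {
        "max-parallel": 1,
        "fail-fast": False,
        "matrix": {"first": [1, 2], "second": ["foo", "bar"]},
        "include": [{"first": 4, "second": "foo"}],
        "exclude": [{"first": 1, "second": "bar"}],
    }
)

# 2 x 2 = 4 combinations, minus the excluded {'first': 1, 'second': 'bar'},
# plus the appended include entry -> 4 strategy dicts in total.
for combo in strategy.make():
    print(combo)

`Job.execute()` then feeds each of these dicts to `execute_strategy()`: sequentially when `max-parallel` is 1, otherwise through a `ThreadPoolExecutor`, with `fail-fast` selecting between the `__catch_fail_fast` and `__catch_all_completed` collectors.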
ddeutil/workflow/log.py
CHANGED
@@ -47,13 +47,13 @@ def get_logger(name: str):
 class BaseLog(BaseModel, ABC):
     """Base Log Pydantic Model abstraction that implement only model fields."""
 
-    name: str = Field(description="A pipeline name.")
+    name: str = Field(description="A workflow name.")
     on: str = Field(description="A cronjob string of this piepline schedule.")
     release: datetime = Field(description="A release datetime.")
     context: DictData = Field(
         default_factory=dict,
         description=(
-            "A context data that receive from a pipeline execution result.",
+            "A context data that receive from a workflow execution result.",
         ),
     )
     parent_run_id: Optional[str] = Field(default=None)
@@ -77,7 +77,7 @@ class BaseLog(BaseModel, ABC):
 
 class FileLog(BaseLog):
     """File Log Pydantic Model that use to saving log data from result of
-    pipeline execution. It inherit from BaseLog model that implement the
+    workflow execution. It inherit from BaseLog model that implement the
     ``self.save`` method for file.
     """
@@ -87,7 +87,7 @@ class FileLog(BaseLog):
 
     @classmethod
     def find_logs(cls, name: str):
-        pointer: Path = config().engine.paths.root / f"./logs/pipeline={name}"
+        pointer: Path = config().engine.paths.root / f"./logs/workflow={name}"
         for file in pointer.glob("./release=*/*.log"):
             with file.open(mode="r", encoding="utf-8") as f:
                 yield json.load(f)
@@ -97,11 +97,11 @@ class FileLog(BaseLog):
         if release is not None:
             pointer: Path = (
                 config().engine.paths.root
-                / f"./logs/pipeline={name}/release={release:%Y%m%d%H%M%S}"
+                / f"./logs/workflow={name}/release={release:%Y%m%d%H%M%S}"
             )
             if not pointer.exists():
                 raise FileNotFoundError(
-                    f"Pointer: ./logs/pipeline={name}/"
+                    f"Pointer: ./logs/workflow={name}/"
                     f"release={release:%Y%m%d%H%M%S} does not found."
                 )
             return cls.model_validate(
@@ -119,7 +119,7 @@ class FileLog(BaseLog):
     ) -> bool:
         """Check this log already point in the destination.
 
-        :param name: A pipeline name.
+        :param name: A workflow name.
         :param release: A release datetime.
         :param queue: A list of queue of datetime that already run in the
             future.
@@ -131,7 +131,7 @@ class FileLog(BaseLog):
         # NOTE: create pointer path that use the same logic of pointer method.
         pointer: Path = (
             config().engine.paths.root
-            / f"./logs/pipeline={name}/release={release:%Y%m%d%H%M%S}"
+            / f"./logs/workflow={name}/release={release:%Y%m%d%H%M%S}"
         )
 
         if not queue:
@@ -145,11 +145,11 @@ class FileLog(BaseLog):
         """
         return (
             config().engine.paths.root
-            / f"./logs/pipeline={self.name}/release={self.release:%Y%m%d%H%M%S}"
+            / f"./logs/workflow={self.name}/release={self.release:%Y%m%d%H%M%S}"
        )
 
     def save(self, excluded: list[str] | None) -> Self:
-        """Save logging data that receive a context data from a pipeline
+        """Save logging data that receive a context data from a workflow
         execution result.
 
         :param excluded: An excluded list of key name that want to pass in the
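These path edits are part of this release range's rename of the log layout from pipeline to workflow. A minimal sketch of how the pointer path is composed under the new scheme follows; the root directory and workflow name are hypothetical stand-ins, since `config().engine.paths.root` supplies the real root at runtime.

from datetime import datetime
from pathlib import Path

root = Path("/opt/ddeutil")    # hypothetical stand-in for config().engine.paths.root
name = "my-workflow"           # hypothetical workflow name
release = datetime(2024, 8, 1, 12, 30)

# Same composition as FileLog.pointer(); pathlib collapses the leading "./".
pointer = root / f"./logs/workflow={name}/release={release:%Y%m%d%H%M%S}"
print(pointer)  # /opt/ddeutil/logs/workflow=my-workflow/release=20240801123000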
ddeutil/workflow/repeat.py
CHANGED
@@ -20,7 +20,7 @@ from .log import get_logger
 logger = get_logger("ddeutil.workflow")
 
 
-def get_cronjob_delta(cron: str):
+def get_cronjob_delta(cron: str) -> float:
     """This function returns the time delta between now and the next cron
     execution time.
     """
@@ -59,6 +59,7 @@ async def run_func(
 def repeat_at(
     *,
     cron: str,
+    delay: float = 0,
     raise_exceptions: bool = False,
     max_repetitions: int = None,
 ):
@@ -67,6 +68,7 @@ def repeat_at(
 
     :param cron: str
         Cron-style string for periodic execution, eg. '0 0 * * *' every midnight
+    :param delay:
     :param raise_exceptions: bool (default False)
         Whether to raise exceptions or log them
     :param max_repetitions: int (default None)
@@ -89,7 +91,7 @@ def repeat_at(
     async def loop(*args, **kwargs):
         nonlocal repititions
         while max_repetitions is None or repititions < max_repetitions:
-            sleep_time = get_cronjob_delta(cron)
+            sleep_time = get_cronjob_delta(cron) + delay
             await asyncio.sleep(sleep_time)
             await run_func(
                 is_coroutine,