ddeutil-workflow 0.0.6__py3-none-any.whl → 0.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +26 -4
- ddeutil/workflow/__types.py +11 -1
- ddeutil/workflow/api.py +120 -0
- ddeutil/workflow/app.py +41 -0
- ddeutil/workflow/exceptions.py +3 -0
- ddeutil/workflow/log.py +30 -0
- ddeutil/workflow/pipeline.py +341 -105
- ddeutil/workflow/repeat.py +134 -0
- ddeutil/workflow/route.py +78 -0
- ddeutil/workflow/stage.py +41 -12
- ddeutil/workflow/utils.py +280 -56
- {ddeutil_workflow-0.0.6.dist-info → ddeutil_workflow-0.0.7.dist-info}/METADATA +61 -14
- ddeutil_workflow-0.0.7.dist-info/RECORD +20 -0
- {ddeutil_workflow-0.0.6.dist-info → ddeutil_workflow-0.0.7.dist-info}/WHEEL +1 -1
- ddeutil_workflow-0.0.6.dist-info/RECORD +0 -15
- {ddeutil_workflow-0.0.6.dist-info → ddeutil_workflow-0.0.7.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.6.dist-info → ddeutil_workflow-0.0.7.dist-info}/top_level.txt +0 -0
ddeutil/workflow/pipeline.py
CHANGED
@@ -7,20 +7,41 @@ from __future__ import annotations
 
 import copy
 import logging
+import os
 import time
+from concurrent.futures import (
+    FIRST_EXCEPTION,
+    Future,
+    ProcessPoolExecutor,
+    ThreadPoolExecutor,
+    as_completed,
+    wait,
+)
+from datetime import datetime
+from multiprocessing import Event, Manager
+from pickle import PickleError
 from queue import Queue
 from typing import Optional
+from zoneinfo import ZoneInfo
 
 from pydantic import BaseModel, Field
 from pydantic.functional_validators import model_validator
 from typing_extensions import Self
 
 from .__types import DictData, DictStr, Matrix, MatrixExclude, MatrixInclude
-from .exceptions import JobException, PipelineException
+from .exceptions import JobException, PipelineException, StageException
 from .loader import Loader
 from .on import On
+from .scheduler import CronRunner
 from .stage import Stage
-from .utils import
+from .utils import (
+    Param,
+    Result,
+    cross_product,
+    dash2underscore,
+    gen_id,
+    get_diff_sec,
+)
 
 
 class Strategy(BaseModel):
@@ -29,6 +50,8 @@ class Strategy(BaseModel):
 
     Data Validate:
         >>> strategy = {
+        ...     'max-parallel': 1,
+        ...     'fail-fast': False,
         ...     'matrix': {
         ...         'first': [1, 2, 3],
         ...         'second': ['foo', 'bar']
@@ -39,7 +62,7 @@ class Strategy(BaseModel):
     """
 
     fail_fast: bool = Field(default=False)
-    max_parallel: int = Field(default
+    max_parallel: int = Field(default=1, gt=0)
     matrix: Matrix = Field(default_factory=dict)
     include: MatrixInclude = Field(
         default_factory=list,
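The doctest above now also exercises `max-parallel` and `fail-fast`, and `max_parallel` is validated as a positive integer (`gt=0`). A minimal sketch of how such a matrix typically expands, assuming `Strategy.make()` behaves like the cross product that the new `cross_product` import suggests (illustrative, not the package's own code):

from itertools import product

matrix = {"first": [1, 2, 3], "second": ["foo", "bar"]}
strategies = [dict(zip(matrix, vs)) for vs in product(*matrix.values())]
print(len(strategies))  # 6, e.g. {'first': 1, 'second': 'foo'}

Each combination becomes one strategy execution, run sequentially when max_parallel == 1 or in a pool otherwise.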
@@ -164,11 +187,47 @@ class Job(BaseModel):
 
         return output[next(iter(output))]
 
-    def strategy_execute(
-
-
+    def strategy_execute(
+        self,
+        strategy: DictData,
+        params: DictData,
+        *,
+        event: Event | None = None,
+    ) -> Result:
+        """Strategy execution with passing dynamic parameters from the
+        pipeline stage execution.
+
+        :param strategy:
+        :param params:
+        :param event: A manager event that is passed to the pool executor.
+        :rtype: Result
+        """
+        _stop_rs: Result = Result(
+            status=1,
+            context={
+                gen_id(strategy): {
+                    "matrix": strategy,
+                    "stages": {},
+                    "error": "Event stopped",
+                },
+            },
+        )
+        if event and event.is_set():
+            return _stop_rs
+
+        # NOTE: Create the strategy execution context and update the matrix
+        #   onto a copy of params. The context value will have a structure
+        #   like;
+        #   ---
+        #   {
+        #       "params": { ... },      <== Current input params
+        #       "jobs": { ... },        <== Job results so far
+        #       "matrix": { ... }       <== Current strategy value
+        #   }
+        #
+        context: DictData = params
         context.update({"matrix": strategy})
 
+        # IMPORTANT: The stage execution only runs sequentially one-by-one.
         for stage in self.stages:
             _st_name: str = stage.id or stage.name
 
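The new `event` parameter enables cooperative cancellation: the strategy returns the pre-built `_stop_rs` result as soon as the shared event is set, both before the context is built and between stages. A self-contained sketch of that pattern (names here are illustrative, not the package's own code):

from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Manager

def run_strategy(n: int, event) -> str:
    # Bail out early once a stop has been requested.
    if event.is_set():
        return f"strategy {n}: stopped"
    return f"strategy {n}: done"

if __name__ == "__main__":
    with Manager() as manager:
        event = manager.Event()
        with ProcessPoolExecutor(max_workers=2) as pool:
            futures = [pool.submit(run_strategy, n, event) for n in range(4)]
            event.set()  # strategies that have not started yet stop early
            print([f.result() for f in futures])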
@@ -177,6 +236,29 @@ class Job(BaseModel):
                 continue
             logging.info(f"[JOB]: Start execute the stage: {_st_name!r}")
 
+            # NOTE: Logging the matrix that passes on this stage execution.
+            if strategy:
+                logging.info(f"[...]: Matrix: {strategy}")
+
+            # NOTE: I do not use the syntax below because the `params` dict
+            #   is a shared reference, so it changes whenever it is updated
+            #   or re-constructed;
+            #
+            #   ... params |= stage.execute(params=params)
+            #
+            #   This step adds the stage result to the ``stages`` key under
+            #   that stage id. It will have a structure like;
+            #   ---
+            #   {
+            #       "params": { ... },
+            #       "jobs": { ... },
+            #       "matrix": { ... },
+            #       "stages": { { "stage-id-1": ... }, ... }
+            #   }
+            #
+            if event and event.is_set():
+                return _stop_rs
             rs: Result = stage.execute(params=context)
             if rs.status == 0:
                 stage.set_outputs(rs.context, params=context)
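The NOTE in this hunk is about Python reference semantics: updating a dict in place mutates the object the caller still holds, which is why the executors below deep-copy `params` before each run. A quick demonstration:

import copy

params = {"jobs": {}}
context = params                         # same object; mutations leak back
context.update({"matrix": {"first": 1}})
print("matrix" in params)                # True

isolated = copy.deepcopy(params)
isolated["stages"] = {"stage-id-1": {}}
print("stages" in params)                # False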
@@ -185,6 +267,8 @@ class Job(BaseModel):
                     f"Getting status does not equal zero on stage: "
                     f"{stage.name}."
                 )
+        # TODO: Filter and warn if any objects (like functions) get passed
+        #   into the context between strategy job executors.
         return Result(
             status=0,
             context={
@@ -204,71 +288,104 @@ class Job(BaseModel):
         :rtype: Result
         """
         strategy_context: DictData = {}
-
+        rs = Result(context=strategy_context)
 
[... 7 removed lines not rendered in this view ...]
-        # }
-        #
-        context: DictData = {}
-        context.update(params)
-        context.update({"matrix": strategy})
+        if self.strategy.max_parallel == 1:
+            for strategy in self.strategy.make():
+                rs: Result = self.strategy_execute(
+                    strategy, params=copy.deepcopy(params)
+                )
+                strategy_context.update(rs.context)
+            return rs
 
[... 19 removed lines not rendered in this view ...]
-            # I do not use below syntax because `params` dict be the
-            # reference memory pointer and it was changed when I action
-            # anything like update or re-construct this.
-            #
-            # ... params |= stage.execute(params=params)
-            #
-            # This step will add the stage result to ``stages`` key in
-            # that stage id. It will have structure like;
-            # ---
-            # {
-            #     "params": { ... },
-            #     "jobs": { ... },
-            #     "matrix": { ... },
-            #     "stages": { { "stage-id-1": ... }, ... }
-            # }
-            #
-            rs: Result = stage.execute(params=context)
-            if rs.status == 0:
-                stage.set_outputs(rs.context, params=context)
-            else:
-                raise JobException(
-                    f"Getting status does not equal zero on stage: "
-                    f"{stage.name}."
+        # FIXME: (WF001) I got an error that raises when using
+        #   ``ProcessPoolExecutor``;
+        #   ---
+        #   _pickle.PicklingError: Can't pickle
+        #       <function ??? at 0x000001F0BE80F160>: attribute lookup ???
+        #       on ddeutil.workflow.stage failed
+        #
+        with Manager() as manager:
+            event: Event = manager.Event()
+
+            with ProcessPoolExecutor(
+                max_workers=self.strategy.max_parallel
+            ) as pool:
+                pool_result: list[Future] = [
+                    pool.submit(
+                        self.strategy_execute,
+                        st,
+                        params=copy.deepcopy(params),
+                        event=event,
                     )
[... 7 removed lines not rendered in this view ...]
+                    for st in self.strategy.make()
+                ]
+                if self.strategy.fail_fast:
+
+                    # NOTE: Get results from a collection of tasks with a
+                    #   timeout that has the first exception.
+                    done, not_done = wait(
+                        pool_result, timeout=60, return_when=FIRST_EXCEPTION
+                    )
+                    nd: str = (
+                        f", the strategies do not run is {not_done}"
+                        if not_done
+                        else ""
+                    )
+                    logging.warning(f"[JOB]: Strategy is set Fail Fast{nd}")
+
+                    # NOTE: Stop all running tasks.
+                    event.set()
+
+                    # NOTE: Cancel any scheduled tasks.
+                    for future in pool_result:
+                        future.cancel()
+
+                    rs.status = 0
+                    for f in done:
+                        if f.exception():
+                            rs.status = 1
+                            logging.error(
+                                f"One task failed with: {f.exception()}, "
+                                f"shutting down"
+                            )
+                        elif f.cancelled():
+                            continue
+                        else:
+                            rs: Result = f.result(timeout=60)
+                            strategy_context.update(rs.context)
+                    rs.context = strategy_context
+                    return rs
+
+                for pool_rs in as_completed(pool_result):
+                    try:
+                        rs: Result = pool_rs.result(timeout=60)
+                        strategy_context.update(rs.context)
+                    except PickleError as err:
+                        # NOTE: I do not want to fix this issue because it
+                        #   does not make sense and would be over-engineering
+                        #   for this bug-fix process.
+                        raise JobException(
+                            f"PyStage that create object on locals does use "
+                            f"parallel in strategy;\n\t{err}"
+                        ) from None
+                    except TimeoutError:
+                        rs.status = 1
+                        logging.warning("Task is hanging. Attempting to kill.")
+                        pool_rs.cancel()
+                        if not pool_rs.cancelled():
+                            logging.warning("Failed to cancel the task.")
+                        else:
+                            logging.warning("Task canceled successfully.")
+                    except StageException as err:
+                        rs.status = 1
+                        logging.warning(
+                            f"Get stage exception with fail-fast does not set;"
+                            f"\n\t{err}"
+                        )
+        rs.status = 0
+        rs.context = strategy_context
+        return rs
 
 
 class Pipeline(BaseModel):
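A minimal sketch of the fail-fast mechanics above, using a thread pool so the demo stays self-contained (the diff uses ProcessPoolExecutor and records a pickling FIXME): wait(..., return_when=FIRST_EXCEPTION) returns as soon as any future raises, after which the remaining futures are cancelled.

from concurrent.futures import FIRST_EXCEPTION, ThreadPoolExecutor, wait

def task(n: int) -> int:
    if n == 2:
        raise RuntimeError("boom")
    return n

with ThreadPoolExecutor(max_workers=1) as pool:
    futures = [pool.submit(task, n) for n in range(4)]
    done, not_done = wait(futures, timeout=60, return_when=FIRST_EXCEPTION)
    for f in not_done:
        f.cancel()                       # cancel anything not yet started
    failed = [f for f in done if f.exception()]
    print(len(failed), len(not_done))    # 1 1 with a single worker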
@@ -356,6 +473,18 @@ class Pipeline(BaseModel):
         }
         return values
 
+    @model_validator(mode="after")
+    def __validate_jobs_need(self):
+        for job in self.jobs:
+            if not_exist := [
+                need for need in self.jobs[job].needs if need not in self.jobs
+            ]:
+                raise PipelineException(
+                    f"This needed jobs: {not_exist} do not exist in this "
+                    f"pipeline."
+                )
+        return self
+
     def job(self, name: str) -> Job:
         """Return Job model that exists on this pipeline.
 
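The new after-mode validator fails fast at model construction when a job's `needs` references a job id that is not defined. The same check as a standalone sketch, assuming `jobs` maps a job id to its list of upstream job ids:

jobs = {"build": [], "deploy": ["build", "test"]}   # "test" is undefined
for job, needs in jobs.items():
    if not_exist := [need for need in needs if need not in jobs]:
        print(f"would raise PipelineException: {not_exist} do not exist")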
@@ -375,6 +504,7 @@ class Pipeline(BaseModel):
         job execution.
 
         :param params: A parameter mapping that receive from pipeline execution.
+        :rtype: DictData
         """
         # VALIDATE: Incoming params should have keys that set on this pipeline.
         if check_key := tuple(
@@ -382,7 +512,7 @@ class Pipeline(BaseModel):
             for k in self.params
             if (k not in params and self.params[k].required)
         ):
-            raise
+            raise PipelineException(
                 f"Required Param on this pipeline setting does not set: "
                 f"{', '.join(check_key)}."
             )
@@ -400,6 +530,102 @@ class Pipeline(BaseModel):
             "jobs": {},
         }
 
+    def release(
+        self,
+        on: On,
+        params: DictData | None = None,
+        *,
+        waiting_sec: int = 600,
+        sleep_interval: int = 10,
+    ) -> str:
+        """Start running the pipeline with the ``on`` schedule in a period of
+        30 minutes. That means it keeps running in the background for up to
+        30 minutes until the schedule matches its time.
+        """
+        params: DictData = params or {}
+        logging.info(f"[CORE] Start release: {self.name!r} : {on.cronjob}")
+
+        gen: CronRunner = on.generate(datetime.now())
+        tz: ZoneInfo = gen.tz
+        next_running_time: datetime = gen.next
+
+        if get_diff_sec(next_running_time, tz=tz) < waiting_sec:
+            logging.debug(
+                f"[CORE]: {self.name} closely to run >> "
+                f"{next_running_time:%Y-%m-%d %H:%M:%S}"
+            )
+
+            # NOTE: Release when the time is nearly to schedule time.
+            while (duration := get_diff_sec(next_running_time, tz=tz)) > 15:
+                time.sleep(sleep_interval)
+                logging.debug(
+                    f"[CORE]: {self.name!r} : Sleep until: {duration}"
+                )
+
+            time.sleep(1)
+            rs: Result = self.execute(params=params)
+            logging.debug(f"{rs.context}")
+
+            return f"[CORE]: Start Execute: {self.name}"
+        return f"[CORE]: {self.name} does not closely to run yet."
+
+    def poke(self, params: DictData | None = None):
+        """Poke pipeline threading task for executing with its schedules
+        that were set on the ``on`` field.
+        """
+        params: DictData = params or {}
+        logging.info(
+            f"[CORE]: Start Poking: {self.name!r} :"
+            f"{gen_id(self.name, unique=True)}"
+        )
+        results = []
+        with ThreadPoolExecutor(
+            max_workers=int(
+                os.getenv("WORKFLOW_CORE_MAX_PIPELINE_POKING", "4")
+            ),
+        ) as executor:
+            futures: list[Future] = [
+                executor.submit(
+                    self.release,
+                    on,
+                    params=params,
+                )
+                for on in self.on
+            ]
+            for future in as_completed(futures):
+                rs = future.result()
+                logging.info(rs)
+                results.append(rs)
+        return results
+
+    def job_execute(
+        self,
+        job: str,
+        params: DictData,
+    ):
+        """Job executor that is used on the pipeline executor.
+
+        :param job: A job ID that want to execute.
+        :param params: A params that was parameterized from pipeline execution.
+        """
+        # VALIDATE: check a job ID that exists in this pipeline or not.
+        if job not in self.jobs:
+            raise PipelineException(
+                f"The job ID: {job} does not exists on {self.name!r} pipeline."
+            )
+
+        job_obj: Job = self.jobs[job]
+
+        rs: Result = job_obj.execute(params=params)
+        if rs.status != 0:
+            logging.warning(
+                f"Getting status does not equal zero on job: {job}."
+            )
+            return Result(
+                status=1, context={job: job_obj.set_outputs(rs.context)}
+            )
+
+        return Result(status=0, context={job: job_obj.set_outputs(rs.context)})
+
     def execute(
         self,
         params: DictData | None = None,
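The release logic is plain clock arithmetic. A sketch under the assumption that `get_diff_sec` returns the seconds remaining until a datetime (the real helper also takes a `tz` keyword):

from datetime import datetime, timedelta

def get_diff_sec(dt: datetime) -> float:     # simplified stand-in
    return (dt - datetime.now()).total_seconds()

next_running_time = datetime.now() + timedelta(minutes=5)
# With the default waiting_sec=600, anything due within 10 minutes is
# released by this poke; the loop then sleeps sleep_interval seconds at a
# time until fewer than 15 seconds remain, and finally executes.
print(get_diff_sec(next_running_time) < 600)  # True -> enter the wait loop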
@@ -430,7 +656,7 @@ class Pipeline(BaseModel):
 
         """
         logging.info(
-            f"[CORE]: Start
+            f"[CORE]: Start Execute: {self.name}:"
             f"{gen_id(self.name, unique=True)}"
         )
         params: DictData = params or {}
@@ -452,42 +678,52 @@ class Pipeline(BaseModel):
         # NOTE: Create result context that will pass this context to any
         #   execution dependency.
         rs: Result = Result(context=self.parameterize(params))
[... 35 removed lines not rendered in this view ...]
+        if (
+            worker := int(os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "1"))
+        ) > 1:
+            # IMPORTANT: The job execution can run in parallel and wait on
+            #   needed jobs.
+            with ThreadPoolExecutor(max_workers=worker) as executor:
+                futures: list[Future] = []
+                while not jq.empty() and (
+                    not_time_out_flag := ((time.monotonic() - ts) < timeout)
+                ):
+                    job_id: str = jq.get()
+                    logging.info(
+                        f"[PIPELINE]: Start execute the job: {job_id!r}"
+                    )
+                    job: Job = self.jobs[job_id]
+                    if any(
+                        need not in rs.context["jobs"] for need in job.needs
+                    ):
+                        jq.put(job_id)
+                    futures.append(
+                        executor.submit(
+                            self.job_execute,
+                            job_id,
+                            params=copy.deepcopy(rs.context),
+                        ),
+                    )
+                for future in as_completed(futures):
+                    job_rs: Result = future.result(timeout=20)
+                    rs.context["jobs"].update(job_rs.context)
+        else:
+            logging.info(
+                f"[CORE]: Run {self.name} with non-threading job executor"
+            )
+            while not jq.empty() and (
+                not_time_out_flag := ((time.monotonic() - ts) < timeout)
+            ):
+                job_id: str = jq.get()
+                logging.info(f"[PIPELINE]: Start execute the job: {job_id!r}")
+                job: Job = self.jobs[job_id]
+                if any(need not in rs.context["jobs"] for need in job.needs):
+                    jq.put(job_id)
+
+                job_rs = self.job_execute(
+                    job_id, params=copy.deepcopy(rs.context)
                 )
+                rs.context["jobs"].update(job_rs.context)
 
         if not not_time_out_flag:
             logging.warning("Execution of pipeline was time out")
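Both branches above implement the same dependency-aware queue: a job whose `needs` have not all produced results yet is put back on the queue and retried later. A simplified standalone sketch (it skips execution on re-queue with `continue`, a detail the diffed code folds into its submit/execute path):

from queue import Queue

jobs = {"extract": [], "transform": ["extract"], "load": ["transform"]}
done: dict[str, dict] = {}
jq: Queue = Queue()
for job_id in jobs:
    jq.put(job_id)

while not jq.empty():
    job_id = jq.get()
    if any(need not in done for need in jobs[job_id]):
        jq.put(job_id)      # dependencies not ready; retry later
        continue
    done[job_id] = {}       # stand-in for self.job_execute(...)
print(list(done))           # ['extract', 'transform', 'load']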
ddeutil/workflow/repeat.py
ADDED
@@ -0,0 +1,134 @@
+# ------------------------------------------------------------------------------
+# Copyright (c) 2023 Priyanshu Panwar. All rights reserved.
+# Licensed under the MIT License.
+# This code refs from: https://github.com/priyanshu-panwar/fastapi-utilities
+# ------------------------------------------------------------------------------
+import asyncio
+import logging
+from asyncio import ensure_future
+from datetime import datetime
+from functools import wraps
+
+from croniter import croniter
+from starlette.concurrency import run_in_threadpool
+
+
+def get_delta(cron: str):
+    """This function returns the time delta between now and the next cron
+    execution time.
+    """
+    now: datetime = datetime.now()
+    cron = croniter(cron, now)
+    return (cron.get_next(datetime) - now).total_seconds()
+
+
+def repeat_at(
+    *,
+    cron: str,
+    logger: logging.Logger = None,
+    raise_exceptions: bool = False,
+    max_repetitions: int = None,
+):
+    """This function returns a decorator that makes a function execute
+    periodically as per the cron expression provided.
+
+    :param cron: str
+        Cron-style string for periodic execution, e.g. '0 0 * * *' every midnight
+    :param logger: logging.Logger (default None)
+        Logger object to log exceptions
+    :param raise_exceptions: bool (default False)
+        Whether to raise exceptions or log them
+    :param max_repetitions: int (default None)
+        Maximum number of times to repeat the function. If None, repeat
+        indefinitely.
+    """
+
+    def decorator(func):
+        is_coroutine = asyncio.iscoroutinefunction(func)
+
+        @wraps(func)
+        def wrapper(*_args, **_kwargs):
+            repetitions = 0
+            if not croniter.is_valid(cron):
+                raise ValueError("Invalid cron expression")
+
+            async def loop(*args, **kwargs):
+                nonlocal repetitions
+                while max_repetitions is None or repetitions < max_repetitions:
+                    try:
+                        sleep_time = get_delta(cron)
+                        await asyncio.sleep(sleep_time)
+                        if is_coroutine:
+                            await func(*args, **kwargs)
+                        else:
+                            await run_in_threadpool(func, *args, **kwargs)
+                    except Exception as e:
+                        if logger is not None:
+                            logger.exception(e)
+                        if raise_exceptions:
+                            raise e
+                    repetitions += 1
+
+            ensure_future(loop(*_args, **_kwargs))
+
+        return wrapper
+
+    return decorator
+
+
+def repeat_every(
+    *,
+    seconds: float,
+    wait_first: bool = False,
+    logger: logging.Logger = None,
+    raise_exceptions: bool = False,
+    max_repetitions: int = None,
+):
+    """This function returns a decorator that schedules a function to execute
+    periodically after every `seconds` seconds.
+
+    :param seconds: float
+        The number of seconds to wait before executing the function again.
+    :param wait_first: bool (default False)
+        Whether to wait `seconds` seconds before executing the function for the
+        first time.
+    :param logger: logging.Logger (default None)
+        The logger to use for logging exceptions.
+    :param raise_exceptions: bool (default False)
+        Whether to raise exceptions instead of logging them.
+    :param max_repetitions: int (default None)
+        The maximum number of times to repeat the function. If None, the
+        function will repeat indefinitely.
+    """
+
+    def decorator(func):
+        is_coroutine = asyncio.iscoroutinefunction(func)
+
+        @wraps(func)
+        async def wrapper(*_args, **_kwargs):
+            repetitions = 0
+
+            async def loop(*args, **kwargs):
+                nonlocal repetitions
+                if wait_first:
+                    await asyncio.sleep(seconds)
+                while max_repetitions is None or repetitions < max_repetitions:
+                    try:
+                        if is_coroutine:
+                            await func(*args, **kwargs)
+                        else:
+                            await run_in_threadpool(func, *args, **kwargs)
+                    except Exception as e:
+                        if logger is not None:
+                            logger.exception(e)
+                        if raise_exceptions:
+                            raise e
+                    repetitions += 1
+                    await asyncio.sleep(seconds)
+
+            ensure_future(loop(*_args, **_kwargs))
+
+        return wrapper
+
+    return decorator
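Hypothetical usage of the two decorators above, following the upstream fastapi-utilities pattern they are adapted from. Both schedule their loop with `ensure_future`, so they must be called while an event loop is already running, for example from a FastAPI startup hook:

@repeat_at(cron="0 0 * * *")                    # every midnight
def nightly_cleanup():
    print("cleanup")

@repeat_every(seconds=30, wait_first=True, max_repetitions=10)
async def heartbeat():
    print("tick")

# @app.on_event("startup")
# async def startup():
#     nightly_cleanup()    # schedules the cron loop
#     await heartbeat()    # schedules the interval loop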