ddeutil-workflow 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,303 +5,53 @@
  # ------------------------------------------------------------------------------
  from __future__ import annotations

- import contextlib
- import inspect
- import itertools
+ import copy
  import logging
- import subprocess
- import sys
+ import os
  import time
- import uuid
- from abc import ABC, abstractmethod
- from inspect import Parameter
- from pathlib import Path
+ from concurrent.futures import (
+     FIRST_EXCEPTION,
+     Future,
+     ProcessPoolExecutor,
+     ThreadPoolExecutor,
+     as_completed,
+     wait,
+ )
+ from datetime import datetime
+ from multiprocessing import Event, Manager
+ from pickle import PickleError
  from queue import Queue
- from subprocess import CompletedProcess
- from typing import Any, Callable, Optional, Union
+ from typing import Optional
+ from zoneinfo import ZoneInfo

  from pydantic import BaseModel, Field
  from pydantic.functional_validators import model_validator
  from typing_extensions import Self

- from .__regex import RegexConf
- from .__types import DictData, DictStr
- from .exceptions import TaskException
- from .loader import Loader, map_params
- from .utils import Params, TaskSearch, make_exec, make_registry
-
-
- class BaseStage(BaseModel, ABC):
-     """Base Stage Model that keep only id and name fields for the stage
-     metadata. If you want to implement any custom stage, you can use this class
-     to parent and implement ``self.execute()`` method only.
-     """
-
-     id: Optional[str] = Field(
-         default=None,
-         description=(
-             "The stage ID that use to keep execution output or getting by job "
-             "owner."
-         ),
-     )
-     name: str = Field(
-         description="The stage name that want to logging when start execution."
-     )
-
-     @abstractmethod
-     def execute(self, params: DictData) -> DictData:
-         """Execute abstraction method that action something by sub-model class.
-         This is important method that make this class is able to be the stage.
-
-         :param params: A parameter data that want to use in this execution.
-         :rtype: DictData
-         """
-         raise NotImplementedError("Stage should implement ``execute`` method.")
-
-     def set_outputs(self, rs: DictData, params: DictData) -> DictData:
-         """Set an outputs from execution process to an input params.
-
-         :param rs: A result data that want to extract to an output key.
-         :param params: A context data that want to add output result.
-         :rtype: DictData
-         """
-         if self.id is None:
-             return params
-
-         if "stages" not in params:
-             params["stages"] = {}
-
-         params["stages"][self.id] = {"outputs": rs}
-         return params
-
-
- class EmptyStage(BaseStage):
-     """Empty stage that do nothing (context equal empty stage) and logging the
-     name of stage only to stdout.
-     """
-
-     def execute(self, params: DictData) -> DictData:
-         """Execution method for the Empty stage that do only logging out to
-         stdout.
-
-         :param params: A context data that want to add output result. But this
-             stage does not pass any output.
-         """
-         logging.info(f"[STAGE]: Empty-Execute: {self.name!r}")
-         return params
-
-
- class ShellStage(BaseStage):
-     """Shell stage that execute bash script on the current OS. That mean if your
-     current OS is Windows, it will running bash in the WSL.
-     """
-
-     shell: str = Field(description="A shell statement that want to execute.")
-     env: DictStr = Field(
-         default_factory=dict,
-         description=(
-             "An environment variable mapping that want to set before execute "
-             "this shell statement."
-         ),
-     )
-
-     @contextlib.contextmanager
-     def __prepare_shell(self):
-         """Return context of prepared shell statement that want to execute. This
-         step will write the `.sh` file before giving this file name to context.
-         After that, it will auto delete this file automatic.
-         """
-         f_name: str = f"{uuid.uuid4()}.sh"
-         f_shebang: str = "bash" if sys.platform.startswith("win") else "sh"
-         with open(f"./{f_name}", mode="w", newline="\n") as f:
-             f.write(f"#!/bin/{f_shebang}\n")
-
-             for k in self.env:
-                 f.write(f"{k}='{self.env[k]}';\n")
-
-             # NOTE: make sure that shell script file does not have `\r` char.
-             f.write(self.shell.replace("\r\n", "\n"))
-
-         make_exec(f"./{f_name}")
-
-         yield [f_shebang, f_name]
-
-         Path(f_name).unlink()
-
-     def set_outputs(self, rs: CompletedProcess, params: DictData) -> DictData:
-         """Set outputs to params"""
-         # NOTE: skipping set outputs of stage execution when id does not set.
-         if self.id is None:
-             return params
-
-         if "stages" not in params:
-             params["stages"] = {}
-
-         params["stages"][self.id] = {
-             # NOTE: The output will fileter unnecessary keys from ``_locals``.
-             "outputs": {
-                 "return_code": rs.returncode,
-                 "stdout": rs.stdout.rstrip("\n"),
-             },
-         }
-         return params
-
-     def execute(self, params: DictData) -> DictData:
-         """Execute the Shell & Powershell statement with the Python build-in
-         ``subprocess`` package.
-         """
-         with self.__prepare_shell() as sh:
-             logging.info(f"[STAGE]: Shell-Execute: {sh}")
-             rs: CompletedProcess = subprocess.run(
-                 sh,
-                 shell=False,
-                 capture_output=True,
-                 text=True,
-             )
-         if rs.returncode > 0:
-             err: str = (
-                 rs.stderr.encode("utf-8").decode("utf-16")
-                 if "\\x00" in rs.stderr
-                 else rs.stderr
-             )
-             logging.error(f"{err}\nRunning Statement:\n---\n{self.shell}")
-             raise TaskException(f"{err}\nRunning Statement:\n---\n{self.shell}")
-         self.set_outputs(rs, params)
-         return params
-
-
- class PyStage(BaseStage):
-     """Python executor stage that running the Python statement that receive
-     globals nad additional variables.
-     """
-
-     run: str
-     vars: DictData = Field(default_factory=dict)
-
-     def get_vars(self, params: DictData) -> DictData:
-         """Return variables"""
-         rs = self.vars.copy()
-         for p, v in self.vars.items():
-             rs[p] = map_params(v, params)
-         return rs
-
-     def set_outputs(self, rs: DictData, params: DictData) -> DictData:
-         """Set an outputs from execution process to an input params.
-
-         :param rs: A result data that want to extract to an output key.
-         :param params: A context data that want to add output result.
-         :rtype: DictData
-         """
-         # NOTE: skipping set outputs of stage execution when id does not set.
-         if self.id is None:
-             return params
-
-         if "stages" not in params:
-             params["stages"] = {}
-
-         params["stages"][self.id] = {
-             # NOTE: The output will fileter unnecessary keys from ``_locals``.
-             "outputs": {k: rs[k] for k in rs if k != "__annotations__"},
-         }
-         return params
-
-     def execute(self, params: DictData) -> DictData:
-         """Execute the Python statement that pass all globals and input params
-         to globals argument on ``exec`` build-in function.
-
-         :param params: A parameter that want to pass before run any statement.
-         :type params: DictData
-
-         :rtype: DictData
-         :returns: A parameters from an input that was mapped output if the stage
-             ID was set.
-         """
-         _globals: DictData = globals() | params | self.get_vars(params)
-         _locals: DictData = {}
-         try:
-             exec(map_params(self.run, params), _globals, _locals)
-         except Exception as err:
-             raise TaskException(
-                 f"{err.__class__.__name__}: {err}\nRunning Statement:\n---\n"
-                 f"{self.run}"
-             ) from None
-
-         # NOTE: set outputs from ``_locals`` value from ``exec``.
-         self.set_outputs(_locals, params)
-         return params | {k: _globals[k] for k in params if k in _globals}
-
-
- class TaskStage(BaseStage):
-     """Task executor stage that running the Python function."""
-
-     task: str
-     args: DictData
-
-     @staticmethod
-     def extract_task(task: str) -> Callable[[], Callable[[Any], Any]]:
-         """Extract Task string value to task function."""
-         if not (found := RegexConf.RE_TASK_FMT.search(task)):
-             raise ValueError("Task does not match with task format regex.")
-         tasks: TaskSearch = TaskSearch(**found.groupdict())
-
-         # NOTE: Registry object should implement on this package only.
-         # TODO: This prefix value to search registry should dynamic with
-         #   config file.
-         rgt = make_registry(f"ddeutil.workflow.{tasks.path}")
-         if tasks.func not in rgt:
-             raise NotImplementedError(
-                 f"ddeutil.workflow.{tasks.path}.registries does not "
-                 f"implement registry: {tasks.func}."
-             )
-
-         if tasks.tag not in rgt[tasks.func]:
-             raise NotImplementedError(
-                 f"tag: {tasks.tag} does not found on registry func: "
-                 f"ddeutil.workflow.{tasks.path}.registries."
-                 f"{tasks.func}"
-             )
-         return rgt[tasks.func][tasks.tag]
-
-     def execute(self, params: DictData) -> DictData:
-         """Execute the Task function."""
-         task_caller = self.extract_task(self.task)()
-         if not callable(task_caller):
-             raise ImportError("Task caller function does not callable.")
-
-         # NOTE: check task caller parameters
-         ips = inspect.signature(task_caller)
-         if any(
-             k not in self.args
-             for k in ips.parameters
-             if ips.parameters[k].default == Parameter.empty
-         ):
-             raise ValueError(
-                 f"necessary parameters, ({', '.join(ips.parameters.keys())}), "
-                 f"does not set to args"
-             )
-         try:
-             rs = task_caller(**map_params(self.args, params))
-         except Exception as err:
-             raise TaskException(f"{err.__class__.__name__}: {err}") from err
-         self.set_outputs(rs, params)
-         return params
-
-
- # NOTE: Order of parsing stage data
- Stage = Union[
-     PyStage,
-     ShellStage,
-     TaskStage,
-     EmptyStage,
- ]
+ from .__types import DictData, DictStr, Matrix, MatrixExclude, MatrixInclude
+ from .exceptions import JobException, PipelineException, StageException
+ from .loader import Loader
+ from .on import On
+ from .scheduler import CronRunner
+ from .stage import Stage
+ from .utils import (
+     Param,
+     Result,
+     cross_product,
+     dash2underscore,
+     gen_id,
+     get_diff_sec,
+ )


  class Strategy(BaseModel):
      """Strategy Model that will combine a matrix together for running the
      special job.

-     Examples:
+     Data Validate:
          >>> strategy = {
+         ...     'max-parallel': 1,
+         ...     'fail-fast': False,
          ...     'matrix': {
          ...         'first': [1, 2, 3],
          ...         'second': ['foo', 'bar']
@@ -312,35 +62,115 @@ class Strategy(BaseModel):
      """

      fail_fast: bool = Field(default=False)
-     max_parallel: int = Field(default=-1)
-     matrix: dict[str, Union[list[str], list[int]]] = Field(default_factory=dict)
-     include: list[dict[str, Union[str, int]]] = Field(default_factory=list)
-     exclude: list[dict[str, Union[str, int]]] = Field(default_factory=list)
+     max_parallel: int = Field(default=1, gt=0)
+     matrix: Matrix = Field(default_factory=dict)
+     include: MatrixInclude = Field(
+         default_factory=list,
+         description="A list of additional matrix that want to adds-in.",
+     )
+     exclude: MatrixExclude = Field(
+         default_factory=list,
+         description="A list of exclude matrix that want to filter-out.",
+     )

      @model_validator(mode="before")
      def __prepare_keys(cls, values: DictData) -> DictData:
-         if "max-parallel" in values:
-             values["max_parallel"] = values.pop("max-parallel")
-         if "fail-fast" in values:
-             values["fail_fast"] = values.pop("fail-fast")
+         """Rename key that use dash to underscore because Python does not
+         support this character exist in any variable name.
+         """
+         dash2underscore("max-parallel", values)
+         dash2underscore("fail-fast", values)
          return values

+     def make(self) -> list[DictStr]:
+         """Return List of product of matrix values that already filter with
+         exclude and add include.
+
+         :rtype: list[DictStr]
+         """
+         # NOTE: If it does not set matrix, it will return list of an empty dict.
+         if not (mt := self.matrix):
+             return [{}]
+
+         final: list[DictStr] = []
+         for r in cross_product(matrix=mt):
+             if any(
+                 all(r[k] == v for k, v in exclude.items())
+                 for exclude in self.exclude
+             ):
+                 continue
+             final.append(r)
+
+         # NOTE: If it is empty matrix and include, it will return list of an
+         #   empty dict.
+         if not final and not self.include:
+             return [{}]
+
+         # NOTE: Add include to generated matrix with exclude list.
+         add: list[DictStr] = []
+         for include in self.include:
+             # VALIDATE:
+             #   Validate any key in include list should be a subset of some one
+             #   in matrix.
+             if all(not (set(include.keys()) <= set(m.keys())) for m in final):
+                 raise ValueError("Include should have the keys equal to matrix")
+
+             # VALIDATE:
+             #   Validate value of include does not duplicate with generated
+             #   matrix.
+             if any(
+                 all(include.get(k) == v for k, v in m.items())
+                 for m in [*final, *add]
+             ):
+                 continue
+             add.append(include)
+         final.extend(add)
+         return final
+

  class Job(BaseModel):
-     """Job Model that is able to call a group of stages."""
+     """Job Model (group of stages).
+
+     This job model allow you to use for-loop that call matrix strategy. If
+     you pass matrix mapping and it able to generate, you will see it running
+     with loop of matrix values.
+
+     Data Validate:
+         >>> job = {
+         ...     "runs-on": None,
+         ...     "strategy": {},
+         ...     "needs": [],
+         ...     "stages": [
+         ...         {
+         ...             "name": "Some stage",
+         ...             "run": "print('Hello World')",
+         ...         },
+         ...     ],
+         ... }
+     """

+     name: Optional[str] = Field(default=None)
+     desc: Optional[str] = Field(default=None)
      runs_on: Optional[str] = Field(default=None)
-     stages: list[Stage] = Field(default_factory=list)
+     stages: list[Stage] = Field(
+         default_factory=list,
+         description="A list of Stage of this job.",
+     )
      needs: list[str] = Field(
          default_factory=list,
          description="A list of the job ID that want to run before this job.",
      )
-     strategy: Strategy = Field(default_factory=Strategy)
+     strategy: Strategy = Field(
+         default_factory=Strategy,
+         description="A strategy matrix that want to generate.",
+     )

      @model_validator(mode="before")
      def __prepare_keys(cls, values: DictData) -> DictData:
-         if "runs-on" in values:
-             values["runs_on"] = values.pop("runs-on")
+         """Rename key that use dash to underscore because Python does not
+         support this character exist in any variable name.
+         """
+         dash2underscore("runs-on", values)
          return values

      def stage(self, stage_id: str) -> Stage:
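
The new `Strategy.make()` above replaces the old `Job.make_strategy()`: it takes the cross product of the matrix values, drops any combination that fully matches an `exclude` entry, and appends non-duplicate `include` entries. A minimal sketch of that combine-and-filter logic, using `itertools.product` in place of the package's `cross_product` helper (whose implementation is not shown in this diff):

    import itertools

    matrix = {"first": [1, 2, 3], "second": ["foo", "bar"]}
    exclude = [{"first": 1, "second": "bar"}]
    include = [{"first": 9, "second": "foo"}]

    # Cross product: one mapping per combination, e.g. {'first': 1, 'second': 'foo'}.
    final = [
        dict(zip(matrix, values))
        for values in itertools.product(*matrix.values())
    ]
    # Drop combinations that fully match an exclude mapping.
    final = [
        r
        for r in final
        if not any(all(r[k] == v for k, v in ex.items()) for ex in exclude)
    ]
    # Append include entries that do not duplicate a generated combination.
    final += [
        inc
        for inc in include
        if not any(all(r.get(k) == v for k, v in inc.items()) for r in final)
    ]
    print(len(final))  # 3 * 2 = 6 combinations, minus 1 excluded, plus 1 included -> 6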
@@ -350,57 +180,212 @@ class Job(BaseModel):
              return stage
          raise ValueError(f"Stage ID {stage_id} does not exists")

-     def make_strategy(self) -> list[DictStr]:
-         """Return List of combination of matrix values that already filter with
-         exclude and add include values.
-         """
-         if not (mt := self.strategy.matrix):
-             return [{}]
-         final: list[DictStr] = []
-         for r in [
-             {_k: _v for e in mapped for _k, _v in e.items()}
-             for mapped in itertools.product(
-                 *[[{k: v} for v in vs] for k, vs in mt.items()]
-             )
-         ]:
-             if any(
-                 all(r[k] == v for k, v in exclude.items())
-                 for exclude in self.strategy.exclude
-             ):
-                 continue
-             final.append(r)
+     @staticmethod
+     def set_outputs(output: DictData) -> DictData:
+         if len(output) > 1:
+             return {"strategies": output}

-         if not final:
-             return [{}]
+         return output[next(iter(output))]

-         for include in self.strategy.include:
-             if include.keys() != final[0].keys():
-                 raise ValueError("Include should have the keys equal to matrix")
-             if any(all(include[k] == v for k, v in f.items()) for f in final):
+     def strategy_execute(
+         self,
+         strategy: DictData,
+         params: DictData,
+         *,
+         event: Event | None = None,
+     ) -> Result:
+         """Strategy execution with passing dynamic parameters from the pipeline
+         stage execution.
+
+         :param strategy:
+         :param params:
+         :param event: An manger event that pass to the PoolThreadExecutor.
+         :rtype: Result
+         """
+         _stop_rs: Result = Result(
+             status=1,
+             context={
+                 gen_id(strategy): {
+                     "matrix": strategy,
+                     "stages": {},
+                     "error": "Event stopped",
+                 },
+             },
+         )
+         if event and event.is_set():
+             return _stop_rs
+
+         # NOTE: Create strategy execution context and update a matrix and copied
+         #   of params. So, the context value will have structure like;
+         #   ---
+         #   {
+         #       "params": { ... },      <== Current input params
+         #       "jobs": { ... },        <== Current input params
+         #       "matrix": { ... }       <== Current strategy value
+         #   }
+         #
+         context: DictData = params
+         context.update({"matrix": strategy})
+
+         # IMPORTANT: The stage execution only run sequentially one-by-one.
+         for stage in self.stages:
+             _st_name: str = stage.id or stage.name
+
+             if stage.is_skip(params=context):
+                 logging.info(f"[JOB]: Skip the stage: {_st_name!r}")
                  continue
-             final.append(include)
-         return final
+             logging.info(f"[JOB]: Start execute the stage: {_st_name!r}")

-     def execute(self, params: DictData | None = None) -> DictData:
-         """Execute job with passing dynamic parameters from the pipeline."""
-         for strategy in self.make_strategy():
-             params.update({"matrix": strategy})
+             # NOTE: Logging a matrix that pass on this stage execution.
+             if strategy:
+                 logging.info(f"[...]: Matrix: {strategy}")

-             # IMPORTANT: The stage execution only run sequentially one-by-one.
-             for stage in self.stages:
-                 logging.info(
-                     f"[JOB]: Start execute the stage: "
-                     f"{(stage.id if stage.id else stage.name)!r}"
+             # NOTE:
+             #       I do not use below syntax because `params` dict be the
+             #   reference memory pointer and it was changed when I action
+             #   anything like update or re-construct this.
+             #
+             #       ... params |= stage.execute(params=params)
+             #
+             #       This step will add the stage result to ``stages`` key in
+             #   that stage id. It will have structure like;
+             #   ---
+             #   {
+             #       "params": { ... },
+             #       "jobs": { ... },
+             #       "matrix": { ... },
+             #       "stages": { { "stage-id-1": ... }, ... }
+             #   }
+             #
+             if event and event.is_set():
+                 return _stop_rs
+             rs: Result = stage.execute(params=context)
+             if rs.status == 0:
+                 stage.set_outputs(rs.context, params=context)
+             else:
+                 raise JobException(
+                     f"Getting status does not equal zero on stage: "
+                     f"{stage.name}."
                  )
+         # TODO: Filter and warning if it pass any objects to context between
+         #   strategy job executor like function, etc.
+         return Result(
+             status=0,
+             context={
+                 gen_id(strategy): {
+                     "matrix": strategy,
+                     "stages": context.pop("stages", {}),
+                 },
+             },
+         )

-                 # NOTE:
-                 #       I do not use below syntax because `params` dict be the
-                 #   reference memory pointer and it was changed when I action
-                 #   anything like update or re-construct this.
-                 #   ... params |= stage.execute(params=params)
-                 stage.execute(params=params)
-         # TODO: We should not return matrix key to outside
-         return params
+     def execute(self, params: DictData | None = None) -> Result:
+         """Job execution with passing dynamic parameters from the pipeline
+         execution. It will generate matrix values at the first step and for-loop
+         any metrix to all stages dependency.
+
+         :param params: An input parameters that use on job execution.
+         :rtype: Result
+         """
+         strategy_context: DictData = {}
+         rs = Result(context=strategy_context)
+
+         if self.strategy.max_parallel == 1:
+             for strategy in self.strategy.make():
+                 rs: Result = self.strategy_execute(
+                     strategy, params=copy.deepcopy(params)
+                 )
+                 strategy_context.update(rs.context)
+             return rs
+
+         # FIXME: (WF001) I got error that raise when use
+         #   ``ProcessPoolExecutor``;
+         #   ---
+         #   _pickle.PicklingError: Can't pickle
+         #       <function ??? at 0x000001F0BE80F160>: attribute lookup ???
+         #       on ddeutil.workflow.stage failed
+         #
+         with Manager() as manager:
+             event: Event = manager.Event()
+
+             with ProcessPoolExecutor(
+                 max_workers=self.strategy.max_parallel
+             ) as pool:
+                 pool_result: list[Future] = [
+                     pool.submit(
+                         self.strategy_execute,
+                         st,
+                         params=copy.deepcopy(params),
+                         event=event,
+                     )
+                     for st in self.strategy.make()
+                 ]
+                 if self.strategy.fail_fast:
+
+                     # NOTE: Get results from a collection of tasks with a
+                     #   timeout that has the first exception.
+                     done, not_done = wait(
+                         pool_result, timeout=60, return_when=FIRST_EXCEPTION
+                     )
+                     nd: str = (
+                         f", the strategies do not run is {not_done}"
+                         if not_done
+                         else ""
+                     )
+                     logging.warning(f"[JOB]: Strategy is set Fail Fast{nd}")
+
+                     # NOTE: Stop all running tasks
+                     event.set()
+
+                     # NOTE: Cancel any scheduled tasks
+                     for future in pool_result:
+                         future.cancel()
+
+                     rs.status = 0
+                     for f in done:
+                         if f.exception():
+                             rs.status = 1
+                             logging.error(
+                                 f"One task failed with: {f.exception()}, "
+                                 f"shutting down"
+                             )
+                         elif f.cancelled():
+                             continue
+                         else:
+                             rs: Result = f.result(timeout=60)
+                             strategy_context.update(rs.context)
+                     rs.context = strategy_context
+                     return rs
+
+                 for pool_rs in as_completed(pool_result):
+                     try:
+                         rs: Result = pool_rs.result(timeout=60)
+                         strategy_context.update(rs.context)
+                     except PickleError as err:
+                         # NOTE: I do not want to fix this issue because it does
+                         #   not make sense and over-engineering with this bug
+                         #   fix process.
+                         raise JobException(
+                             f"PyStage that create object on locals does use "
+                             f"parallel in strategy;\n\t{err}"
+                         ) from None
+                     except TimeoutError:
+                         rs.status = 1
+                         logging.warning("Task is hanging. Attempting to kill.")
+                         pool_rs.cancel()
+                         if not pool_rs.cancelled():
+                             logging.warning("Failed to cancel the task.")
+                         else:
+                             logging.warning("Task canceled successfully.")
+                     except StageException as err:
+                         rs.status = 1
+                         logging.warning(
+                             f"Get stage exception with fail-fast does not set;"
+                             f"\n\t{err}"
+                         )
+                 rs.status = 0
+                 rs.context = strategy_context
+         return rs


  class Pipeline(BaseModel):
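
The fail-fast branch of the new `Job.execute` above leans on `concurrent.futures.wait` with `return_when=FIRST_EXCEPTION`: block until the first strategy raises (or everything finishes), cancel what has not started, then inspect the finished futures. A self-contained sketch of that pattern, with an illustrative worker standing in for `strategy_execute` and a thread pool instead of the process pool used above:

    import time
    from concurrent.futures import FIRST_EXCEPTION, ThreadPoolExecutor, wait

    def run_strategy(n: int) -> int:
        # Illustrative worker: the third strategy fails after 3 seconds.
        time.sleep(n)
        if n == 3:
            raise RuntimeError("strategy 3 failed")
        return n

    with ThreadPoolExecutor(max_workers=4) as pool:
        futures = [pool.submit(run_strategy, n) for n in range(1, 5)]
        # Block until the first raised exception, or until all complete.
        done, not_done = wait(futures, timeout=60, return_when=FIRST_EXCEPTION)
        # Cancel anything not yet finished, then inspect what did finish.
        for f in not_done:
            f.cancel()
        for f in done:
            if exc := f.exception():
                print(f"failed: {exc}")
            else:
                print(f"ok: {f.result()}")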
@@ -409,13 +394,73 @@ class Pipeline(BaseModel):
      coding line to execute it.
      """

-     desc: Optional[str] = Field(default=None)
-     params: dict[str, Params] = Field(default_factory=dict)
-     on: dict[str, DictStr] = Field(default_factory=dict)
-     jobs: dict[str, Job]
+     name: str = Field(description="A pipeline name.")
+     desc: Optional[str] = Field(
+         default=None,
+         description=(
+             "A pipeline description that is able to be string of markdown "
+             "content."
+         ),
+     )
+     params: dict[str, Param] = Field(
+         default_factory=dict,
+         description="A parameters that want to use on this pipeline.",
+     )
+     on: list[On] = Field(
+         default_factory=list,
+         description="A list of On instance for this pipeline schedule.",
+     )
+     jobs: dict[str, Job] = Field(
+         default_factory=dict,
+         description="A mapping of job ID and job model that already loaded.",
+     )
+
+     @classmethod
+     def from_loader(
+         cls,
+         name: str,
+         externals: DictData | None = None,
+     ) -> Self:
+         """Create Pipeline instance from the Loader object.
+
+         :param name: A pipeline name that want to pass to Loader object.
+         :param externals: An external parameters that want to pass to Loader
+             object.
+         """
+         loader: Loader = Loader(name, externals=(externals or {}))
+         loader_data: DictData = copy.deepcopy(loader.data)
+
+         # NOTE: Add name to loader data
+         loader_data["name"] = name.replace(" ", "_")
+
+         if "jobs" not in loader_data:
+             raise ValueError("Config does not set ``jobs`` value")
+
+         # NOTE: Prepare `on` data
+         cls.__bypass_on(loader_data)
+         return cls.model_validate(loader_data)
+
+     @classmethod
+     def __bypass_on(cls, data: DictData, externals: DictData | None = None):
+         """Bypass the on data to loaded config data."""
+         if on := data.pop("on", []):
+             if isinstance(on, str):
+                 on = [on]
+             if any(not isinstance(i, (dict, str)) for i in on):
+                 raise TypeError("The ``on`` key should be list of str or dict")
+             data["on"] = [
+                 (
+                     Loader(n, externals=(externals or {})).data
+                     if isinstance(n, str)
+                     else n
+                 )
+                 for n in on
+             ]
+         return data

      @model_validator(mode="before")
      def __prepare_params(cls, values: DictData) -> DictData:
+         """Prepare the params key."""
          # NOTE: Prepare params type if it passing with only type value.
          if params := values.pop("params", {}):
              values["params"] = {
@@ -428,23 +473,16 @@ class Pipeline(BaseModel):
              }
          return values

-     @classmethod
-     def from_loader(
-         cls,
-         name: str,
-         externals: DictData | None = None,
-     ) -> Self:
-         """Create Pipeline instance from the Loader object."""
-         loader: Loader = Loader(name, externals=(externals or {}))
-         if "jobs" not in loader.data:
-             raise ValueError("Config does not set ``jobs`` value")
-         return cls(
-             jobs=loader.data["jobs"],
-             params=loader.data["params"],
-         )
-
      @model_validator(mode="after")
-     def job_checking_needs(self):
+     def __validate_jobs_need(self):
+         for job in self.jobs:
+             if not_exist := [
+                 need for need in self.jobs[job].needs if need not in self.jobs
+             ]:
+                 raise PipelineException(
+                     f"This needed jobs: {not_exist} do not exist in this "
+                     f"pipeline."
+                 )
          return self

      def job(self, name: str) -> Job:
@@ -460,17 +498,148 @@ class Pipeline(BaseModel):
          raise ValueError(f"Job {name!r} does not exists")
          return self.jobs[name]

+     def parameterize(self, params: DictData) -> DictData:
+         """Prepare parameters before passing to execution process. This method
+         will create jobs key to params mapping that will keep any result from
+         job execution.
+
+         :param params: A parameter mapping that receive from pipeline execution.
+         :rtype: DictData
+         """
+         # VALIDATE: Incoming params should have keys that set on this pipeline.
+         if check_key := tuple(
+             f"{k!r}"
+             for k in self.params
+             if (k not in params and self.params[k].required)
+         ):
+             raise PipelineException(
+                 f"Required Param on this pipeline setting does not set: "
+                 f"{', '.join(check_key)}."
+             )
+
+         # NOTE: mapping type of param before adding it to params variable.
+         return {
+             "params": (
+                 params
+                 | {
+                     k: self.params[k].receive(params[k])
+                     for k in params
+                     if k in self.params
+                 }
+             ),
+             "jobs": {},
+         }
+
+     def release(
+         self,
+         on: On,
+         params: DictData | None = None,
+         *,
+         waiting_sec: int = 600,
+         sleep_interval: int = 10,
+     ) -> str:
+         """Start running pipeline with the on schedule in period of 30 minutes.
+         That mean it will still running at background 30 minutes until the
+         schedule matching with its time.
+         """
+         params: DictData = params or {}
+         logging.info(f"[CORE] Start release: {self.name!r} : {on.cronjob}")
+
+         gen: CronRunner = on.generate(datetime.now())
+         tz: ZoneInfo = gen.tz
+         next_running_time: datetime = gen.next
+
+         if get_diff_sec(next_running_time, tz=tz) < waiting_sec:
+             logging.debug(
+                 f"[CORE]: {self.name} closely to run >> "
+                 f"{next_running_time:%Y-%m-%d %H:%M:%S}"
+             )
+
+             # NOTE: Release when the time is nearly to schedule time.
+             while (duration := get_diff_sec(next_running_time, tz=tz)) > 15:
+                 time.sleep(sleep_interval)
+                 logging.debug(
+                     f"[CORE]: {self.name!r} : Sleep until: {duration}"
+                 )
+
+             time.sleep(1)
+             rs: Result = self.execute(params=params)
+             logging.debug(f"{rs.context}")
+
+             return f"[CORE]: Start Execute: {self.name}"
+         return f"[CORE]: {self.name} does not closely to run yet."
+
+     def poke(self, params: DictData | None = None):
+         """Poke pipeline threading task for executing with its schedules that
+         was set on the `on`.
+         """
+         params: DictData = params or {}
+         logging.info(
+             f"[CORE]: Start Poking: {self.name!r} :"
+             f"{gen_id(self.name, unique=True)}"
+         )
+         results = []
+         with ThreadPoolExecutor(
+             max_workers=int(
+                 os.getenv("WORKFLOW_CORE_MAX_PIPELINE_POKING", "4")
+             ),
+         ) as executor:
+             futures: list[Future] = [
+                 executor.submit(
+                     self.release,
+                     on,
+                     params=params,
+                 )
+                 for on in self.on
+             ]
+             for future in as_completed(futures):
+                 rs = future.result()
+                 logging.info(rs)
+                 results.append(rs)
+         return results
+
+     def job_execute(
+         self,
+         job: str,
+         params: DictData,
+     ):
+         """Job Executor that use on pipeline executor.
+
+         :param job: A job ID that want to execute.
+         :param params: A params that was parameterized from pipeline execution.
+         """
+         # VALIDATE: check a job ID that exists in this pipeline or not.
+         if job not in self.jobs:
+             raise PipelineException(
+                 f"The job ID: {job} does not exists on {self.name!r} pipeline."
+             )
+
+         job_obj: Job = self.jobs[job]
+
+         rs: Result = job_obj.execute(params=params)
+         if rs.status != 0:
+             logging.warning(
+                 f"Getting status does not equal zero on job: {job}."
+             )
+             return Result(
+                 status=1, context={job: job_obj.set_outputs(rs.context)}
+             )
+
+         return Result(status=0, context={job: job_obj.set_outputs(rs.context)})
+
      def execute(
          self,
          params: DictData | None = None,
-         time_out: int = 60,
-     ) -> DictData:
+         *,
+         timeout: int = 60,
+     ) -> Result:
          """Execute pipeline with passing dynamic parameters to any jobs that
          included in the pipeline.

-         :param params: An input parameters that use on pipeline execution.
-         :param time_out: A time out in second unit that use for limit time of
-             this pipeline execution.
+         :param params: An input parameters that use on pipeline execution that
+             will parameterize before using it.
+         :param timeout: A pipeline execution time out in second unit that use
+             for limit time of execution and waiting job dependency.
+         :rtype: Result

          ---

@@ -483,66 +652,82 @@ class Pipeline(BaseModel):
          For example, when I want to use the output from previous stage, I
          can access it with syntax:

-         ... "<job-name>.stages.<stage-id>.outputs.<key>"
+         ... ${job-name}.stages.${stage-id}.outputs.${key}

          """
+         logging.info(
+             f"[CORE]: Start Execute: {self.name}:"
+             f"{gen_id(self.name, unique=True)}"
+         )
          params: DictData = params or {}
-         if check_key := tuple(f"{k!r}" for k in self.params if k not in params):
-             raise ValueError(
-                 f"Parameters that needed on pipeline does not pass: "
-                 f"{', '.join(check_key)}."
-             )

-         if any(p not in params for p in self.params if self.params[p].required):
-             raise ValueError("Required parameter does not pass")
-
-         # NOTE: mapping type of param before adding it to params variable.
-         params: DictData = {
-             "params": (
-                 params
-                 | {
-                     k: self.params[k].receive(params[k])
-                     for k in params
-                     if k in self.params
-                 }
-             ),
-             "jobs": {},
-         }
+         # NOTE: It should not do anything if it does not have job.
+         if not self.jobs:
+             logging.warning("[PIPELINE]: This pipeline does not have any jobs")
+             return Result(status=0, context=params)

          # NOTE: create a job queue that keep the job that want to running after
          #   it dependency condition.
-         jq = Queue()
+         jq: Queue = Queue()
          for job_id in self.jobs:
              jq.put(job_id)

          ts: float = time.monotonic()
-         not_time_out_flag = True
-
-         # IMPORTANT: The job execution can run parallel and waiting by needed.
-         while not jq.empty() and (
-             not_time_out_flag := ((time.monotonic() - ts) < time_out)
-         ):
-             job_id: str = jq.get()
-             logging.info(f"[PIPELINE]: Start execute the job: {job_id!r}")
-             job: Job = self.jobs[job_id]
-
-             # TODO: Condition on ``needs`` of this job was set. It should create
-             #   multithreading process on this step.
-             #   But, I don't know how to handle changes params between each job
-             #   execution while its use them together.
-             #   ---
-             #   >>> import multiprocessing
-             #   >>> with multiprocessing.Pool(processes=3) as pool:
-             #   ...     results = pool.starmap(merge_names, ('', '', ...))
-             #
-             if any(params["jobs"].get(need) for need in job.needs):
-                 jq.put(job_id)
+         not_time_out_flag: bool = True
+
+         # NOTE: Create result context that will pass this context to any
+         #   execution dependency.
+         rs: Result = Result(context=self.parameterize(params))
+         if (
+             worker := int(os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "1"))
+         ) > 1:
+             # IMPORTANT: The job execution can run parallel and waiting by
+             #   needed.
+             with ThreadPoolExecutor(max_workers=worker) as executor:
+                 futures: list[Future] = []
+                 while not jq.empty() and (
+                     not_time_out_flag := ((time.monotonic() - ts) < timeout)
+                 ):
+                     job_id: str = jq.get()
+                     logging.info(
+                         f"[PIPELINE]: Start execute the job: {job_id!r}"
+                     )
+                     job: Job = self.jobs[job_id]
+                     if any(
+                         need not in rs.context["jobs"] for need in job.needs
+                     ):
+                         jq.put(job_id)
+                     futures.append(
+                         executor.submit(
+                             self.job_execute,
+                             job_id,
+                             params=copy.deepcopy(rs.context),
+                         ),
+                     )
+                 for future in as_completed(futures):
+                     job_rs: Result = future.result(timeout=20)
+                     rs.context["jobs"].update(job_rs.context)
+         else:
+             logging.info(
+                 f"[CORE]: Run {self.name} with non-threading job executor"
+             )
+             while not jq.empty() and (
+                 not_time_out_flag := ((time.monotonic() - ts) < timeout)
+             ):
+                 job_id: str = jq.get()
+                 logging.info(f"[PIPELINE]: Start execute the job: {job_id!r}")
+                 job: Job = self.jobs[job_id]
+                 if any(need not in rs.context["jobs"] for need in job.needs):
+                     jq.put(job_id)
+
+                 job_rs = self.job_execute(
+                     job_id, params=copy.deepcopy(rs.context)
+                 )
+                 rs.context["jobs"].update(job_rs.context)

-             job.execute(params=params)
-             params["jobs"][job_id] = {
-                 "stages": params.pop("stages", {}),
-                 "matrix": params.pop("matrix", {}),
-             }
          if not not_time_out_flag:
-             raise RuntimeError("Execution of pipeline was time out")
-         return params
+             logging.warning("Execution of pipeline was time out")
+             rs.status = 1
+             return rs
+         rs.status = 0
+         return rs
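
The dependency handling in the new `Pipeline.execute` is a requeue loop rather than a topological sort: a job whose `needs` are not yet present in the `jobs` context goes back on the queue, and the loop is bounded by the wall-clock `timeout` rather than a cycle check. The same idea in isolation (this sketch adds a `continue` after requeueing, which the diffed code handles differently, and drops the timeout for brevity):

    from queue import Queue

    # Toy dependency graph: job ID -> the job IDs it needs first.
    needs = {
        "load": ["transform"],
        "transform": ["extract"],
        "extract": [],
    }
    done: dict[str, str] = {}

    jq: Queue = Queue()
    for job_id in needs:
        jq.put(job_id)

    while not jq.empty():
        job_id = jq.get()
        # Requeue until every needed job has produced its output.
        if any(n not in done for n in needs[job_id]):
            jq.put(job_id)
            continue
        done[job_id] = f"result-of-{job_id}"

    print(list(done))  # ['extract', 'transform', 'load']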