ddeutil-workflow 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +9 -0
- ddeutil/workflow/__types.py +43 -1
- ddeutil/workflow/exceptions.py +13 -1
- ddeutil/workflow/loader.py +16 -110
- ddeutil/workflow/on.py +195 -0
- ddeutil/workflow/pipeline.py +351 -371
- ddeutil/workflow/{vendors/__schedule.py → scheduler.py} +222 -176
- ddeutil/workflow/stage.py +402 -0
- ddeutil/workflow/utils.py +219 -28
- {ddeutil_workflow-0.0.4.dist-info → ddeutil_workflow-0.0.6.dist-info}/METADATA +118 -90
- ddeutil_workflow-0.0.6.dist-info/RECORD +15 -0
- {ddeutil_workflow-0.0.4.dist-info → ddeutil_workflow-0.0.6.dist-info}/WHEEL +1 -1
- ddeutil/workflow/__regex.py +0 -44
- ddeutil/workflow/conn.py +0 -240
- ddeutil/workflow/schedule.py +0 -82
- ddeutil/workflow/tasks/__init__.py +0 -6
- ddeutil/workflow/tasks/_pandas.py +0 -54
- ddeutil/workflow/tasks/_polars.py +0 -92
- ddeutil/workflow/vendors/__dataset.py +0 -127
- ddeutil/workflow/vendors/__dict.py +0 -333
- ddeutil/workflow/vendors/__init__.py +0 -0
- ddeutil/workflow/vendors/aws.py +0 -185
- ddeutil/workflow/vendors/az.py +0 -0
- ddeutil/workflow/vendors/minio.py +0 -11
- ddeutil/workflow/vendors/pd.py +0 -13
- ddeutil/workflow/vendors/pg.py +0 -11
- ddeutil/workflow/vendors/pl.py +0 -172
- ddeutil/workflow/vendors/sftp.py +0 -209
- ddeutil_workflow-0.0.4.dist-info/RECORD +0 -29
- {ddeutil_workflow-0.0.4.dist-info → ddeutil_workflow-0.0.6.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.4.dist-info → ddeutil_workflow-0.0.6.dist-info}/top_level.txt +0 -0
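The headline change in this release: every stage class (`BaseStage`, `EmptyStage`, `ShellStage`, `PyStage`, `TaskStage`) moves out of `pipeline.py` into the new `stage.py`, and stage, job, and pipeline execution now return a `Result` object instead of mutating and returning the params dict. A minimal sketch of that contract, assuming only the two fields the diff below actually uses (`status` and `context`); the real model is imported from `ddeutil.workflow.utils` and its implementation is not shown in this diff:

```python
from dataclasses import dataclass, field
from typing import Any


@dataclass
class Result:
    # Stand-in for ddeutil.workflow.utils.Result; only the two fields that
    # the 0.0.6 diff below relies on are modeled here.
    status: int = 0
    context: dict[str, Any] = field(default_factory=dict)


# Context shape that Job.execute() builds per strategy, following the
# structure comments inside the diff: input params flow in, stage outputs
# accumulate under "stages", and Pipeline.execute() fills "jobs".
context: dict[str, Any] = {
    "params": {},   # current input params
    "jobs": {},     # filled by Pipeline.execute() from each job's Result
    "matrix": {},   # current strategy value
    "stages": {},   # {stage-id: {"outputs": ...}} per executed stage
}
rs = Result(status=0, context=context)
assert rs.status == 0  # callers raise JobException/PipelineException otherwise
```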
ddeutil/workflow/pipeline.py
CHANGED
@@ -5,284 +5,29 @@
 # ------------------------------------------------------------------------------
 from __future__ import annotations
 
-import contextlib
-import inspect
-import itertools
+import copy
 import logging
-import subprocess
-import sys
 import time
-import uuid
-from abc import ABC, abstractmethod
-from inspect import Parameter
-from pathlib import Path
 from queue import Queue
-from subprocess import CompletedProcess
-from typing import Any, Callable, Optional, Union
+from typing import Optional
 
-import msgspec as spec
 from pydantic import BaseModel, Field
 from pydantic.functional_validators import model_validator
 from typing_extensions import Self
 
-from .
-from .
-from .
-from .
-from .
-
-
-class BaseStage(BaseModel, ABC):
-    """Base Stage Model that keep only id and name fields."""
-
-    id: Optional[str] = Field(
-        default=None,
-        description=(
-            "The stage ID that use to keep execution output or getting by job "
-            "owner."
-        ),
-    )
-    name: str = Field(
-        description="The stage name that want to logging when start execution."
-    )
-
-    @abstractmethod
-    def execute(self, params: DictData) -> DictData:
-        """Execute abstraction method that action something by sub-model class.
-
-        :param params: A parameter data that want to use in this execution.
-        """
-        raise NotImplementedError("Stage should implement ``execute`` method.")
-
-    def set_outputs(self, rs: DictData, params: DictData) -> DictData:
-        """Set an outputs from execution process to an input params."""
-        if self.id is None:
-            return params
-
-        if "stages" not in params:
-            params["stages"] = {}
-
-        params["stages"][self.id] = {"outputs": rs}
-        return params
-
-
-class EmptyStage(BaseStage):
-    """Empty stage that is doing nothing and logging the name of stage only."""
-
-    def execute(self, params: DictData) -> DictData:
-        """Execute for the Empty stage that do only logging out."""
-        logging.info(f"Execute: {self.name!r}")
-        return params
-
-
-class ShellStage(BaseStage):
-    """Shell statement stage."""
-
-    shell: str
-    env: DictStr = Field(default_factory=dict)
-
-    @staticmethod
-    @contextlib.contextmanager
-    def __prepare_shell(shell: str):
-        """Return context of prepared shell statement that want to execute. This
-        step will write the `.sh` file before giving this file name to context.
-        After that, it will auto delete this file automatic.
-
-        :param shell: A shell statement that want to prepare.
-        """
-        f_name: str = f"{uuid.uuid4()}.sh"
-        f_shebang: str = "bash" if sys.platform.startswith("win") else "sh"
-        with open(f"./{f_name}", mode="w", newline="\n") as f:
-            f.write(f"#!/bin/{f_shebang}\n")
-
-            # NOTE: make sure that shell script file does not have `\r` char.
-            f.write(shell.replace("\r\n", "\n"))
-
-        make_exec(f"./{f_name}")
-
-        yield [f_shebang, f_name]
-
-        Path(f_name).unlink()
-
-    def set_outputs(self, rs: CompletedProcess, params: DictData) -> DictData:
-        """Set outputs to params"""
-        # NOTE: skipping set outputs of stage execution when id does not set.
-        if self.id is None:
-            return params
-
-        if "stages" not in params:
-            params["stages"] = {}
-
-        params["stages"][self.id] = {
-            # NOTE: The output will fileter unnecessary keys from ``_locals``.
-            "outputs": {
-                "return_code": rs.returncode,
-                "stdout": rs.stdout.rstrip("\n"),
-            },
-        }
-        return params
-
-    def execute(self, params: DictData) -> DictData:
-        """Execute the Shell & Powershell statement with the Python build-in
-        ``subprocess`` package.
-        """
-        with self.__prepare_shell(self.shell) as sh:
-            with open(sh[-1]) as f:
-                logging.debug(f.read())
-            logging.info(f"Shell-Execute: {sh}")
-            rs: CompletedProcess = subprocess.run(
-                sh,
-                shell=False,
-                capture_output=True,
-                text=True,
-            )
-        if rs.returncode > 0:
-            logging.error(f"{rs.stderr}\nRunning Statement:\n---\n{self.shell}")
-            raise TaskException(
-                f"{rs.stderr}\nRunning Statement:\n---\n{self.shell}"
-            )
-        self.set_outputs(rs, params)
-        return params
-
-
-class PyStage(BaseStage):
-    """Python executor stage that running the Python statement that receive
-    globals nad additional variables.
-    """
-
-    run: str
-    vars: DictData = Field(default_factory=dict)
-
-    def get_vars(self, params: DictData) -> DictData:
-        """Return variables"""
-        rs = self.vars.copy()
-        for p, v in self.vars.items():
-            rs[p] = map_params(v, params)
-        return rs
-
-    def set_outputs(self, rs: DictData, params: DictData) -> DictData:
-        """Set outputs to params"""
-        # NOTE: skipping set outputs of stage execution when id does not set.
-        if self.id is None:
-            return params
-
-        if "stages" not in params:
-            params["stages"] = {}
-
-        params["stages"][self.id] = {
-            # NOTE: The output will fileter unnecessary keys from ``_locals``.
-            "outputs": {k: rs[k] for k in rs if k != "__annotations__"},
-        }
-        return params
-
-    def execute(self, params: DictData) -> DictData:
-        """Execute the Python statement that pass all globals and input params
-        to globals argument on ``exec`` build-in function.
-
-        :param params: A parameter that want to pass before run any statement.
-        :type params: DictData
-
-        :rtype: DictData
-        :returns: A parameters from an input that was mapped output if the stage
-            ID was set.
-        """
-        _globals: DictData = globals() | params | self.get_vars(params)
-        _locals: DictData = {}
-        try:
-            exec(map_params(self.run, params), _globals, _locals)
-        except Exception as err:
-            raise TaskException(
-                f"{err.__class__.__name__}: {err}\nRunning Statement:\n---\n"
-                f"{self.run}"
-            ) from None
-
-        # NOTE: set outputs from ``_locals`` value from ``exec``.
-        self.set_outputs(_locals, params)
-        return params | {k: _globals[k] for k in params if k in _globals}
-
-
-class TaskSearch(spec.Struct, kw_only=True, tag="task"):
-    """Task Search Struct that use the `msgspec` for the best performance."""
-
-    path: str
-    func: str
-    tag: str
-
-    def to_dict(self) -> DictData:
-        """Return dict data from struct fields."""
-        return {f: getattr(self, f) for f in self.__struct_fields__}
-
-
-class TaskStage(BaseStage):
-    """Task executor stage that running the Python function."""
-
-    task: str
-    args: DictData
-
-    @staticmethod
-    def extract_task(task: str) -> Callable[[], Callable[[Any], Any]]:
-        """Extract Task string value to task function."""
-        if not (found := RegexConf.RE_TASK_FMT.search(task)):
-            raise ValueError("Task does not match with task format regex.")
-        tasks: TaskSearch = TaskSearch(**found.groupdict())
-
-        # NOTE: Registry object should implement on this package only.
-        # TODO: This prefix value to search registry should dynamic with
-        #   config file.
-        rgt = make_registry(f"ddeutil.workflow.{tasks.path}")
-        if tasks.func not in rgt:
-            raise NotImplementedError(
-                f"ddeutil.workflow.{tasks.path}.registries does not "
-                f"implement registry: {tasks.func}."
-            )
-
-        if tasks.tag not in rgt[tasks.func]:
-            raise NotImplementedError(
-                f"tag: {tasks.tag} does not found on registry func: "
-                f"ddeutil.workflow.{tasks.path}.registries."
-                f"{tasks.func}"
-            )
-        return rgt[tasks.func][tasks.tag]
-
-    def execute(self, params: DictData) -> DictData:
-        """Execute the Task function."""
-        task_caller = self.extract_task(self.task)()
-        if not callable(task_caller):
-            raise ImportError("Task caller function does not callable.")
-
-        # NOTE: check task caller parameters
-        ips = inspect.signature(task_caller)
-        if any(
-            k not in self.args
-            for k in ips.parameters
-            if ips.parameters[k].default == Parameter.empty
-        ):
-            raise ValueError(
-                f"necessary parameters, ({', '.join(ips.parameters.keys())}), "
-                f"does not set to args"
-            )
-        try:
-            rs = task_caller(**map_params(self.args, params))
-        except Exception as err:
-            raise TaskException(f"{err.__class__.__name__}: {err}") from err
-        self.set_outputs(rs, params)
-        return params
-
-
-# NOTE: Order of parsing stage data
-Stage = Union[
-    PyStage,
-    ShellStage,
-    TaskStage,
-    EmptyStage,
-]
+from .__types import DictData, DictStr, Matrix, MatrixExclude, MatrixInclude
+from .exceptions import JobException, PipelineException
+from .loader import Loader
+from .on import On
+from .stage import Stage
+from .utils import Param, Result, cross_product, dash2underscore, gen_id
 
 
 class Strategy(BaseModel):
     """Strategy Model that will combine a matrix together for running the
     special job.
 
-
+    Data Validate:
     >>> strategy = {
     ...     'matrix': {
     ...         'first': [1, 2, 3],
@@ -295,31 +40,114 @@ class Strategy(BaseModel):
 
     fail_fast: bool = Field(default=False)
     max_parallel: int = Field(default=-1)
-    matrix:
-    include:
-
+    matrix: Matrix = Field(default_factory=dict)
+    include: MatrixInclude = Field(
+        default_factory=list,
+        description="A list of additional matrix that want to adds-in.",
+    )
+    exclude: MatrixExclude = Field(
+        default_factory=list,
+        description="A list of exclude matrix that want to filter-out.",
+    )
 
     @model_validator(mode="before")
     def __prepare_keys(cls, values: DictData) -> DictData:
-
-
-
-
+        """Rename key that use dash to underscore because Python does not
+        support this character exist in any variable name.
+        """
+        dash2underscore("max-parallel", values)
+        dash2underscore("fail-fast", values)
         return values
 
+    def make(self) -> list[DictStr]:
+        """Return List of product of matrix values that already filter with
+        exclude and add include.
+
+        :rtype: list[DictStr]
+        """
+        # NOTE: If it does not set matrix, it will return list of an empty dict.
+        if not (mt := self.matrix):
+            return [{}]
+
+        final: list[DictStr] = []
+        for r in cross_product(matrix=mt):
+            if any(
+                all(r[k] == v for k, v in exclude.items())
+                for exclude in self.exclude
+            ):
+                continue
+            final.append(r)
+
+        # NOTE: If it is empty matrix and include, it will return list of an
+        #   empty dict.
+        if not final and not self.include:
+            return [{}]
+
+        # NOTE: Add include to generated matrix with exclude list.
+        add: list[DictStr] = []
+        for include in self.include:
+            # VALIDATE:
+            #   Validate any key in include list should be a subset of some one
+            #   in matrix.
+            if all(not (set(include.keys()) <= set(m.keys())) for m in final):
+                raise ValueError("Include should have the keys equal to matrix")
+
+            # VALIDATE:
+            #   Validate value of include does not duplicate with generated
+            #   matrix.
+            if any(
+                all(include.get(k) == v for k, v in m.items())
+                for m in [*final, *add]
+            ):
+                continue
+            add.append(include)
+        final.extend(add)
+        return final
+
 
 class Job(BaseModel):
-    """Job Model
+    """Job Model (group of stages).
+
+    This job model allow you to use for-loop that call matrix strategy. If
+    you pass matrix mapping and it able to generate, you will see it running
+    with loop of matrix values.
+
+    Data Validate:
+    >>> job = {
+    ...     "runs-on": None,
+    ...     "strategy": {},
+    ...     "needs": [],
+    ...     "stages": [
+    ...         {
+    ...             "name": "Some stage",
+    ...             "run": "print('Hello World')",
+    ...         },
+    ...     ],
+    ... }
+    """
 
+    name: Optional[str] = Field(default=None)
+    desc: Optional[str] = Field(default=None)
     runs_on: Optional[str] = Field(default=None)
-    stages: list[Stage] = Field(
-
-
+    stages: list[Stage] = Field(
+        default_factory=list,
+        description="A list of Stage of this job.",
+    )
+    needs: list[str] = Field(
+        default_factory=list,
+        description="A list of the job ID that want to run before this job.",
+    )
+    strategy: Strategy = Field(
+        default_factory=Strategy,
+        description="A strategy matrix that want to generate.",
+    )
 
     @model_validator(mode="before")
     def __prepare_keys(cls, values: DictData) -> DictData:
-
-
+        """Rename key that use dash to underscore because Python does not
+        support this character exist in any variable name.
+        """
+        dash2underscore("runs-on", values)
         return values
 
     def stage(self, stage_id: str) -> Stage:
@@ -329,57 +157,118 @@ class Job(BaseModel):
                 return stage
         raise ValueError(f"Stage ID {stage_id} does not exists")
 
-
-
-
-
-        if not (mt := self.strategy.matrix):
-            return [{}]
-        final: list[DictStr] = []
-        for r in [
-            {_k: _v for e in mapped for _k, _v in e.items()}
-            for mapped in itertools.product(
-                *[[{k: v} for v in vs] for k, vs in mt.items()]
-            )
-        ]:
-            if any(
-                all(r[k] == v for k, v in exclude.items())
-                for exclude in self.strategy.exclude
-            ):
-                continue
-            final.append(r)
+    @staticmethod
+    def set_outputs(output: DictData) -> DictData:
+        if len(output) > 1:
+            return {"strategies": output}
 
-
-        return [{}]
+        return output[next(iter(output))]
 
-
-
-
-
+    def strategy_execute(self, strategy: DictData, params: DictData) -> Result:
+        context: DictData = {}
+        context.update(params)
+        context.update({"matrix": strategy})
+
+        for stage in self.stages:
+            _st_name: str = stage.id or stage.name
+
+            if stage.is_skip(params=context):
+                logging.info(f"[JOB]: Skip the stage: {_st_name!r}")
                 continue
-
-
+            logging.info(f"[JOB]: Start execute the stage: {_st_name!r}")
+
+            rs: Result = stage.execute(params=context)
+            if rs.status == 0:
+                stage.set_outputs(rs.context, params=context)
+            else:
+                raise JobException(
+                    f"Getting status does not equal zero on stage: "
+                    f"{stage.name}."
+                )
+        return Result(
+            status=0,
+            context={
+                gen_id(strategy): {
+                    "matrix": strategy,
+                    "stages": context.pop("stages", {}),
+                },
+            },
+        )
 
-    def execute(self, params: DictData | None = None) ->
-        """
-
-
+    def execute(self, params: DictData | None = None) -> Result:
+        """Job execution with passing dynamic parameters from the pipeline
+        execution. It will generate matrix values at the first step and for-loop
+        any metrix to all stages dependency.
+
+        :param params: An input parameters that use on job execution.
+        :rtype: Result
+        """
+        strategy_context: DictData = {}
+        for strategy in self.strategy.make():
+
+            # NOTE: Create strategy context and update matrix and params to this
+            #   context. So, the context will have structure like;
+            # ---
+            # {
+            #   "params": { ... },   <== Current input params
+            #   "jobs": { ... },
+            #   "matrix": { ... }   <== Current strategy value
+            # }
+            #
+            context: DictData = {}
+            context.update(params)
+            context.update({"matrix": strategy})
 
+            # TODO: we should add option for ``wait_as_complete`` for release
+            #   a stage execution to run on background (multi-thread).
+            # ---
+            # >>> from concurrency
+            #
             # IMPORTANT: The stage execution only run sequentially one-by-one.
             for stage in self.stages:
-
-
-
-
+                _st_name: str = stage.id or stage.name
+
+                if stage.is_skip(params=context):
+                    logging.info(f"[JOB]: Skip the stage: {_st_name!r}")
+                    continue
+                logging.info(f"[JOB]: Start execute the stage: {_st_name!r}")
+
+                # NOTE: Logging a matrix that pass on this stage execution.
+                if strategy:
+                    logging.info(f"[...]: Matrix: {strategy}")
 
                 # NOTE:
                 #   I do not use below syntax because `params` dict be the
                 #   reference memory pointer and it was changed when I action
                 #   anything like update or re-construct this.
+                #
                 #   ... params |= stage.execute(params=params)
-
-
-
+                #
+                # This step will add the stage result to ``stages`` key in
+                #   that stage id. It will have structure like;
+                # ---
+                # {
+                #   "params": { ... },
+                #   "jobs": { ... },
+                #   "matrix": { ... },
+                #   "stages": { { "stage-id-1": ... }, ... }
+                # }
+                #
+                rs: Result = stage.execute(params=context)
+                if rs.status == 0:
+                    stage.set_outputs(rs.context, params=context)
+                else:
+                    raise JobException(
+                        f"Getting status does not equal zero on stage: "
+                        f"{stage.name}."
+                    )
+
+            strategy_context[gen_id(strategy)] = {
+                "matrix": strategy,
+                "stages": context.pop("stages", {}),
+            }
+
+        return Result(status=0, context=strategy_context)
 
 
 class Pipeline(BaseModel):
@@ -388,11 +277,74 @@ class Pipeline(BaseModel):
     coding line to execute it.
     """
 
-
-
+    name: str = Field(description="A pipeline name.")
+    desc: Optional[str] = Field(
+        default=None,
+        description=(
+            "A pipeline description that is able to be string of markdown "
+            "content."
+        ),
+    )
+    params: dict[str, Param] = Field(
+        default_factory=dict,
+        description="A parameters that want to use on this pipeline.",
+    )
+    on: list[On] = Field(
+        default_factory=list,
+        description="A list of On instance for this pipeline schedule.",
+    )
+    jobs: dict[str, Job] = Field(
+        default_factory=dict,
+        description="A mapping of job ID and job model that already loaded.",
+    )
+
+    @classmethod
+    def from_loader(
+        cls,
+        name: str,
+        externals: DictData | None = None,
+    ) -> Self:
+        """Create Pipeline instance from the Loader object.
+
+        :param name: A pipeline name that want to pass to Loader object.
+        :param externals: An external parameters that want to pass to Loader
+            object.
+        """
+        loader: Loader = Loader(name, externals=(externals or {}))
+        loader_data: DictData = copy.deepcopy(loader.data)
+
+        # NOTE: Add name to loader data
+        loader_data["name"] = name.replace(" ", "_")
+
+        if "jobs" not in loader_data:
+            raise ValueError("Config does not set ``jobs`` value")
+
+        # NOTE: Prepare `on` data
+        cls.__bypass_on(loader_data)
+        return cls.model_validate(loader_data)
+
+    @classmethod
+    def __bypass_on(cls, data: DictData, externals: DictData | None = None):
+        """Bypass the on data to loaded config data."""
+        if on := data.pop("on", []):
+            if isinstance(on, str):
+                on = [on]
+            if any(not isinstance(i, (dict, str)) for i in on):
+                raise TypeError("The ``on`` key should be list of str or dict")
+            data["on"] = [
+                (
+                    Loader(n, externals=(externals or {})).data
+                    if isinstance(n, str)
+                    else n
+                )
+                for n in on
+            ]
+        return data
 
     @model_validator(mode="before")
     def __prepare_params(cls, values: DictData) -> DictData:
+        """Prepare the params key."""
+        # NOTE: Prepare params type if it passing with only type value.
         if params := values.pop("params", {}):
             values["params"] = {
                 p: (
@@ -404,24 +356,6 @@ class Pipeline(BaseModel):
             }
         return values
 
-    @classmethod
-    def from_loader(
-        cls,
-        name: str,
-        externals: Optional[DictData] = None,
-    ) -> Self:
-        loader: Loader = Loader(name, externals=(externals or {}))
-        if "jobs" not in loader.data:
-            raise ValueError("Config does not set ``jobs`` value")
-        return cls(
-            jobs=loader.data["jobs"],
-            params=loader.data["params"],
-        )
-
-    @model_validator(mode="after")
-    def job_checking_needs(self):
-        return self
-
     def job(self, name: str) -> Job:
         """Return Job model that exists on this pipeline.
 
@@ -435,17 +369,53 @@ class Pipeline(BaseModel):
         raise ValueError(f"Job {name!r} does not exists")
         return self.jobs[name]
 
+    def parameterize(self, params: DictData) -> DictData:
+        """Prepare parameters before passing to execution process. This method
+        will create jobs key to params mapping that will keep any result from
+        job execution.
+
+        :param params: A parameter mapping that receive from pipeline execution.
+        """
+        # VALIDATE: Incoming params should have keys that set on this pipeline.
+        if check_key := tuple(
+            f"{k!r}"
+            for k in self.params
+            if (k not in params and self.params[k].required)
+        ):
+            raise ValueError(
+                f"Required Param on this pipeline setting does not set: "
+                f"{', '.join(check_key)}."
+            )
+
+        # NOTE: mapping type of param before adding it to params variable.
+        return {
+            "params": (
+                params
+                | {
+                    k: self.params[k].receive(params[k])
+                    for k in params
+                    if k in self.params
+                }
+            ),
+            "jobs": {},
+        }
+
     def execute(
         self,
         params: DictData | None = None,
-
-
+        *,
+        timeout: int = 60,
+    ) -> Result:
         """Execute pipeline with passing dynamic parameters to any jobs that
         included in the pipeline.
 
-        :param params: An input parameters that use on pipeline execution
-
-
+        :param params: An input parameters that use on pipeline execution that
+            will parameterize before using it.
+        :param timeout: A pipeline execution time out in second unit that use
+            for limit time of execution and waiting job dependency.
+        :rtype: Result
+
+        ---
 
         See Also:
 
@@ -456,41 +426,36 @@ class Pipeline(BaseModel):
         For example, when I want to use the output from previous stage, I
         can access it with syntax:
 
-            ...
+            ... ${job-name}.stages.${stage-id}.outputs.${key}
 
         """
+        logging.info(
+            f"[CORE]: Start Pipeline {self.name}:"
+            f"{gen_id(self.name, unique=True)}"
+        )
         params: DictData = params or {}
-        if check_key := tuple(f"{k!r}" for k in self.params if k not in params):
-            raise ValueError(
-                f"Parameters that needed on pipeline does not pass: "
-                f"{', '.join(check_key)}."
-            )
 
-
-
+        # NOTE: It should not do anything if it does not have job.
+        if not self.jobs:
+            logging.warning("[PIPELINE]: This pipeline does not have any jobs")
+            return Result(status=0, context=params)
 
-
-
-
-                | {
-                    k: self.params[k].receive(params[k])
-                    for k in params
-                    if k in self.params
-                }
-            ),
-            "jobs": {},
-        }
-
-        jq = Queue()
+        # NOTE: create a job queue that keep the job that want to running after
+        #   it dependency condition.
+        jq: Queue = Queue()
         for job_id in self.jobs:
             jq.put(job_id)
 
         ts: float = time.monotonic()
-        not_time_out_flag = True
+        not_time_out_flag: bool = True
+
+        # NOTE: Create result context that will pass this context to any
+        #   execution dependency.
+        rs: Result = Result(context=self.parameterize(params))
 
         # IMPORTANT: The job execution can run parallel and waiting by needed.
         while not jq.empty() and (
-            not_time_out_flag := ((time.monotonic() - ts) <
+            not_time_out_flag := ((time.monotonic() - ts) < timeout)
         ):
             job_id: str = jq.get()
             logging.info(f"[PIPELINE]: Start execute the job: {job_id!r}")
@@ -504,14 +469,29 @@ class Pipeline(BaseModel):
             # >>> import multiprocessing
             # >>> with multiprocessing.Pool(processes=3) as pool:
             # ...     results = pool.starmap(merge_names, ('', '', ...))
+            # ---
+            # This case we use multi-process because I want to split usage of
+            #   data in this level, that mean the data that push to parallel job
+            #   should not use across another job.
             #
-            if any(
+            if any(rs.context["jobs"].get(need) for need in job.needs):
                 jq.put(job_id)
-
-
-
-
-
+
+            # NOTE: copy current the result context for reference other job
+            #   context.
+            job_context: DictData = copy.deepcopy(rs.context)
+            job_rs: Result = job.execute(params=job_context)
+            if job_rs.status == 0:
+                # NOTE: Receive output of job execution.
+                rs.context["jobs"][job_id] = job.set_outputs(job_rs.context)
+            else:
+                raise PipelineException(
+                    f"Getting status does not equal zero on job: {job_id}."
+                )
+
         if not not_time_out_flag:
-
-
+            logging.warning("Execution of pipeline was time out")
+            rs.status = 1
+            return rs
+        rs.status = 0
+        return rs
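For reference, the matrix expansion behind the new `Strategy.make()` can be exercised standalone. The sketch below re-implements the same semantics outside the package; `cross_product` is a local stand-in reconstructed from the inline `itertools.product` code this diff removes from `Job`, and the example values are illustrative only:

```python
import itertools
from typing import Any


def cross_product(matrix: dict[str, list[Any]]) -> list[dict[str, Any]]:
    # Mirrors the inline expansion removed from Job: one dict per combination
    # of every key's value list.
    return [
        {k: v for item in combo for k, v in item.items()}
        for combo in itertools.product(
            *[[{k: v} for v in vs] for k, vs in matrix.items()]
        )
    ]


def make(
    matrix: dict[str, list[Any]],
    include: list[dict[str, Any]],
    exclude: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    # No matrix configured -> a single empty strategy, as in the diff.
    if not matrix:
        return [{}]

    # Drop a combination when it matches every key/value of an exclude entry.
    final = [
        r
        for r in cross_product(matrix)
        if not any(all(r[k] == v for k, v in ex.items()) for ex in exclude)
    ]
    if not final and not include:
        return [{}]

    add: list[dict[str, Any]] = []
    for inc in include:
        # Include keys must be a subset of the keys of some generated combo.
        if all(not (set(inc) <= set(m)) for m in final):
            raise ValueError("Include should have the keys equal to matrix")
        # Skip an include that duplicates an already-generated combination.
        if any(all(inc.get(k) == v for k, v in m.items()) for m in [*final, *add]):
            continue
        add.append(inc)
    return final + add


if __name__ == "__main__":
    print(make(
        matrix={"first": [1, 2], "second": ["a", "b"]},
        exclude=[{"first": 1, "second": "b"}],
        include=[{"first": 9, "second": "z"}],
    ))
    # [{'first': 1, 'second': 'a'}, {'first': 2, 'second': 'a'},
    #  {'first': 2, 'second': 'b'}, {'first': 9, 'second': 'z'}]
```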