ddeutil-workflow 0.0.8__py3-none-any.whl → 0.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,47 +6,67 @@
  from __future__ import annotations

  import copy
- import logging
  import os
  import time
  from concurrent.futures import (
      FIRST_EXCEPTION,
      Future,
-     ProcessPoolExecutor,
      ThreadPoolExecutor,
      as_completed,
      wait,
  )
- from datetime import datetime
- from multiprocessing import Event, Manager
+ from datetime import datetime, timedelta
+ from heapq import heappush
  from pickle import PickleError
  from queue import Queue
+ from textwrap import dedent
+ from threading import Event
  from typing import Optional
  from zoneinfo import ZoneInfo

  from pydantic import BaseModel, Field
- from pydantic.functional_validators import model_validator
+ from pydantic.functional_validators import field_validator, model_validator
  from typing_extensions import Self

- from .__types import DictData, DictStr, Matrix, MatrixExclude, MatrixInclude
+ from .__types import (
+     DictData,
+     DictStr,
+     Matrix,
+     MatrixExclude,
+     MatrixInclude,
+     TupleStr,
+ )
+ from .cron import CronRunner
  from .exceptions import (
      JobException,
      PipelineException,
      StageException,
      UtilException,
  )
- from .loader import Loader
+ from .log import FileLog, Log, get_logger
  from .on import On
- from .scheduler import CronRunner
  from .stage import Stage
  from .utils import (
+     Loader,
      Param,
      Result,
      cross_product,
      dash2underscore,
+     delay,
      filter_func,
      gen_id,
      get_diff_sec,
+     has_template,
+     param2template,
+ )
+
+ logger = get_logger("ddeutil.workflow")
+
+
+ __all__: TupleStr = (
+     "Strategy",
+     "Job",
+     "Pipeline",
  )


@@ -67,9 +87,25 @@ class Strategy(BaseModel):
      ... }
      """

-     fail_fast: bool = Field(default=False)
-     max_parallel: int = Field(default=1, gt=0)
-     matrix: Matrix = Field(default_factory=dict)
+     fail_fast: bool = Field(
+         default=False,
+         serialization_alias="fail-fast",
+     )
+     max_parallel: int = Field(
+         default=1,
+         gt=0,
+         description=(
+             "The maximum number of executor thread pool that want to run "
+             "parallel"
+         ),
+         serialization_alias="max-parallel",
+     )
+     matrix: Matrix = Field(
+         default_factory=dict,
+         description=(
+             "A matrix values that want to cross product to possible strategies."
+         ),
+     )
      include: MatrixInclude = Field(
          default_factory=list,
          description="A list of additional matrix that want to adds-in.",
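The new `serialization_alias` values only affect dumped output; the model still populates from dash-style config keys via the `dash2underscore` pre-validator seen later in this file. A minimal self-contained sketch of how that pair behaves in Pydantic v2 (a stand-in class, not the package's own):

```python
from pydantic import BaseModel, Field, model_validator


class Strategy(BaseModel):
    # Stand-in for ddeutil-workflow's Strategy; only the alias mechanics shown.
    fail_fast: bool = Field(default=False, serialization_alias="fail-fast")
    max_parallel: int = Field(default=1, gt=0, serialization_alias="max-parallel")

    @model_validator(mode="before")
    def __prepare_keys(cls, values: dict) -> dict:
        # Accept dash-style keys from a YAML config (what dash2underscore does).
        for key in ("fail-fast", "max-parallel"):
            if key in values:
                values[key.replace("-", "_")] = values.pop(key)
        return values


s = Strategy.model_validate({"fail-fast": True, "max-parallel": 4})
print(s.model_dump(by_alias=True))  # {'fail-fast': True, 'max-parallel': 4}
```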
@@ -166,9 +202,22 @@ class Job(BaseModel):
      ... }
      """

-     id: Optional[str] = Field(default=None)
-     desc: Optional[str] = Field(default=None)
-     runs_on: Optional[str] = Field(default=None)
+     id: Optional[str] = Field(
+         default=None,
+         description=(
+             "A job ID, this value will add from pipeline after validation "
+             "process."
+         ),
+     )
+     desc: Optional[str] = Field(
+         default=None,
+         description="A job description that can be string of markdown content.",
+     )
+     runs_on: Optional[str] = Field(
+         default=None,
+         description="A target executor node for this job use to execution.",
+         serialization_alias="runs-on",
+     )
      stages: list[Stage] = Field(
          default_factory=list,
          description="A list of Stage of this job.",
@@ -182,7 +231,10 @@ class Job(BaseModel):
          description="A strategy matrix that want to generate.",
      )
      run_id: Optional[str] = Field(
-         default=None, description="A running job ID.", repr=False
+         default=None,
+         description="A running job ID.",
+         repr=False,
+         exclude=True,
      )

      @model_validator(mode="before")
@@ -193,12 +245,31 @@ class Job(BaseModel):
          dash2underscore("runs-on", values)
          return values

+     @field_validator("desc", mode="after")
+     def ___prepare_desc(cls, value: str) -> str:
+         """Prepare a description string that was created on a template."""
+         return dedent(value)
+
      @model_validator(mode="after")
      def __prepare_running_id(self):
          if self.run_id is None:
              self.run_id = gen_id(self.id or "", unique=True)
+
+         # VALIDATE: A job ID should not be dynamic with a params template.
+         if has_template(self.id):
+             raise ValueError("Job ID should not have any template.")
+
          return self

+     def get_running_id(self, run_id: str) -> Self:
+         """Return a Job model object with its job running ID changed to an
+         input running ID.
+
+         :param run_id: A replacement job running ID.
+         :rtype: Self
+         """
+         return self.model_copy(update={"run_id": run_id})
+
      def stage(self, stage_id: str) -> Stage:
          """Return the stage model that matches an input stage ID."""
          for stage in self.stages:
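`get_running_id` relies on Pydantic's `model_copy(update=...)`, which returns a copy with the given fields overridden instead of mutating the shared instance; this is what lets one loaded `Job` or `Pipeline` serve many concurrent runs. A small demonstration with a stand-in model:

```python
from typing import Optional

from pydantic import BaseModel


class Job(BaseModel):
    # Stand-in for the package's Job model.
    id: Optional[str] = None
    run_id: Optional[str] = None

    def get_running_id(self, run_id: str) -> "Job":
        # Copy-on-write: the original (possibly shared) model is untouched.
        return self.model_copy(update={"run_id": run_id})


template = Job(id="transform", run_id="original")
runner = template.get_running_id("run-20240101")
print(template.run_id, runner.run_id)  # original run-20240101
```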
@@ -207,12 +278,12 @@ class Job(BaseModel):
          raise ValueError(f"Stage ID {stage_id} does not exist")

      def set_outputs(self, output: DictData) -> DictData:
+         """Set the outputs of a job execution."""
          if len(output) > 1 and self.strategy.is_set():
              return {"strategies": output}
-
          return output[next(iter(output))]

-     def strategy_execute(
+     def execute_strategy(
          self,
          strategy: DictData,
          params: DictData,
@@ -232,6 +303,7 @@ class Job(BaseModel):
          :raise JobException: If it has any error from StageException or
              UtilException.
          """
+         # NOTE: Force stop this execution if event was set from main execution.
          if event and event.is_set():
              return Result(
                  status=1,
@@ -239,7 +311,7 @@ class Job(BaseModel):
                      gen_id(strategy): {
                          "matrix": strategy,
                          "stages": {},
-                         "error": {
+                         "error_message": {
                              "message": "Process Event stopped before execution"
                          },
                      },
@@ -262,23 +334,23 @@ class Job(BaseModel):
          for stage in self.stages:

              # IMPORTANT: Change any stage running IDs to this job running ID.
-             stage.run_id = self.run_id
+             stage: Stage = stage.get_running_id(self.run_id)

              _st_name: str = stage.id or stage.name

              if stage.is_skipped(params=context):
-                 logging.info(
+                 logger.info(
                      f"({self.run_id}) [JOB]: Skip the stage: {_st_name!r}"
                  )
                  continue

-             logging.info(
+             logger.info(
                  f"({self.run_id}) [JOB]: Start execute the stage: {_st_name!r}"
              )

              # NOTE: Logging a matrix that pass on this stage execution.
              if strategy:
-                 logging.info(f"({self.run_id}) [JOB]: Matrix: {strategy}")
+                 logger.info(f"({self.run_id}) [JOB]: Matrix: {strategy}")

              # NOTE:
              # I do not use below syntax because `params` dict be the
@@ -303,8 +375,12 @@ class Job(BaseModel):
                      context={
                          gen_id(strategy): {
                              "matrix": strategy,
-                             "stages": filter_func(context.pop("stages", {})),
-                             "error": {
+                             # NOTE: If the job strategy executor uses
+                             #   multithreading, it does not filter function
+                             #   objects from the context.
+                             # ---
+                             # "stages": filter_func(context.pop("stages", {})),
+                             "stages": context.pop("stages", {}),
+                             "error_message": {
                                  "message": (
                                      "Process Event stopped before execution"
                                  ),
@@ -314,15 +390,20 @@ class Job(BaseModel):
              )
              try:
                  rs: Result = stage.execute(params=context)
-                 stage.set_outputs(rs.context, params=context)
+                 stage.set_outputs(rs.context, to=context)
              except (StageException, UtilException) as err:
-                 logging.error(
+                 logger.error(
                      f"({self.run_id}) [JOB]: {err.__class__.__name__}: {err}"
                  )
                  raise JobException(
                      f"Get stage execution error: {err.__class__.__name__}: "
                      f"{err}"
                  ) from None
+
+             # NOTE: Remove the new stage object that was created from the
+             #   ``get_running_id`` method.
+             del stage
+
          return Result(
              status=0,
              context={
@@ -345,109 +426,132 @@ class Job(BaseModel):
          :param params: An input parameters that use on job execution.
          :rtype: Result
          """
-         strategy_context: DictData = {}
+         context: DictData = {}

          # NOTE: Normal Job execution.
          if (not self.strategy.is_set()) or self.strategy.max_parallel == 1:
              for strategy in self.strategy.make():
-                 rs: Result = self.strategy_execute(
+                 rs: Result = self.execute_strategy(
                      strategy, params=copy.deepcopy(params)
                  )
-                 strategy_context.update(rs.context)
+                 context.update(rs.context)
              return Result(
                  status=0,
-                 context=strategy_context,
+                 context=context,
              )

-         # WARNING: (WF001) I got error that raise when use
-         # ``ProcessPoolExecutor``;
-         # ---
-         # _pickle.PicklingError: Can't pickle
-         # <function ??? at 0x000001F0BE80F160>: attribute lookup ???
-         # on ddeutil.workflow.stage failed
+         # # WARNING: (WF001) I got an error that raises when using
+         # # ``ProcessPoolExecutor``;
+         # # ---
+         # # _pickle.PicklingError: Can't pickle
+         # # <function ??? at 0x000001F0BE80F160>: attribute lookup ???
+         # # on ddeutil.workflow.stage failed
+         # #
+         # # from multiprocessing import Event, Manager
+         # with Manager() as manager:
+         #     event: Event = manager.Event()
          #
-         with Manager() as manager:
-             event: Event = manager.Event()
-
-             # NOTE: Start process pool executor for running strategy executor in
-             # parallel mode.
-             with ProcessPoolExecutor(
-                 max_workers=self.strategy.max_parallel
-             ) as executor:
-                 features: list[Future] = [
-                     executor.submit(
-                         self.strategy_execute,
-                         strategy,
-                         params=copy.deepcopy(params),
-                         event=event,
-                     )
-                     for strategy in self.strategy.make()
-                 ]
-                 if self.strategy.fail_fast:
-                     rs = self.__catch_fail_fast(event, features)
-                 else:
-                     rs = self.__catch_all_completed(features)
+         #     # NOTE: Start process pool executor for running strategy executor
+         #     # in parallel mode.
+         #     with ProcessPoolExecutor(
+         #         max_workers=self.strategy.max_parallel
+         #     ) as executor:
+         #         futures: list[Future] = [
+         #             executor.submit(
+         #                 self.execute_strategy,
+         #                 strategy,
+         #                 params=copy.deepcopy(params),
+         #                 event=event,
+         #             )
+         #             for strategy in self.strategy.make()
+         #         ]
+         #         if self.strategy.fail_fast:
+         #             rs = self.__catch_fail_fast(event, futures)
+         #         else:
+         #             rs = self.__catch_all_completed(futures)
+
+         # NOTE: Create an event for canceling the running executors.
+         event: Event = Event()
+
+         with ThreadPoolExecutor(
+             max_workers=self.strategy.max_parallel
+         ) as executor:
+             futures: list[Future] = [
+                 executor.submit(
+                     self.execute_strategy,
+                     strategy,
+                     params=copy.deepcopy(params),
+                     event=event,
+                 )
+                 for strategy in self.strategy.make()
+             ]
+
+             # NOTE: Dynamically catch the futures with the fail-fast flag.
+             if self.strategy.fail_fast:
+                 rs: Result = self.__catch_fail_fast(event, futures)
+             else:
+                 rs: Result = self.__catch_all_completed(futures)
          return Result(
              status=0,
              context=rs.context,
          )

-     def __catch_fail_fast(self, event: Event, features: list[Future]) -> Result:
-         """Job parallel pool features catching with fail-fast mode. That will
-         stop all not done features if it receive the first exception from all
-         running features.
+     def __catch_fail_fast(self, event: Event, futures: list[Future]) -> Result:
+         """Job parallel pool futures catching with fail-fast mode. That will
+         stop all not-done futures if it receives the first exception from all
+         running futures.

          :param event:
-         :param features: A list of features.
+         :param futures: A list of futures.
          :rtype: Result
          """
-         strategy_context: DictData = {}
+         context: DictData = {}
          # NOTE: Get results from a collection of tasks with a
          # timeout that has the first exception.
          done, not_done = wait(
-             features, timeout=1800, return_when=FIRST_EXCEPTION
+             futures, timeout=1800, return_when=FIRST_EXCEPTION
          )
          nd: str = (
              f", the strategies do not run is {not_done}" if not_done else ""
          )
-         logging.debug(f"[JOB]: Strategy is set Fail Fast{nd}")
+         logger.debug(f"({self.run_id}) [JOB]: Strategy is set Fail Fast{nd}")
+
+         if len(done) != len(futures):

-         # NOTE: Stop all running tasks
-         event.set()
+             # NOTE: Stop all running tasks
+             event.set()

-         # NOTE: Cancel any scheduled tasks
-         for future in features:
-             future.cancel()
+             # NOTE: Cancel any scheduled tasks
+             for future in futures:
+                 future.cancel()

          status: int = 0
-         for f in done:
-             if f.exception():
+         for future in done:
+             if future.exception():
                  status = 1
-                 logging.error(
+                 logger.error(
                      f"({self.run_id}) [JOB]: One stage failed with: "
-                     f"{f.exception()}, shutting down this feature."
+                     f"{future.exception()}, shutting down this future."
                  )
-             elif f.cancelled():
+             elif future.cancelled():
                  continue
              else:
-                 rs: Result = f.result(timeout=60)
-                 strategy_context.update(rs.context)
-         return Result(
-             status=status,
-             context=strategy_context,
-         )
+                 rs: Result = future.result(timeout=60)
+                 context.update(rs.context)
+         return Result(status=status, context=context)

-     def __catch_all_completed(self, features: list[Future]) -> Result:
-         """Job parallel pool features catching with all-completed mode.
+     def __catch_all_completed(self, futures: list[Future]) -> Result:
+         """Job parallel pool futures catching with all-completed mode.

-         :param features: A list of features.
+         :param futures: A list of futures.
+         :rtype: Result
          """
-         strategy_context: DictData = {}
+         context: DictData = {}
          status: int = 0
-         for feature in as_completed(features):
+         for future in as_completed(futures):
              try:
-                 rs: Result = feature.result(timeout=60)
-                 strategy_context.update(rs.context)
+                 rs: Result = future.result(timeout=60)
+                 context.update(rs.context)
              except PickleError as err:
                  # NOTE: (WF001) I do not want to fix this issue because
                  # it does not make sense and over-engineering with
@@ -458,34 +562,42 @@ class Job(BaseModel):
                  ) from None
              except TimeoutError:
                  status = 1
-                 logging.warning("Task is hanging. Attempting to kill.")
-                 feature.cancel()
-                 if not feature.cancelled():
-                     logging.warning("Failed to cancel the task.")
+                 logger.warning(
+                     f"({self.run_id}) [JOB]: Task is hanging. Attempting to "
+                     f"kill."
+                 )
+                 future.cancel()
+                 time.sleep(0.1)
+                 if not future.cancelled():
+                     logger.warning(
+                         f"({self.run_id}) [JOB]: Failed to cancel the task."
+                     )
                  else:
-                     logging.warning("Task canceled successfully.")
+                     logger.warning(
+                         f"({self.run_id}) [JOB]: Task canceled successfully."
+                     )
              except JobException as err:
                  status = 1
-                 logging.error(
+                 logger.error(
                      f"({self.run_id}) [JOB]: Get stage exception with "
                      f"fail-fast does not set;\n{err.__class__.__name__}:\n\t"
                      f"{err}"
                  )
-         return Result(status=status, context=strategy_context)
+         return Result(status=status, context=context)


  class Pipeline(BaseModel):
-     """Pipeline Model, the main feature of this project: it is the workflow
-     data for running anywhere that you want. It uses lightweight code to
-     execute it.
+     """Pipeline Model, the main feature of this project: it is the workflow
+     data for running anywhere that you want, or for scheduling tasks in the
+     background. It uses lightweight code on top of a Pydantic model and
+     enhances it with an execute method.
      """

      name: str = Field(description="A pipeline name.")
      desc: Optional[str] = Field(
          default=None,
          description=(
-             "A pipeline description that is able to be string of markdown "
-             "content."
+             "A pipeline description that can be string of markdown content."
          ),
      )
      params: dict[str, Param] = Field(
@@ -501,33 +613,46 @@ class Pipeline(BaseModel):
          description="A mapping of job ID and job model that already loaded.",
      )
      run_id: Optional[str] = Field(
-         default=None, description="A running job ID.", repr=False
+         default=None,
+         description="A running pipeline ID.",
+         repr=False,
+         exclude=True,
      )

+     @property
+     def new_run_id(self) -> str:
+         """Running ID of this pipeline that always generates a new unique value."""
+         return gen_id(self.name, unique=True)
+
      @classmethod
      def from_loader(
          cls,
          name: str,
          externals: DictData | None = None,
      ) -> Self:
-         """Create Pipeline instance from the Loader object.
+         """Create a Pipeline instance from the Loader object that only receives
+         an input pipeline name. The loader object will use this pipeline name
+         to search for the configuration data of this pipeline model in the
+         conf path.

          :param name: A pipeline name that want to pass to Loader object.
          :param externals: An external parameters that want to pass to Loader
              object.
+         :rtype: Self
          """
          loader: Loader = Loader(name, externals=(externals or {}))
+
+         # NOTE: Validate that the config type matches the current model.
+         if loader.type != cls:
+             raise ValueError(f"Type {loader.type} does not match with {cls}")
+
          loader_data: DictData = copy.deepcopy(loader.data)

          # NOTE: Add name to loader data
          loader_data["name"] = name.replace(" ", "_")

-         if "jobs" not in loader_data:
-             raise ValueError("Config does not set ``jobs`` value")
-
          # NOTE: Prepare `on` data
          cls.__bypass_on(loader_data)
-         return cls.model_validate(loader_data)
+         return cls.model_validate(obj=loader_data)

      @classmethod
      def __bypass_on(cls, data: DictData, externals: DictData | None = None):
@@ -537,6 +662,8 @@ class Pipeline(BaseModel):
              on = [on]
          if any(not isinstance(i, (dict, str)) for i in on):
              raise TypeError("The ``on`` key should be list of str or dict")
+
+         # NOTE: Pass on value to Loader and keep the on model object in the on field
          data["on"] = [
              (
                  Loader(n, externals=(externals or {})).data
@@ -562,25 +689,48 @@ class Pipeline(BaseModel):
              }
          return values

+     @field_validator("desc", mode="after")
+     def ___prepare_desc(cls, value: str) -> str:
+         """Prepare a description string that was created on a template."""
+         return dedent(value)
+
      @model_validator(mode="after")
      def __validate_jobs_need_and_prepare_running_id(self):
+         """Validate that every needed job in any job exists."""
          for job in self.jobs:
              if not_exist := [
                  need for need in self.jobs[job].needs if need not in self.jobs
              ]:
                  raise PipelineException(
                      f"This needed jobs: {not_exist} do not exist in this "
-                     f"pipeline."
+                     f"pipeline, {self.name!r}"
                  )

              # NOTE: update a job id with its job id from pipeline template
              self.jobs[job].id = job

          if self.run_id is None:
-             self.run_id = gen_id(self.name, unique=True)
+             self.run_id = self.new_run_id
+
+         # VALIDATE: A pipeline name should not be dynamic with a params
+         #   template.
+         if has_template(self.name):
+             raise ValueError(
+                 f"Pipeline name should not have any template, please check, "
+                 f"{self.name!r}."
+             )

          return self

+     def get_running_id(self, run_id: str) -> Self:
+         """Return a Pipeline model object with its pipeline running ID changed
+         to an input running ID.
+
+         :param run_id: A replacement pipeline running ID.
+         :rtype: Self
+         """
+         return self.model_copy(update={"run_id": run_id})
+
      def job(self, name: str) -> Job:
          """Return a Job model that exists in this pipeline.

@@ -591,7 +741,10 @@ class Pipeline(BaseModel):
          :returns: A job model that exists on this pipeline by input name.
          """
          if name not in self.jobs:
-             raise ValueError(f"Job {name!r} does not exists")
+             raise ValueError(
+                 f"A Job {name!r} does not exist in this pipeline, "
+                 f"{self.name!r}"
+             )
          return self.jobs[name]

      def parameterize(self, params: DictData) -> DictData:
@@ -629,95 +782,213 @@ class Pipeline(BaseModel):
      def release(
          self,
          on: On,
-         params: DictData | None = None,
+         params: DictData,
+         queue: list[datetime],
          *,
-         waiting_sec: int = 600,
-         sleep_interval: int = 10,
-     ) -> str:
+         waiting_sec: int = 60,
+         sleep_interval: int = 15,
+         log: Log = None,
+     ) -> Result:
          """Start running the pipeline with the on schedule in a period of 30
          minutes. That means it will still run in the background for 30 minutes
          until the schedule matches with its time.
+
+         This method allows the pipeline to use a log object to save the
+         execution result to a log destination, like a file log in the local
+         `/logs` directory.
+
+         :param on: An on schedule value.
+         :param params: A pipeline parameter that passes to the execute method.
+         :param queue: A list of release times that are already running.
+         :param waiting_sec: A second period value that allows the pipeline to
+             execute.
+         :param sleep_interval: A second value to wait until the time to
+             execute.
+         :param log: A log object that saves the execution result.
+         :rtype: Result
          """
-         params: DictData = params or {}
-         logging.info(f"[CORE] Start release: {self.name!r} : {on.cronjob}")
+         log: Log = log or FileLog
+         tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
+         gen: CronRunner = on.generate(
+             datetime.now(tz=tz).replace(second=0, microsecond=0)
+             + timedelta(seconds=1)
+         )
+         cron_tz: ZoneInfo = gen.tz

-         gen: CronRunner = on.generate(datetime.now())
-         tz: ZoneInfo = gen.tz
-         next_running_time: datetime = gen.next
+         # NOTE: Get the next schedule time that generates from now.
+         next_time: datetime = gen.next

-         if get_diff_sec(next_running_time, tz=tz) < waiting_sec:
-             logging.debug(
-                 f"[CORE]: {self.name} closely to run >> "
-                 f"{next_running_time:%Y-%m-%d %H:%M:%S}"
+         # NOTE: Get the next time until it is not already pointed in the log.
+         while log.is_pointed(self.name, next_time, queue=queue):
+             next_time: datetime = gen.next
+
+         # NOTE: Push this next running time to the log queue
+         heappush(queue, next_time)
+
+         # VALIDATE: Check that the difference between the next schedule time
+         #   and now is less than the waiting period (second unit).
+         if get_diff_sec(next_time, tz=cron_tz) > waiting_sec:
+             logger.debug(
+                 f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+                 f"Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
              )

-             # NOTE: Release when the time is nearly to schedule time.
-             while (duration := get_diff_sec(next_running_time, tz=tz)) > 15:
-                 time.sleep(sleep_interval)
-                 logging.debug(
-                     f"[CORE]: {self.name!r} : Sleep until: {duration}"
-                 )
+             # NOTE: Remove the next datetime from the queue.
+             queue.remove(next_time)
+
+             time.sleep(0.15)
+             return Result(
+                 status=0,
+                 context={
+                     "params": params,
+                     "poking": {"skipped": [str(on.cronjob)], "run": []},
+                 },
+             )
+
+         logger.debug(
+             f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+             f"Closely to run >> {next_time:%Y-%m-%d %H:%M:%S}"
+         )
+
+         # NOTE: Release when the time is nearly the schedule time.
+         while (duration := get_diff_sec(next_time, tz=cron_tz)) > (
+             sleep_interval + 5
+         ):
+             logger.debug(
+                 f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+                 f"Sleep until: {duration}"
+             )
+             time.sleep(sleep_interval)
+
+         time.sleep(0.5)

-             time.sleep(1)
-             rs: Result = self.execute(params=params)
-             logging.debug(f"{rs.context}")
+         # NOTE: Release parameters that are used for templating if params has
+         #   a template.
+         release_params: DictData = {
+             "release": {
+                 "logical_date": next_time,
+             },
+         }
+
+         # WARNING: Re-create the pipeline object with a new running pipeline
+         #   ID.
+         runner: Self = self.get_running_id(run_id=self.new_run_id)
+         rs: Result = runner.execute(
+             params=param2template(params, release_params),
+         )
+         logger.debug(
+             f"({runner.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+             f"End release {next_time:%Y-%m-%d %H:%M:%S}"
+         )

-             return f"[CORE]: Start Execute: {self.name}"
-         return f"[CORE]: {self.name} does not closely to run yet."
+         # NOTE: Delete the copied pipeline instance for saving memory.
+         del runner
+
+         rs.set_parent_run_id(self.run_id)
+         rs_log: Log = log.model_validate(
+             {
+                 "name": self.name,
+                 "on": str(on.cronjob),
+                 "release": next_time,
+                 "context": rs.context,
+                 "parent_run_id": rs.run_id,
+                 "run_id": rs.run_id,
+             }
+         )
+         # NOTE: Save the execution result to the destination of the input log
+         #   object.
+         rs_log.save(excluded=None)
+
+         queue.remove(next_time)
+         time.sleep(0.05)
+         return Result(
+             status=0,
+             context={
+                 "params": params,
+                 "poking": {"skipped": [], "run": [str(on.cronjob)]},
+             },
+         )

-     def poke(self, params: DictData | None = None):
-         """Poke pipeline threading task for executing with its schedules that
-         was set on the `on`.
+     def poke(
+         self,
+         params: DictData | None = None,
+         *,
+         log: Log | None = None,
+     ) -> list[Result]:
+         """Poke the pipeline with a threading executor pool for executing with
+         all its schedules that were set on the `on` value. This method will
+         observe the schedules that are nearing their run time with the
+         ``self.release()`` method.
+
+         :param params: A parameters that want to pass to the release method.
+         :param log: A log object that want to use on this poking process.
+         :rtype: list[Result]
          """
-         params: DictData = params or {}
-         logging.info(
-             f"[CORE]: Start Poking: {self.name!r} :"
-             f"{gen_id(self.name, unique=True)}"
+         logger.info(
+             f"({self.run_id}) [POKING]: Start Poking: {self.name!r} ..."
          )
-         results = []
-         with ThreadPoolExecutor(
-             max_workers=int(
-                 os.getenv("WORKFLOW_CORE_MAX_PIPELINE_POKING", "4")
-             ),
-         ) as executor:
-             futures: list[Future] = [
-                 executor.submit(
-                     self.release,
-                     on,
-                     params=params,
+
+         # NOTE: If this pipeline does not set the on schedule, it will return
+         #   an empty result.
+         if len(self.on) == 0:
+             return []
+
+         params: DictData = params or {}
+         queue: list[datetime] = []
+         results: list[Result] = []
+
+         wk: int = int(os.getenv("WORKFLOW_CORE_MAX_PIPELINE_POKING") or "4")
+         with ThreadPoolExecutor(max_workers=wk) as executor:
+             # TODO: If I want to run infinite loop.
+             futures: list[Future] = []
+             for on in self.on:
+                 futures.append(
+                     executor.submit(
+                         self.release,
+                         on,
+                         params=params,
+                         log=log,
+                         queue=queue,
+                     )
                  )
-                 for on in self.on
-             ]
+                 delay()
+
+             # WARNING: This poking method does not allow fail-fast logic
+             #   for catching the parallel execution results.
              for future in as_completed(futures):
-                 rs = future.result()
-                 logging.info(rs)
-                 results.append(rs)
+                 results.append(future.result(timeout=60))
+
+         if len(queue) > 0:
+             logger.error(
+                 f"({self.run_id}) [POKING]: Log Queue is not empty when the "
+                 f"poking process finished."
+             )
+
          return results

-     def job_execute(
+     def execute_job(
          self,
          job: str,
          params: DictData,
      ) -> Result:
          """Job Executor that use on pipeline executor.
+
          :param job: A job ID that want to execute.
          :param params: A params that was parameterized from pipeline execution.
+         :rtype: Result
          """
          # VALIDATE: check a job ID that exists in this pipeline or not.
          if job not in self.jobs:
              raise PipelineException(
                  f"The job ID: {job} does not exist in {self.name!r} pipeline."
              )
-
          try:
-             logging.info(f"({self.run_id}) [PIPELINE]: Start execute: {job!r}")
-             job_obj: Job = self.jobs[job]
+             logger.info(f"({self.run_id}) [PIPELINE]: Start execute: {job!r}")
+
+             # IMPORTANT:
+             #   Change any job running IDs to this pipeline running ID.
+             job_obj: Job = self.jobs[job].get_running_id(self.run_id)
              j_rs: Result = job_obj.execute(params=params)
+
          except JobException as err:
-             raise PipelineException(
-                 f"The job ID: {job} get raise error: {err.__class__.__name__}:"
-                 f"\n{err}"
-             ) from None
+             raise PipelineException(f"{job}: JobException: {err}") from None
+
          return Result(
              status=j_rs.status,
              context={job: job_obj.set_outputs(j_rs.context)},
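Taken together, `poke()` fans one `release()` thread out per `On` schedule; each thread dedupes its next fire time against the shared heap `queue`, sleeps until close to the cron time, then executes a fresh `get_running_id` copy and persists a log record. A hypothetical usage sketch, assuming a matching `run_python_etl` YAML config exists in the conf path (both the name and the params are illustrative):

```python
from ddeutil.workflow.pipeline import Pipeline

# Hypothetical config name; from_loader looks it up in the conf path.
pipeline = Pipeline.from_loader("run_python_etl", externals={})

# Blocks until each nearby schedule fires (or is skipped), one thread per `on`.
results = pipeline.poke(params={"asat-dt": "2024-01-01"})
for rs in results:
    print(rs.status, rs.context["poking"])
```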
@@ -738,9 +1009,8 @@ class Pipeline(BaseModel):
              for limit time of execution and waiting job dependency.
          :rtype: Result

-         ---
-
          See Also:
+         ---

              The result of execution process for each jobs and stages on this
              pipeline will keeping in dict which able to catch out with all jobs and
@@ -752,15 +1022,16 @@ class Pipeline(BaseModel):
              ... ${job-name}.stages.${stage-id}.outputs.${key}

          """
-         logging.info(
-             f"[CORE]: Start Execute: {self.name}:"
-             f"{gen_id(self.name, unique=True)}"
-         )
+         logger.info(f"({self.run_id}) [CORE]: Start Execute: {self.name!r} ...")
          params: DictData = params or {}
+         ts: float = time.monotonic()

          # NOTE: It should not do anything if it does not have job.
          if not self.jobs:
-             logging.warning("[PIPELINE]: This pipeline does not have any jobs")
+             logger.warning(
+                 f"({self.run_id}) [PIPELINE]: This pipeline: {self.name!r} "
+                 f"does not have any jobs"
+             )
              return Result(status=0, context=params)

          # NOTE: Create a job queue that keep the job that want to running after
@@ -769,125 +1040,147 @@ class Pipeline(BaseModel):
          for job_id in self.jobs:
              jq.put(job_id)

-         # NOTE: Create start timestamp
-         ts: float = time.monotonic()
-
          # NOTE: Create result context that will pass this context to any
          # execution dependency.
-         rs: Result = Result(context=self.parameterize(params))
+         context: DictData = self.parameterize(params)
          try:
-             rs.receive(
-                 self.__exec_non_threading(rs, jq, ts, timeout=timeout)
-                 if (
-                     worker := int(
-                         os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "1")
-                     )
-                 )
-                 == 1
+             worker: int = int(os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "2"))
+             (
+                 self.__exec_non_threading(context, ts, jq, timeout=timeout)
+                 if worker == 1
                  else self.__exec_threading(
-                     rs, jq, ts, worker=worker, timeout=timeout
+                     context, ts, jq, worker=worker, timeout=timeout
                  )
              )
-             return rs
+             return Result(status=0, context=context)
          except PipelineException as err:
-             rs.context.update({"error": {"message": str(err)}})
-             rs.status = 1
-             return rs
+             context.update(
+                 {"error_message": f"{err.__class__.__name__}: {err}"}
+             )
+             return Result(status=1, context=context)

      def __exec_threading(
          self,
-         rs: Result,
-         job_queue: Queue,
+         context: DictData,
          ts: float,
+         job_queue: Queue,
          *,
-         worker: int = 1,
+         worker: int = 2,
          timeout: int = 600,
-     ) -> Result:
-         """Pipeline threading execution."""
+     ) -> DictData:
+         """Pipeline threading execution.
+
+         :param context: A context pipeline data that want to downstream passing.
+         :param ts: A start timestamp that use for checking execute time should
+             timeout.
+         :param timeout: A second value unit that bounding running time.
+         :param worker: A number of threading executor pool size.
+         :rtype: DictData
+         """
          not_time_out_flag: bool = True
+         logger.debug(
+             f"({self.run_id}) [CORE]: Run {self.name} with threading job "
+             f"executor"
+         )

          # IMPORTANT: The job execution can run parallel and waiting by
          # needed.
          with ThreadPoolExecutor(max_workers=worker) as executor:
              futures: list[Future] = []
+
              while not job_queue.empty() and (
                  not_time_out_flag := ((time.monotonic() - ts) < timeout)
              ):
                  job_id: str = job_queue.get()
                  job: Job = self.jobs[job_id]

-                 # IMPORTANT:
-                 # Change any job running IDs to this pipeline running ID.
-                 job.run_id = self.run_id
-
-                 if any(need not in rs.context["jobs"] for need in job.needs):
+                 if any(need not in context["jobs"] for need in job.needs):
                      job_queue.put(job_id)
-                     time.sleep(0.5)
+                     time.sleep(0.25)
                      continue

                  futures.append(
                      executor.submit(
-                         self.job_execute,
+                         self.execute_job,
                          job_id,
-                         params=copy.deepcopy(rs.context),
+                         params=copy.deepcopy(context),
                      ),
                  )
+                 job_queue.task_done()
+
+             # NOTE: Wait for all items to finish processing
+             job_queue.join()

              for future in as_completed(futures):
                  if err := future.exception():
-                     logging.error(f"{err}")
+                     logger.error(f"{err}")
                      raise PipelineException(f"{err}")

                  # NOTE: Update job result to pipeline result.
-                 rs.receive_jobs(future.result(timeout=20))
+                 context["jobs"].update(future.result(timeout=20).context)

-         if not not_time_out_flag:
-             logging.warning(
-                 f"({self.run_id}) [PIPELINE]: Execution of pipeline was timeout"
-             )
-             raise PipelineException(
-                 f"Execution of pipeline: {self.name} was timeout"
-             )
-         rs.status = 0
-         return rs
+         if not_time_out_flag:
+             return context
+
+         # NOTE: Raise timeout error.
+         logger.warning(
+             f"({self.run_id}) [PIPELINE]: Execution of pipeline, {self.name!r} "
+             f", was timeout"
+         )
+         raise PipelineException(
+             f"Execution of pipeline: {self.name} was timeout"
+         )

      def __exec_non_threading(
          self,
-         rs: Result,
-         job_queue: Queue,
+         context: DictData,
          ts: float,
+         job_queue: Queue,
          *,
          timeout: int = 600,
-     ) -> Result:
-         """Pipeline non-threading execution."""
+     ) -> DictData:
+         """Pipeline non-threading execution that uses sequential job running
+         and waits for the previous run to succeed.
+
+         :param context: A context pipeline data that want to downstream passing.
+         :param ts: A start timestamp that use for checking execute time should
+             timeout.
+         :param timeout: A second value unit that bounding running time.
+         :rtype: DictData
+         """
          not_time_out_flag: bool = True
-         logging.info(f"[CORE]: Run {self.name} with non-threading job executor")
+         logger.debug(
+             f"({self.run_id}) [CORE]: Run {self.name} with non-threading job "
+             f"executor"
+         )
+
          while not job_queue.empty() and (
              not_time_out_flag := ((time.monotonic() - ts) < timeout)
          ):
              job_id: str = job_queue.get()
              job: Job = self.jobs[job_id]

-             # IMPORTANT:
-             # Change any job running IDs to this pipeline running ID.
-             job.run_id = self.run_id
-
              # NOTE:
-             if any(need not in rs.context["jobs"] for need in job.needs):
+             if any(need not in context["jobs"] for need in job.needs):
                  job_queue.put(job_id)
-                 time.sleep(0.5)
+                 time.sleep(0.25)
                  continue

              # NOTE: Start job execution.
-             job_rs = self.job_execute(job_id, params=copy.deepcopy(rs.context))
-             rs.context["jobs"].update(job_rs.context)
+             job_rs = self.execute_job(job_id, params=copy.deepcopy(context))
+             context["jobs"].update(job_rs.context)
+             job_queue.task_done()

-         if not not_time_out_flag:
-             logging.warning(
-                 f"({self.run_id}) [PIPELINE]: Execution of pipeline was timeout"
-             )
-             raise PipelineException(
-                 f"Execution of pipeline: {self.name} was timeout"
-             )
-         rs.status = 0
-         return rs
+         # NOTE: Wait for all items to finish processing
+         job_queue.join()
+
+         if not_time_out_flag:
+             return context
+
+         # NOTE: Raise timeout error.
+         logger.warning(
+             f"({self.run_id}) [PIPELINE]: Execution of pipeline was timeout"
+         )
+         raise PipelineException(
+             f"Execution of pipeline: {self.name} was timeout"
+         )
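The requeue-until-needs-met loop above is a simple way to order a job DAG without computing a topological sort up front: a job whose `needs` are not yet in `context["jobs"]` goes back on the queue to be retried later. A self-contained sketch of the pattern (job names and needs are illustrative); note that `queue.Queue.join()` only returns once every `get` has been balanced by a `task_done`, including on the requeue path:

```python
import queue

# Illustrative DAG: each job lists the jobs that must finish before it.
jobs = {"load": ["transform"], "extract": [], "transform": ["extract"]}

jq: queue.Queue = queue.Queue()
for job_id in jobs:
    jq.put(job_id)

finished: dict[str, dict] = {}  # stands in for context["jobs"]
while not jq.empty():
    job_id = jq.get()
    if any(need not in finished for need in jobs[job_id]):
        jq.put(job_id)   # dependencies not ready: requeue and retry later
        jq.task_done()   # balance this get so join() can complete
        continue
    finished[job_id] = {"status": 0}  # "execute" the job
    jq.task_done()

jq.join()                # returns immediately: all gets were marked done
print(list(finished))    # ['extract', 'transform', 'load']
```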