ddeutil-workflow 0.0.8__py3-none-any.whl → 0.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +3 -14
- ddeutil/workflow/api.py +44 -75
- ddeutil/workflow/cli.py +51 -0
- ddeutil/workflow/cron.py +713 -0
- ddeutil/workflow/loader.py +65 -13
- ddeutil/workflow/log.py +147 -49
- ddeutil/workflow/on.py +18 -15
- ddeutil/workflow/pipeline.py +389 -140
- ddeutil/workflow/repeat.py +9 -5
- ddeutil/workflow/route.py +30 -37
- ddeutil/workflow/scheduler.py +398 -659
- ddeutil/workflow/stage.py +145 -73
- ddeutil/workflow/utils.py +133 -42
- ddeutil_workflow-0.0.9.dist-info/METADATA +273 -0
- ddeutil_workflow-0.0.9.dist-info/RECORD +22 -0
- {ddeutil_workflow-0.0.8.dist-info → ddeutil_workflow-0.0.9.dist-info}/WHEEL +1 -1
- ddeutil_workflow-0.0.9.dist-info/entry_points.txt +2 -0
- ddeutil/workflow/app.py +0 -45
- ddeutil_workflow-0.0.8.dist-info/METADATA +0 -266
- ddeutil_workflow-0.0.8.dist-info/RECORD +0 -20
- {ddeutil_workflow-0.0.8.dist-info → ddeutil_workflow-0.0.9.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.8.dist-info → ddeutil_workflow-0.0.9.dist-info}/top_level.txt +0 -0
ddeutil/workflow/pipeline.py
CHANGED
```diff
@@ -12,23 +12,32 @@ import time
 from concurrent.futures import (
     FIRST_EXCEPTION,
     Future,
-    ProcessPoolExecutor,
     ThreadPoolExecutor,
     as_completed,
     wait,
 )
-from datetime import datetime
-from
+from datetime import datetime, timedelta
+from heapq import heappush
 from pickle import PickleError
 from queue import Queue
+from textwrap import dedent
+from threading import Event
 from typing import Optional
 from zoneinfo import ZoneInfo
 
 from pydantic import BaseModel, Field
-from pydantic.functional_validators import model_validator
+from pydantic.functional_validators import field_validator, model_validator
 from typing_extensions import Self
 
-from .__types import
+from .__types import (
+    DictData,
+    DictStr,
+    Matrix,
+    MatrixExclude,
+    MatrixInclude,
+    TupleStr,
+)
+from .cron import CronRunner
 from .exceptions import (
     JobException,
     PipelineException,
```
```diff
@@ -36,17 +45,26 @@ from .exceptions import (
     UtilException,
 )
 from .loader import Loader
+from .log import FileLog, Log
 from .on import On
-from .scheduler import CronRunner
 from .stage import Stage
 from .utils import (
     Param,
     Result,
     cross_product,
     dash2underscore,
+    delay,
     filter_func,
     gen_id,
     get_diff_sec,
+    has_template,
+    param2template,
+)
+
+__all__: TupleStr = (
+    "Strategy",
+    "Job",
+    "Pipeline",
 )
 
 
```
```diff
@@ -166,9 +184,15 @@ class Job(BaseModel):
     ... }
     """
 
-    id: Optional[str] = Field(default=None)
-    desc: Optional[str] = Field(
-
+    id: Optional[str] = Field(default=None, description="A job ID.")
+    desc: Optional[str] = Field(
+        default=None,
+        description="A job description that can be string of markdown content.",
+    )
+    runs_on: Optional[str] = Field(
+        default=None,
+        description="A target executor node for this job use to execution.",
+    )
     stages: list[Stage] = Field(
         default_factory=list,
         description="A list of Stage of this job.",
```
```diff
@@ -182,7 +206,9 @@ class Job(BaseModel):
         description="A strategy matrix that want to generate.",
     )
     run_id: Optional[str] = Field(
-        default=None,
+        default=None,
+        description="A running job ID.",
+        repr=False,
     )
 
     @model_validator(mode="before")
```
```diff
@@ -193,12 +219,31 @@ class Job(BaseModel):
         dash2underscore("runs-on", values)
         return values
 
+    @field_validator("desc", mode="after")
+    def ___prepare_desc(cls, value: str) -> str:
+        """Prepare description string that was created on a template."""
+        return dedent(value)
+
     @model_validator(mode="after")
     def __prepare_running_id(self):
         if self.run_id is None:
             self.run_id = gen_id(self.id or "", unique=True)
+
+        # VALIDATE: Validate job id should not dynamic with params template.
+        if has_template(self.id):
+            raise ValueError("Job ID should not has any template.")
+
         return self
 
+    def get_running_id(self, run_id: str) -> Self:
+        """Return Job model object that changing job running ID with an
+        input running ID.
+
+        :param run_id: A replace job running ID.
+        :rtype: Self
+        """
+        return self.model_copy(update={"run_id": run_id})
+
     def stage(self, stage_id: str) -> Stage:
         """Return stage model that match with an input stage ID."""
         for stage in self.stages:
```
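The new `get_running_id` helper leans on Pydantic v2's `model_copy(update=...)`, which returns a copy of the model with only the listed fields replaced. A minimal sketch (a standalone model, not the package's full `Job`) of why this matters when one job object is shared across runs: the original keeps its `run_id` while the copy carries the replacement.

```python
# Sketch only: a trimmed-down model showing the ``model_copy`` behaviour
# that ``get_running_id`` relies on.
from typing import Optional

from pydantic import BaseModel


class MiniJob(BaseModel):
    id: Optional[str] = None
    run_id: Optional[str] = None

    def get_running_id(self, run_id: str) -> "MiniJob":
        # NOTE: ``model_copy`` returns a new object; ``self`` is untouched.
        return self.model_copy(update={"run_id": run_id})


job = MiniJob(id="first-job", run_id="original")
new_job = job.get_running_id("replaced")
assert job.run_id == "original" and new_job.run_id == "replaced"
```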
```diff
@@ -209,7 +254,6 @@ class Job(BaseModel):
     def set_outputs(self, output: DictData) -> DictData:
         if len(output) > 1 and self.strategy.is_set():
             return {"strategies": output}
-
         return output[next(iter(output))]
 
     def strategy_execute(
```
```diff
@@ -262,7 +306,7 @@ class Job(BaseModel):
         for stage in self.stages:
 
             # IMPORTANT: Change any stage running IDs to this job running ID.
-            stage
+            stage: Stage = stage.get_running_id(self.run_id)
 
             _st_name: str = stage.id or stage.name
 
```
```diff
@@ -303,7 +347,11 @@ class Job(BaseModel):
                 context={
                     gen_id(strategy): {
                         "matrix": strategy,
-
+                        # NOTE: If job strategy executor use multithreading,
+                        #   it will not filter function object from context.
+                        # ---
+                        # "stages": filter_func(context.pop("stages", {})),
+                        "stages": context.pop("stages", {}),
                         "error": {
                             "message": (
                                 "Process Event stopped before execution"
```
```diff
@@ -314,7 +362,7 @@ class Job(BaseModel):
             )
             try:
                 rs: Result = stage.execute(params=context)
-                stage.set_outputs(rs.context,
+                stage.set_outputs(rs.context, to=context)
             except (StageException, UtilException) as err:
                 logging.error(
                     f"({self.run_id}) [JOB]: {err.__class__.__name__}: {err}"
```
```diff
@@ -323,6 +371,11 @@ class Job(BaseModel):
                     f"Get stage execution error: {err.__class__.__name__}: "
                     f"{err}"
                 ) from None
+
+            # NOTE: Remove new stage object that was created from
+            #   ``get_running_id`` method.
+            del stage
+
         return Result(
             status=0,
             context={
```
```diff
@@ -359,53 +412,74 @@ class Job(BaseModel):
                 context=strategy_context,
             )
 
-        # WARNING: (WF001) I got error that raise when use
-        # ``ProcessPoolExecutor``;
-        # ---
-        # _pickle.PicklingError: Can't pickle
-        # <function ??? at 0x000001F0BE80F160>: attribute lookup ???
-        # on ddeutil.workflow.stage failed
+        # # WARNING: (WF001) I got error that raise when use
+        # # ``ProcessPoolExecutor``;
+        # # ---
+        # # _pickle.PicklingError: Can't pickle
+        # # <function ??? at 0x000001F0BE80F160>: attribute lookup ???
+        # # on ddeutil.workflow.stage failed
+        # #
+        # # from multiprocessing import Event, Manager
+        # with Manager() as manager:
+        #     event: Event = manager.Event()
         #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # # NOTE: Start process pool executor for running strategy executor
+        # #   in parallel mode.
+        # with ProcessPoolExecutor(
+        #     max_workers=self.strategy.max_parallel
+        # ) as executor:
+        #     futures: list[Future] = [
+        #         executor.submit(
+        #             self.strategy_execute,
+        #             strategy,
+        #             params=copy.deepcopy(params),
+        #             event=event,
+        #         )
+        #         for strategy in self.strategy.make()
+        #     ]
+        #     if self.strategy.fail_fast:
+        #         rs = self.__catch_fail_fast(event, futures)
+        #     else:
+        #         rs = self.__catch_all_completed(futures)
+
+        # NOTE: Create event for cancel executor stop running.
+        event: Event = Event()
+
+        with ThreadPoolExecutor(
+            max_workers=self.strategy.max_parallel
+        ) as executor:
+            futures: list[Future] = [
+                executor.submit(
+                    self.strategy_execute,
+                    strategy,
+                    params=copy.deepcopy(params),
+                    event=event,
+                )
+                for strategy in self.strategy.make()
+            ]
+            if self.strategy.fail_fast:
+                rs: Result = self.__catch_fail_fast(event, futures)
+            else:
+                rs: Result = self.__catch_all_completed(futures)
         return Result(
             status=0,
             context=rs.context,
         )
 
-    def __catch_fail_fast(self, event: Event,
-        """Job parallel pool
-        stop all not done
-        running
+    def __catch_fail_fast(self, event: Event, futures: list[Future]) -> Result:
+        """Job parallel pool futures catching with fail-fast mode. That will
+        stop all not done futures if it receive the first exception from all
+        running futures.
 
         :param event:
-        :param
+        :param futures: A list of futures.
         :rtype: Result
         """
        strategy_context: DictData = {}
        # NOTE: Get results from a collection of tasks with a
        #   timeout that has the first exception.
        done, not_done = wait(
-
+            futures, timeout=1800, return_when=FIRST_EXCEPTION
        )
        nd: str = (
            f", the strategies do not run is {not_done}" if not_done else ""
```
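The commented-out block documents why this release settles on `ThreadPoolExecutor`: a process pool must pickle the callable and its arguments to ship them to a worker process, and dynamically created functions (closures, decorated stage callables) fail that attribute lookup, which is exactly the `_pickle.PicklingError` quoted above. A minimal sketch reproducing the same class of failure; `make_task` is a hypothetical stand-in for whatever builds a stage function at runtime.

```python
# Sketch only: closures cannot be pickled, so a ProcessPoolExecutor would
# fail on submit, while a ThreadPoolExecutor (shared memory, no pickling)
# runs the same callable without complaint.
import pickle
from concurrent.futures import ThreadPoolExecutor


def make_task(prefix: str):
    def task(value: int) -> str:  # local function: unpicklable by name
        return f"{prefix}-{value}"

    return task


task = make_task("strategy")
try:
    pickle.dumps(task)
except (pickle.PicklingError, AttributeError) as err:
    print(f"process pool would fail: {err}")

with ThreadPoolExecutor(max_workers=2) as executor:
    print(executor.submit(task, 1).result())  # -> strategy-1
```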
```diff
@@ -416,37 +490,38 @@ class Job(BaseModel):
         event.set()
 
         # NOTE: Cancel any scheduled tasks
-        for future in
+        for future in futures:
             future.cancel()
 
         status: int = 0
-        for
-            if
+        for future in done:
+            if future.exception():
                 status = 1
                 logging.error(
                     f"({self.run_id}) [JOB]: One stage failed with: "
-                    f"{
+                    f"{future.exception()}, shutting down this future."
                 )
-            elif
+            elif future.cancelled():
                 continue
             else:
-                rs: Result =
+                rs: Result = future.result(timeout=60)
                 strategy_context.update(rs.context)
         return Result(
             status=status,
             context=strategy_context,
         )
 
-    def __catch_all_completed(self,
-        """Job parallel pool
+    def __catch_all_completed(self, futures: list[Future]) -> Result:
+        """Job parallel pool futures catching with all-completed mode.
 
-        :param
+        :param futures: A list of futures.
+        :rtype: Result
         """
         strategy_context: DictData = {}
         status: int = 0
-        for
+        for future in as_completed(futures):
             try:
-                rs: Result =
+                rs: Result = future.result(timeout=60)
                 strategy_context.update(rs.context)
             except PickleError as err:
                 # NOTE: (WF001) I do not want to fix this issue because
```
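The rewritten `__catch_fail_fast` follows the standard `concurrent.futures` fail-fast recipe: `wait(..., return_when=FIRST_EXCEPTION)` returns as soon as any future raises, the shared `Event` lets already-running workers stop cooperatively, and `cancel()` drops futures that have not started yet. A hedged sketch of that pattern outside the Job model (the `strategy` function here is illustrative):

```python
# Sketch only: fail-fast over a pool of workers.
import time
from concurrent.futures import FIRST_EXCEPTION, ThreadPoolExecutor, wait
from threading import Event


def strategy(index: int, event: Event) -> int:
    if index == 1:
        raise RuntimeError("boom")
    time.sleep(0.2)
    # NOTE: Cooperative stop for tasks that were already running.
    return -1 if event.is_set() else index


event = Event()
with ThreadPoolExecutor(max_workers=2) as executor:
    futures = [executor.submit(strategy, i, event) for i in range(4)]
    done, not_done = wait(futures, timeout=1800, return_when=FIRST_EXCEPTION)
    if any(future.exception() for future in done):
        event.set()
        for future in not_done:
            future.cancel()  # NOTE: only not-yet-started futures cancel.
```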
```diff
@@ -459,8 +534,8 @@ class Job(BaseModel):
             except TimeoutError:
                 status = 1
                 logging.warning("Task is hanging. Attempting to kill.")
-
-                if not
+                future.cancel()
+                if not future.cancelled():
                     logging.warning("Failed to cancel the task.")
                 else:
                     logging.warning("Task canceled successfully.")
```
```diff
@@ -475,7 +550,7 @@ class Job(BaseModel):
 
 
 class Pipeline(BaseModel):
-    """Pipeline Model this is the main
+    """Pipeline Model this is the main future of this project because it use to
     be workflow data for running everywhere that you want. It use lightweight
     coding line to execute it.
     """
```
```diff
@@ -484,8 +559,7 @@ class Pipeline(BaseModel):
     desc: Optional[str] = Field(
         default=None,
         description=(
-            "A pipeline description that
-            "content."
+            "A pipeline description that can be string of markdown content."
         ),
     )
     params: dict[str, Param] = Field(
```
```diff
@@ -501,20 +575,30 @@ class Pipeline(BaseModel):
         description="A mapping of job ID and job model that already loaded.",
     )
     run_id: Optional[str] = Field(
-        default=None,
+        default=None,
+        description="A running pipeline ID.",
+        repr=False,
     )
 
+    @property
+    def new_run_id(self) -> str:
+        """Running ID of this pipeline that always generate new unique value."""
+        return gen_id(self.name, unique=True)
+
     @classmethod
     def from_loader(
         cls,
         name: str,
         externals: DictData | None = None,
     ) -> Self:
-        """Create Pipeline instance from the Loader object
+        """Create Pipeline instance from the Loader object that only receive
+        an input pipeline name. The loader object will use this pipeline name to
+        searching configuration data of this pipeline model in conf path.
 
         :param name: A pipeline name that want to pass to Loader object.
         :param externals: An external parameters that want to pass to Loader
             object.
+        :rtype: Self
         """
         loader: Loader = Loader(name, externals=(externals or {}))
         loader_data: DictData = copy.deepcopy(loader.data)
```
```diff
@@ -537,6 +621,8 @@ class Pipeline(BaseModel):
             on = [on]
         if any(not isinstance(i, (dict, str)) for i in on):
             raise TypeError("The ``on`` key should be list of str or dict")
+
+        # NOTE: Pass on value to Loader and keep on model object to on field
         data["on"] = [
             (
                 Loader(n, externals=(externals or {})).data
```
```diff
@@ -562,25 +648,48 @@ class Pipeline(BaseModel):
         }
         return values
 
+    @field_validator("desc", mode="after")
+    def ___prepare_desc(cls, value: str) -> str:
+        """Prepare description string that was created on a template."""
+        return dedent(value)
+
     @model_validator(mode="after")
     def __validate_jobs_need_and_prepare_running_id(self):
+        """Validate each need job in any jobs should exists."""
         for job in self.jobs:
             if not_exist := [
                 need for need in self.jobs[job].needs if need not in self.jobs
             ]:
                 raise PipelineException(
                     f"This needed jobs: {not_exist} do not exist in this "
-                    f"pipeline."
+                    f"pipeline, {self.name!r}"
                 )
 
             # NOTE: update a job id with its job id from pipeline template
             self.jobs[job].id = job
 
         if self.run_id is None:
-            self.run_id =
+            self.run_id = self.new_run_id
+
+        # VALIDATE: Validate pipeline name should not dynamic with params
+        #   template.
+        if has_template(self.name):
+            raise ValueError(
+                f"Pipeline name should not has any template, please check, "
+                f"{self.name!r}."
+            )
 
         return self
 
+    def get_running_id(self, run_id: str) -> Self:
+        """Return Pipeline model object that changing pipeline running ID with
+        an input running ID.
+
+        :param run_id: A replace pipeline running ID.
+        :rtype: Self
+        """
+        return self.model_copy(update={"run_id": run_id})
+
     def job(self, name: str) -> Job:
         """Return Job model that exists on this pipeline.
 
```
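The `needs` validation above is a plain existence test over the jobs mapping: every dependency a job declares must itself be a key in `jobs`. A minimal sketch of the same check outside the Pydantic model (job names are hypothetical):

```python
# Sketch only: the dependency-existence check the validator performs.
jobs: dict[str, dict] = {
    "extract": {"needs": []},
    "transform": {"needs": ["extract"]},
    "load": {"needs": ["transform", "cleanup"]},  # "cleanup" is missing
}

for job, conf in jobs.items():
    if not_exist := [need for need in conf["needs"] if need not in jobs]:
        # Raises for "load" because "cleanup" is not a job key.
        raise ValueError(
            f"This needed jobs: {not_exist} do not exist in this pipeline."
        )
```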
|
```diff
@@ -591,7 +700,10 @@ class Pipeline(BaseModel):
         :returns: A job model that exists on this pipeline by input name.
         """
         if name not in self.jobs:
-            raise ValueError(
+            raise ValueError(
+                f"A Job {name!r} does not exists in this pipeline, "
+                f"{self.name!r}"
+            )
         return self.jobs[name]
 
     def parameterize(self, params: DictData) -> DictData:
```
```diff
@@ -629,52 +741,146 @@ class Pipeline(BaseModel):
     def release(
         self,
         on: On,
-        params: DictData
+        params: DictData,
         *,
-        waiting_sec: int =
-        sleep_interval: int =
-
+        waiting_sec: int = 55,
+        sleep_interval: int = 15,
+        log: Log = None,
+        lq: list[datetime] = None,
+    ) -> Result:
         """Start running pipeline with the on schedule in period of 30 minutes.
         That mean it will still running at background 30 minutes until the
         schedule matching with its time.
+
+        This method allow pipeline use log object to save the execution
+        result to log destination like file log to local /logs directory.
+
+        :rtype: Result
         """
-
-
+        delay()
+        log: Log = log or FileLog
+        current_running_time = datetime.now()
+        if not (
+            latest_running_time := log.latest_point(name=self.name, queue=lq)
+        ) or (
+            latest_running_time.replace(tzinfo=ZoneInfo(on.tz))
+            < current_running_time.replace(tzinfo=ZoneInfo(on.tz))
+        ):
+            latest_running_time: datetime = current_running_time.replace(
+                tzinfo=ZoneInfo(on.tz)
+            )
+        else:
+            latest_running_time: datetime = latest_running_time.replace(
+                tzinfo=ZoneInfo(on.tz)
+            )
 
-        gen: CronRunner = on.generate(
+        gen: CronRunner = on.generate(
+            latest_running_time + timedelta(seconds=1)
+        )
         tz: ZoneInfo = gen.tz
+
+        # NOTE: get next schedule time that generate from now.
         next_running_time: datetime = gen.next
 
-
+        # NOTE: get next utils it does not logging.
+        # while log.is_pointed(self.name, next_running_time, queue=lq):
+        #     next_running_time: datetime = gen.next
+        while log.is_pointed(self.name, next_running_time, queue=lq):
+            next_running_time: datetime = gen.next
+
+        heappush(lq, next_running_time)
+
+        # VALIDATE: Check the different time between the next schedule time and
+        #   now that less than waiting period (second unit).
+        if get_diff_sec(next_running_time, tz=tz) <= waiting_sec:
             logging.debug(
-                f"[CORE]: {self.name}
-                f"{next_running_time:%Y-%m-%d %H:%M:%S}"
+                f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+                f"Closely to run >> {next_running_time:%Y-%m-%d %H:%M:%S}"
             )
 
             # NOTE: Release when the time is nearly to schedule time.
-            while (duration := get_diff_sec(next_running_time, tz=tz)) >
-
+            while (duration := get_diff_sec(next_running_time, tz=tz)) > (
+                sleep_interval + 5
+            ):
                 logging.debug(
-                    f"[CORE]: {self.name!r} :
+                    f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+                    f"Sleep until: {duration}"
                 )
+                time.sleep(sleep_interval)
 
-            time.sleep(
-            rs: Result = self.execute(params=params)
-            logging.debug(f"{rs.context}")
+            time.sleep(0.5)
 
-
-
+            # NOTE: Release parameter that use to change if params has
+            #   templating.
+            release_params: DictData = {
+                "release": {
+                    "logical_date": next_running_time,
+                },
+            }
+
+            # WARNING: Re-create pipeline object that use new running pipeline
+            #   ID.
+            pipeline: Self = self.get_running_id(run_id=self.new_run_id)
+            rs: Result = pipeline.execute(
+                params=param2template(params, release_params),
+            )
+            logging.debug(
+                f"({pipeline.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+                f"End release"
+            )
 
-
-
-
+            del pipeline
+
+            rs.set_parent_run_id(self.run_id)
+            rs_log: Log = log.model_validate(
+                {
+                    "name": self.name,
+                    "on": str(on.cronjob),
+                    "release": next_running_time,
+                    "context": rs.context,
+                    "parent_run_id": rs.run_id,
+                    "run_id": rs.run_id,
+                }
+            )
+            rs_log.save()
+        else:
+            logging.debug(
+                f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+                f"Does not closely >> {next_running_time:%Y-%m-%d %H:%M:%S}"
+            )
+            rs = Result(status=0, context={"params": params})
+
+        if lq is None:
+            return rs
+
+        lq.remove(next_running_time)
+        time.sleep(0.25)
+        return rs
+
+    def poke(
+        self,
+        params: DictData | None = None,
+        *,
+        log: Log | None = None,
+    ) -> list[Result]:
+        """Poke pipeline with threading executor pool for executing with all its
+        schedules that was set on the ``on`` value. This method will observe its
+        schedule that nearing to run with the ``self.release()`` method.
+
+        :param params: A parameters that want to pass to the release method.
+        :param log: A log object that want to use on this poking process.
+        :rtype: list[Result]
         """
         params: DictData = params or {}
-        logging.info(
-
-
-
-
+        logging.info(f"({self.run_id}) [CORE]: Start Poking: {self.name!r} ...")
+        results: list[Result] = []
+        log_queue: list[datetime] = []
+
+        # NOTE: If this pipeline does not set schedule, it will return empty
+        #   result.
+        if len(self.on) == 0:
+            return results
+
         with ThreadPoolExecutor(
             max_workers=int(
                 os.getenv("WORKFLOW_CORE_MAX_PIPELINE_POKING", "4")
```
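Because `poke` runs one `release` thread per schedule against a shared `lq` list, the `is_pointed` loop plus `heappush` is what stops two threads from claiming the same schedule tick. A hedged sketch of that claim-then-wait bookkeeping: `next_tick` stands in for `gen.next` of a `CronRunner` on a five-minute cron, and the list-membership test only approximates what `log.is_pointed` checks.

```python
# Sketch only: skip ticks already claimed in the shared heap, then claim
# the first free one before sleeping toward it.
from datetime import datetime, timedelta
from heapq import heappush

lq: list[datetime] = []  # shared log queue (a heap) owned by ``poke``


def next_tick(start: datetime) -> datetime:
    # Hypothetical stand-in for ``gen.next`` on "*/5 * * * *".
    minute = (start.minute // 5 + 1) * 5
    return start.replace(second=0, microsecond=0) + timedelta(
        minutes=minute - start.minute
    )


next_running_time = next_tick(datetime(2024, 1, 1, 0, 1))
while next_running_time in lq:  # approximates ``log.is_pointed``
    next_running_time = next_tick(next_running_time)
heappush(lq, next_running_time)  # claim the tick before waiting on it
print(next_running_time)  # -> 2024-01-01 00:05:00
```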
```diff
@@ -685,13 +891,22 @@ class Pipeline(BaseModel):
                     self.release,
                     on,
                     params=params,
+                    log=log,
+                    lq=log_queue,
                 )
                 for on in self.on
             ]
             for future in as_completed(futures):
-                rs = future.result()
-                logging.info(rs)
+                rs: Result = future.result()
+                logging.info(rs.context.get("params", {}))
                 results.append(rs)
+
+        if len(log_queue) > 0:
+            logging.error(
+                f"({self.run_id}) [CORE]: Log Queue does empty when poke "
+                f"is finishing."
+            )
+
         return results
 
     def job_execute(
```
```diff
@@ -700,6 +915,7 @@ class Pipeline(BaseModel):
         params: DictData,
     ) -> Result:
         """Job Executor that use on pipeline executor.
+
         :param job: A job ID that want to execute.
         :param params: A params that was parameterized from pipeline execution.
         """
```
```diff
@@ -708,14 +924,17 @@ class Pipeline(BaseModel):
             raise PipelineException(
                 f"The job ID: {job} does not exists on {self.name!r} pipeline."
             )
-
         try:
             logging.info(f"({self.run_id}) [PIPELINE]: Start execute: {job!r}")
-
+
+            # IMPORTANT:
+            #   Change any job running IDs to this pipeline running ID.
+            job_obj: Job = self.jobs[job].get_running_id(self.run_id)
             j_rs: Result = job_obj.execute(params=params)
+
         except JobException as err:
             raise PipelineException(
-                f"The job ID: {job} get
+                f"The job ID: {job} get error: {err.__class__.__name__}:"
                 f"\n{err}"
             ) from None
         return Result(
```
```diff
@@ -738,9 +957,8 @@ class Pipeline(BaseModel):
             for limit time of execution and waiting job dependency.
         :rtype: Result
 
-        ---
-
         See Also:
+        ---
 
         The result of execution process for each jobs and stages on this
         pipeline will keeping in dict which able to catch out with all jobs and
```
```diff
@@ -752,10 +970,7 @@ class Pipeline(BaseModel):
         ... ${job-name}.stages.${stage-id}.outputs.${key}
 
         """
-        logging.info(
-            f"[CORE]: Start Execute: {self.name}:"
-            f"{gen_id(self.name, unique=True)}"
-        )
+        logging.info(f"({self.run_id}) [CORE]: Start Execute: {self.name} ...")
         params: DictData = params or {}
 
         # NOTE: It should not do anything if it does not have job.
```
```diff
@@ -777,15 +992,15 @@ class Pipeline(BaseModel):
         rs: Result = Result(context=self.parameterize(params))
         try:
             rs.receive(
-                self.__exec_non_threading(rs,
+                self.__exec_non_threading(rs, ts, timeout=timeout)
                 if (
                     worker := int(
-                        os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "
+                        os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "2")
                     )
                 )
                 == 1
                 else self.__exec_threading(
-                    rs,
+                    rs, ts, worker=worker, timeout=timeout
                 )
             )
             return rs
```
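The executor dispatch hinges on a single environment variable: `WORKFLOW_CORE_MAX_JOB_PARALLEL` equal to 1 selects the sequential executor, any other value the threaded one with that worker count. A minimal sketch of the same branch (the two functions are illustrative stand-ins for the private methods):

```python
# Sketch only: env-var driven choice between the two job executors.
import os


def exec_non_threading() -> str:
    return "sequential job executor"


def exec_threading(worker: int) -> str:
    return f"threaded job executor with {worker} workers"


worker: int = int(os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "2"))
print(exec_non_threading() if worker == 1 else exec_threading(worker))
```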
```diff
@@ -797,14 +1012,30 @@ class Pipeline(BaseModel):
     def __exec_threading(
         self,
         rs: Result,
-        job_queue: Queue,
         ts: float,
         *,
-        worker: int =
+        worker: int = 2,
         timeout: int = 600,
     ) -> Result:
-        """Pipeline threading execution.
+        """Pipeline threading execution.
+
+        :param rs:
+        :param ts:
+        :param timeout: A second value unit that bounding running time.
+        :param worker: A number of threading executor pool size.
+        :rtype: Result
+        """
         not_time_out_flag: bool = True
+        logging.debug(
+            f"({self.run_id}): [CORE]: Run {self.name} with threading job "
+            f"executor"
+        )
+
+        # NOTE: Create a job queue that keep the job that want to running after
+        #   it dependency condition.
+        job_queue: Queue = Queue()
+        for job_id in self.jobs:
+            job_queue.put(job_id)
 
         # IMPORTANT: The job execution can run parallel and waiting by
         #   needed.
```
```diff
@@ -816,10 +1047,6 @@ class Pipeline(BaseModel):
             job_id: str = job_queue.get()
             job: Job = self.jobs[job_id]
 
-            # IMPORTANT:
-            #   Change any job running IDs to this pipeline running ID.
-            job.run_id = self.run_id
-
             if any(need not in rs.context["jobs"] for need in job.needs):
                 job_queue.put(job_id)
                 time.sleep(0.5)
```
```diff
@@ -832,6 +1059,10 @@ class Pipeline(BaseModel):
                     params=copy.deepcopy(rs.context),
                 ),
             )
+            job_queue.task_done()
+
+        # NOTE: Wait for all items to finish processing
+        job_queue.join()
 
         for future in as_completed(futures):
             if err := future.exception():
```
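The `task_done()`/`join()` pair added here follows the `queue.Queue` contract: each `get()` must eventually be matched by one `task_done()`, otherwise `join()` blocks forever on the unfinished-task counter. A hedged sketch of that bookkeeping with the requeue path accounted for explicitly (the hunk above only shows the success-path `task_done()`):

```python
# Sketch only: dependency-aware draining of a job queue where requeued
# items still acknowledge their ``get()``.
from queue import Queue

job_queue: Queue = Queue()
for job_id in ("load", "extract", "transform"):
    job_queue.put(job_id)

done: set[str] = set()
while not job_queue.empty():
    job_id = job_queue.get()
    if job_id == "load" and "transform" not in done:
        job_queue.put(job_id)  # dependency not ready yet: requeue
        job_queue.task_done()  # still acknowledge this ``get()``
        continue
    done.add(job_id)
    job_queue.task_done()

job_queue.join()  # returns immediately: the counter is back to zero
print(sorted(done))  # -> ['extract', 'load', 'transform']
```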
```diff
@@ -841,37 +1072,49 @@ class Pipeline(BaseModel):
             # NOTE: Update job result to pipeline result.
             rs.receive_jobs(future.result(timeout=20))
 
-        if
-
-
-
-
-
-        )
-
-
+        if not_time_out_flag:
+            rs.status = 0
+            return rs
+
+        # NOTE: Raise timeout error.
+        logging.warning(
+            f"({self.run_id}) [PIPELINE]: Execution of pipeline was timeout"
+        )
+        raise PipelineException(
+            f"Execution of pipeline: {self.name} was timeout"
+        )
 
     def __exec_non_threading(
         self,
         rs: Result,
-        job_queue: Queue,
         ts: float,
         *,
         timeout: int = 600,
     ) -> Result:
-        """Pipeline non-threading execution.
+        """Pipeline non-threading execution.
+
+        :param rs:
+        :param ts:
+        :param timeout: A second value unit that bounding running time.
+        :rtype: Result
+        """
         not_time_out_flag: bool = True
-        logging.
+        logging.debug(
+            f"({self.run_id}) [CORE]: Run {self.name} with non-threading job "
+            f"executor"
+        )
+        # NOTE: Create a job queue that keep the job that want to running after
+        #   it dependency condition.
+        job_queue: Queue = Queue()
+        for job_id in self.jobs:
+            job_queue.put(job_id)
+
         while not job_queue.empty() and (
             not_time_out_flag := ((time.monotonic() - ts) < timeout)
         ):
             job_id: str = job_queue.get()
             job: Job = self.jobs[job_id]
 
-            # IMPORTANT:
-            #   Change any job running IDs to this pipeline running ID.
-            job.run_id = self.run_id
-
             # NOTE:
             if any(need not in rs.context["jobs"] for need in job.needs):
                 job_queue.put(job_id)
```
```diff
@@ -881,13 +1124,19 @@ class Pipeline(BaseModel):
             # NOTE: Start job execution.
             job_rs = self.job_execute(job_id, params=copy.deepcopy(rs.context))
             rs.context["jobs"].update(job_rs.context)
+            job_queue.task_done()
 
-
-
-
-
-
-
-
-
+        # NOTE: Wait for all items to finish processing
+        job_queue.join()
+
+        if not_time_out_flag:
+            rs.status = 0
+            return rs
+
+        # NOTE: Raise timeout error.
+        logging.warning(
+            f"({self.run_id}) [PIPELINE]: Execution of pipeline was timeout"
+        )
+        raise PipelineException(
+            f"Execution of pipeline: {self.name} was timeout"
+        )
```