ddeutil-workflow 0.0.13__py3-none-any.whl → 0.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddeutil/workflow/job.py CHANGED
@@ -3,6 +3,8 @@
 # Licensed under the MIT License. See LICENSE in the project root for
 # license information.
 # ------------------------------------------------------------------------------
+"""Job Model that use for keeping stages and node that running its stages.
+"""
 from __future__ import annotations
 
 import copy
@@ -15,7 +17,6 @@ from concurrent.futures import (
     wait,
 )
 from functools import lru_cache
-from pickle import PickleError
 from textwrap import dedent
 from threading import Event
 from typing import Optional
@@ -61,9 +62,13 @@ __all__: TupleStr = (
 
 @freeze_args
 @lru_cache
-def make(matrix, include, exclude) -> list[DictStr]:
-    """Return List of product of matrix values that already filter with
-    exclude and add include.
+def make(
+    matrix: Matrix,
+    include: MatrixInclude,
+    exclude: MatrixExclude,
+) -> list[DictStr]:
+    """Make a list of product of matrix values that already filter with
+    exclude matrix and add specific matrix with include.
 
     :param matrix: A matrix values that want to cross product to possible
         parallelism values.
@@ -72,11 +77,12 @@ def make(matrix, include, exclude) -> list[DictStr]:
     :rtype: list[DictStr]
     """
     # NOTE: If it does not set matrix, it will return list of an empty dict.
-    if not (mt := matrix):
+    if len(matrix) == 0:
         return [{}]
 
+    # NOTE: Remove matrix that exists on the exclude.
     final: list[DictStr] = []
-    for r in cross_product(matrix=mt):
+    for r in cross_product(matrix=matrix):
         if any(
             all(r[k] == v for k, v in exclude.items()) for exclude in exclude
         ):
@@ -85,7 +91,7 @@ def make(matrix, include, exclude) -> list[DictStr]:
 
     # NOTE: If it is empty matrix and include, it will return list of an
     #   empty dict.
-    if not final and not include:
+    if len(final) == 0 and not include:
         return [{}]
 
     # NOTE: Add include to generated matrix with exclude list.
@@ -95,16 +101,20 @@ def make(matrix, include, exclude) -> list[DictStr]:
         #   Validate any key in include list should be a subset of some one
         #   in matrix.
         if all(not (set(inc.keys()) <= set(m.keys())) for m in final):
-            raise ValueError("Include should have the keys equal to matrix")
+            raise ValueError(
+                "Include should have the keys that equal to all final matrix."
+            )
 
         # VALIDATE:
-        #   Validate value of include does not duplicate with generated
-        #   matrix.
+        #   Validate value of include should not duplicate with generated
+        #   matrix. So, it will skip if this value already exists.
         if any(
             all(inc.get(k) == v for k, v in m.items()) for m in [*final, *add]
         ):
            continue
         add.append(inc)
+
+    # NOTE: Merge all matrix together.
     final.extend(add)
     return final
 
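The reworked ``make()`` keeps the semantics its docstring describes: cross-product the matrix values, drop any combination that fully matches an exclude entry, then append include entries that are not already present. The following is a simplified, self-contained sketch of that behavior for reference only (it skips the caching decorators and the key-validation error path; ``make_sketch`` and the sample values are illustrative, not part of the package):

    from itertools import product

    def make_sketch(matrix, include, exclude):
        # Cross product of every value list in the matrix.
        final = (
            [dict(zip(matrix, values)) for values in product(*matrix.values())]
            if matrix
            else [{}]
        )
        # Drop combinations that fully match an exclude entry.
        final = [
            r for r in final
            if not any(all(r.get(k) == v for k, v in ex.items()) for ex in exclude)
        ]
        # Append include entries that do not duplicate an existing combination.
        for inc in include:
            if not any(all(inc.get(k) == v for k, v in m.items()) for m in final):
                final.append(inc)
        return final or [{}]

    make_sketch(
        matrix={"python": ["3.9", "3.10"], "os": ["linux", "windows"]},
        include=[{"python": "3.11", "os": "linux"}],
        exclude=[{"python": "3.9", "os": "windows"}],
    )
    # [{'python': '3.9', 'os': 'linux'}, {'python': '3.10', 'os': 'linux'},
    #  {'python': '3.10', 'os': 'windows'}, {'python': '3.11', 'os': 'linux'}]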
@@ -190,7 +200,7 @@ class Strategy(BaseModel):
 
 
 class Job(BaseModel):
-    """Job Model (group of stages).
+    """Job Pydantic model object (group of stages).
 
     This job model allow you to use for-loop that call matrix strategy. If
     you pass matrix mapping and it able to generate, you will see it running
@@ -220,8 +230,7 @@ class Job(BaseModel):
     id: Optional[str] = Field(
         default=None,
         description=(
-            "A job ID, this value will add from workflow after validation "
-            "process."
+            "A job ID that it will add from workflow after validation process."
         ),
     )
     desc: Optional[str] = Field(
@@ -256,6 +265,9 @@ class Job(BaseModel):
     def __prepare_keys(cls, values: DictData) -> DictData:
         """Rename key that use dash to underscore because Python does not
         support this character exist in any variable name.
+
+        :param values: A passing value that coming for initialize this object.
+        :rtype: DictData
         """
         dash2underscore("runs-on", values)
         return values
@@ -266,8 +278,11 @@ class Job(BaseModel):
         return dedent(value)
 
     @model_validator(mode="after")
-    def __prepare_running_id(self):
-        """Prepare the job running ID."""
+    def __prepare_running_id(self) -> Self:
+        """Prepare the job running ID.
+
+        :rtype: Self
+        """
         if self.run_id is None:
             self.run_id = gen_id(self.id or "", unique=True)
 
@@ -287,17 +302,51 @@ class Job(BaseModel):
         return self.model_copy(update={"run_id": run_id})
 
     def stage(self, stage_id: str) -> Stage:
-        """Return stage model that match with an input stage ID."""
+        """Return stage model that match with an input stage ID.
+
+        :param stage_id: A stage ID that want to extract from this job.
+        :rtype: Stage
+        """
         for stage in self.stages:
             if stage_id == (stage.id or ""):
                 return stage
         raise ValueError(f"Stage ID {stage_id} does not exists")
 
-    def set_outputs(self, output: DictData) -> DictData:
-        """Setting output of job execution"""
-        if len(output) > 1 and self.strategy.is_set():
-            return {"strategies": output}
-        return output[next(iter(output))]
+    def set_outputs(self, output: DictData, to: DictData) -> DictData:
+        """Set an outputs from execution process to the receive context. The
+        result from execution will pass to value of ``strategies`` key.
+
+        For example of setting output method, If you receive execute output
+        and want to set on the `to` like;
+
+            ... (i)   output: {'strategy01': bar, 'strategy02': bar}
+            ... (ii)  to: {}
+
+        The result of the `to` variable will be;
+
+            ... (iii) to: {
+                        'strategies': {
+                            'strategy01': bar, 'strategy02': bar
+                        }
+                    }
+
+        :param output: An output context.
+        :param to: A context data that want to add output result.
+        :rtype: DictData
+        """
+        if self.id is None:
+            raise JobException(
+                "This job do not set the ID before setting output."
+            )
+
+        to[self.id] = (
+            {"strategies": output}
+            if self.strategy.is_set()
+            # NOTE:
+            #   This is the best way to get single key from dict.
+            else output[next(iter(output))]
+        )
+        return to
 
     def execute_strategy(
         self,
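The new ``set_outputs`` writes into the caller's context instead of returning a bare mapping. The docstring example above translates to the sketch below (the job ID ``'load'`` and the strategy keys are made-up sample values; the real method reads ``self.id`` and ``self.strategy.is_set()`` and raises ``JobException`` when the job has no ID):

    def set_outputs_sketch(job_id, has_strategy, output, to):
        # Matrix jobs keep every strategy result under the "strategies" key;
        # plain jobs unwrap their single generated entry.
        to[job_id] = (
            {"strategies": output} if has_strategy else output[next(iter(output))]
        )
        return to

    set_outputs_sketch(
        "load", True, {"strategy01": {"stages": {}}, "strategy02": {"stages": {}}}, {}
    )
    # {'load': {'strategies': {'strategy01': {'stages': {}},
    #                          'strategy02': {'stages': {}}}}}

    set_outputs_sketch("load", False, {"2150810470": {"stages": {}}}, {})
    # {'load': {'stages': {}}}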
@@ -305,46 +354,38 @@
         params: DictData,
         *,
         event: Event | None = None,
+        raise_error: bool = True,
     ) -> Result:
         """Job Strategy execution with passing dynamic parameters from the
         workflow execution to strategy matrix.
 
-        This execution is the minimum level execution of job model.
+        This execution is the minimum level of execution of this job model.
+        It different with ``self.execute`` because this method run only one
+        strategy and return with context of this strategy data.
+
+        :raise JobException: If it has any error from StageException or
+            UtilException.
 
         :param strategy: A metrix strategy value.
         :param params: A dynamic parameters.
         :param event: An manger event that pass to the PoolThreadExecutor.
+        :param raise_error: A flag that raise error instead catching to result
+            if it get exception from stage execution.
         :rtype: Result
-
-        :raise JobException: If it has any error from StageException or
-            UtilException.
         """
-        # NOTE: Force stop this execution if event was set from main execution.
-        if event and event.is_set():
-            return Result(
-                status=1,
-                context={
-                    gen_id(strategy): {
-                        "matrix": strategy,
-                        "stages": {},
-                        "error_message": {
-                            "message": "Process Event stopped before execution"
-                        },
-                    },
-                },
-            )
+        strategy_id: str = gen_id(strategy)
 
         # NOTE: Create strategy execution context and update a matrix and copied
         #   of params. So, the context value will have structure like;
-        #   ---
+        #
         #   {
         #       "params": { ... },   <== Current input params
         #       "jobs": { ... },     <== Current input params
         #       "matrix": { ... }    <== Current strategy value
         #   }
         #
-        context: DictData = params
-        context.update({"matrix": strategy})
+        context: DictData = copy.deepcopy(params)
+        context.update({"matrix": strategy, "stages": {}})
 
         # IMPORTANT: The stage execution only run sequentially one-by-one.
         for stage in self.stages:
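The switch from ``context: DictData = params`` to ``copy.deepcopy(params)`` is what keeps one strategy's updates from leaking into the shared input dictionary, as the NOTE comment moved further down explains. A minimal illustration of the aliasing it guards against (the sample ``params`` value is made up):

    import copy

    params = {"params": {"name": "foo"}, "jobs": {}}

    aliased = params                          # same object as the caller's dict
    aliased.update({"matrix": {"python": "3.10"}})
    print("matrix" in params)                 # True: the caller sees the mutation

    params.pop("matrix")
    isolated = copy.deepcopy(params)          # independent copy per strategy
    isolated.update({"matrix": {"python": "3.10"}, "stages": {}})
    print("matrix" in params)                 # False: the original stays untouched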
@@ -355,9 +396,7 @@
             _st_name: str = stage.id or stage.name
 
             if stage.is_skipped(params=context):
-                logger.info(
-                    f"({self.run_id}) [JOB]: Skip the stage: {_st_name!r}"
-                )
+                logger.info(f"({self.run_id}) [JOB]: Skip stage: {_st_name!r}")
                 continue
 
             logger.info(
@@ -368,34 +407,23 @@
             if strategy:
                 logger.info(f"({self.run_id}) [JOB]: Matrix: {strategy}")
 
-            # NOTE:
-            #   I do not use below syntax because `params` dict be the
-            #   reference memory pointer and it was changed when I action
-            #   anything like update or re-construct this.
-            #
-            #       ... params |= stage.execute(params=params)
-            #
-            #   This step will add the stage result to ``stages`` key in
-            #   that stage id. It will have structure like;
-            #   ---
-            #   {
-            #       "params": { ... },
-            #       "jobs": { ... },
-            #       "matrix": { ... },
-            #       "stages": { { "stage-id-1": ... }, ... }
-            #   }
-            #
+            # NOTE: Force stop this execution if event was set from main
+            #   execution.
             if event and event.is_set():
                 return Result(
                     status=1,
                     context={
-                        gen_id(strategy): {
+                        strategy_id: {
                             "matrix": strategy,
                             # NOTE: If job strategy executor use multithreading,
                             #   it will not filter function object from context.
                             # ---
                             #   "stages": filter_func(context.pop("stages", {})),
                             "stages": context.pop("stages", {}),
+                            # NOTE: Set the error keys.
+                            "error": JobException(
+                                "Process Event stopped before execution"
+                            ),
                             "error_message": {
                                 "message": (
                                     "Process Event stopped before execution"
@@ -404,17 +432,40 @@
                         },
                     },
                 )
+
+            # NOTE:
+            #   I do not use below syntax because `params` dict be the
+            #   reference memory pointer and it was changed when I action
+            #   anything like update or re-construct this.
+            #
+            #       ... params |= stage.execute(params=params)
+            #
+            #   This step will add the stage result to ``stages`` key in
+            #   that stage id. It will have structure like;
+            #
+            #   {
+            #       "params": { ... },
+            #       "jobs": { ... },
+            #       "matrix": { ... },
+            #       "stages": { { "stage-id-1": ... }, ... }
+            #   }
+            #
             try:
-                rs: Result = stage.execute(params=context)
-                stage.set_outputs(rs.context, to=context)
+                stage.set_outputs(
+                    stage.execute(params=context).context,
+                    to=context,
+                )
             except (StageException, UtilException) as err:
                 logger.error(
                     f"({self.run_id}) [JOB]: {err.__class__.__name__}: {err}"
                 )
-                raise JobException(
-                    f"Get stage execution error: {err.__class__.__name__}: "
-                    f"{err}"
-                ) from None
+                if raise_error:
+                    raise JobException(
+                        f"Get stage execution error: {err.__class__.__name__}: "
+                        f"{err}"
+                    ) from None
+                else:
+                    raise NotImplementedError() from None
 
             # NOTE: Remove new stage object that was created from
             #   ``get_running_id`` method.
@@ -423,12 +474,8 @@
         return Result(
             status=0,
             context={
-                gen_id(strategy): {
+                strategy_id: {
                     "matrix": strategy,
-                    # NOTE: (WF001) filter own created function from stages
-                    #   value, because it does not dump with pickle when you
-                    #   execute with multiprocess.
-                    #
                     "stages": filter_func(context.pop("stages", {})),
                 },
             },
@@ -436,19 +483,21 @@
 
     def execute(self, params: DictData | None = None) -> Result:
         """Job execution with passing dynamic parameters from the workflow
-        execution. It will generate matrix values at the first step and for-loop
-        any metrix to all stages dependency.
+        execution. It will generate matrix values at the first step and run
+        multithread on this metrics to the ``stages`` field of this job.
 
         :param params: An input parameters that use on job execution.
         :rtype: Result
         """
         context: DictData = {}
+        params: DictData = {} if params is None else params
 
-        # NOTE: Normal Job execution.
+        # NOTE: Normal Job execution without parallel strategy.
         if (not self.strategy.is_set()) or self.strategy.max_parallel == 1:
             for strategy in self.strategy.make():
                 rs: Result = self.execute_strategy(
-                    strategy, params=copy.deepcopy(params)
+                    strategy=strategy,
+                    params=copy.deepcopy(params),
                 )
                 context.update(rs.context)
             return Result(
@@ -456,36 +505,6 @@
                 context=context,
             )
 
-        # # WARNING: (WF001) I got error that raise when use
-        # #     ``ProcessPoolExecutor``;
-        # # ---
-        # #     _pickle.PicklingError: Can't pickle
-        # #         <function ??? at 0x000001F0BE80F160>: attribute lookup ???
-        # #         on ddeutil.workflow.stage failed
-        # #
-        # # from multiprocessing import Event, Manager
-        # with Manager() as manager:
-        #     event: Event = manager.Event()
-        #
-        #     # NOTE: Start process pool executor for running strategy executor
-        #     #   in parallel mode.
-        #     with ProcessPoolExecutor(
-        #         max_workers=self.strategy.max_parallel
-        #     ) as executor:
-        #         futures: list[Future] = [
-        #             executor.submit(
-        #                 self.execute_strategy,
-        #                 strategy,
-        #                 params=copy.deepcopy(params),
-        #                 event=event,
-        #             )
-        #             for strategy in self.strategy.make()
-        #         ]
-        #         if self.strategy.fail_fast:
-        #             rs = self.__catch_fail_fast(event, futures)
-        #         else:
-        #             rs = self.__catch_all_completed(futures)
-
         # NOTE: Create event for cancel executor stop running.
         event: Event = Event()
@@ -495,53 +514,65 @@
             futures: list[Future] = [
                 executor.submit(
                     self.execute_strategy,
-                    strategy,
-                    params=copy.deepcopy(params),
+                    strategy=strategy,
+                    params=params,
                     event=event,
                 )
                 for strategy in self.strategy.make()
             ]
 
             # NOTE: Dynamic catching futures object with fail-fast flag.
-            if self.strategy.fail_fast:
-                rs: Result = self.__catch_fail_fast(event, futures)
-            else:
-                rs: Result = self.__catch_all_completed(futures)
-            return Result(
-                status=0,
-                context=rs.context,
-            )
+            return (
+                self.__catch_fail_fast(event=event, futures=futures)
+                if self.strategy.fail_fast
+                else self.__catch_all_completed(futures=futures)
+            )
 
-    def __catch_fail_fast(self, event: Event, futures: list[Future]) -> Result:
+    def __catch_fail_fast(
+        self,
+        event: Event,
+        futures: list[Future],
+        *,
+        timeout: int = 1800,
+        result_timeout: int = 60,
+    ) -> Result:
         """Job parallel pool futures catching with fail-fast mode. That will
         stop all not done futures if it receive the first exception from all
         running futures.
 
-        :param event: An event
+        :param event: An event manager instance that able to set stopper on the
+            observing thread/process.
         :param futures: A list of futures.
+        :param timeout: A timeout to waiting all futures complete.
+        :param result_timeout: A timeout of getting result from the future
+            instance when it was running completely.
         :rtype: Result
         """
+        rs_final: Result = Result()
         context: DictData = {}
-        # NOTE: Get results from a collection of tasks with a
-        #   timeout that has the first exception.
+        status: int = 0
+
+        # NOTE: Get results from a collection of tasks with a timeout that has
+        #   the first exception.
         done, not_done = wait(
-            futures, timeout=1800, return_when=FIRST_EXCEPTION
+            futures,
+            timeout=timeout,
+            return_when=FIRST_EXCEPTION,
         )
         nd: str = (
             f", the strategies do not run is {not_done}" if not_done else ""
         )
         logger.debug(f"({self.run_id}) [JOB]: Strategy is set Fail Fast{nd}")
 
+        # NOTE: Stop all running tasks with setting the event manager and cancel
+        #   any scheduled tasks.
         if len(done) != len(futures):
-
-            # NOTE: Stop all running tasks
             event.set()
-
-            # NOTE: Cancel any scheduled tasks
             for future in futures:
                 future.cancel()
 
-        status: int = 0
+        del future
+
         for future in done:
             if future.exception():
                 status = 1
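The fail-fast helper is the standard ``concurrent.futures`` pattern: wait for the first exception, then signal the cooperative event and cancel whatever has not started. A minimal, self-contained sketch of that pattern (the ``worker`` function and its sample failure are illustrative, not part of the package):

    from concurrent.futures import FIRST_EXCEPTION, ThreadPoolExecutor, wait
    from threading import Event

    def worker(n: int, stop: Event) -> int:
        # Cooperative tasks check the event and abort early once it is set.
        if stop.is_set():
            raise RuntimeError(f"strategy {n} stopped before execution")
        if n == 2:
            raise ValueError("boom")          # the first failure trips fail-fast
        return n

    stop = Event()
    with ThreadPoolExecutor(max_workers=4) as executor:
        futures = [executor.submit(worker, n, stop) for n in range(4)]
        done, not_done = wait(futures, timeout=1800, return_when=FIRST_EXCEPTION)
        if len(done) != len(futures):
            stop.set()                        # tell running tasks to stop
            for future in futures:
                future.cancel()               # cancel anything not started yet
        status = 1 if any(
            f.exception() for f in done if not f.cancelled()
        ) else 0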
@@ -551,32 +582,36 @@
                 )
             elif future.cancelled():
                 continue
-            else:
-                rs: Result = future.result(timeout=60)
-                context.update(rs.context)
-        return Result(status=status, context=context)
 
-    def __catch_all_completed(self, futures: list[Future]) -> Result:
+            # NOTE: Update the result context to main job context.
+            context.update(future.result(timeout=result_timeout).context)
+
+            del future
+
+        return rs_final.catch(status=status, context=context)
+
+    def __catch_all_completed(
+        self,
+        futures: list[Future],
+        *,
+        timeout: int = 1800,
+        result_timeout: int = 60,
+    ) -> Result:
         """Job parallel pool futures catching with all-completed mode.
 
         :param futures: A list of futures that want to catch all completed
             result.
+        :param timeout: A timeout to waiting all futures complete.
+        :param result_timeout: A timeout of getting result from the future
+            instance when it was running completely.
         :rtype: Result
         """
+        rs_final: Result = Result()
         context: DictData = {}
         status: int = 0
-        for future in as_completed(futures):
+        for future in as_completed(futures, timeout=timeout):
             try:
-                rs: Result = future.result(timeout=60)
-                context.update(rs.context)
-            except PickleError as err:
-                # NOTE: (WF001) I do not want to fix this issue because
-                #   it does not make sense and over-engineering with
-                #   this bug fix process.
-                raise JobException(
-                    f"PyStage that create object on locals does use "
-                    f"parallel in strategy execution;\n\t{err}"
-                ) from None
+                context.update(future.result(timeout=result_timeout).context)
             except TimeoutError:
                 status = 1
                 logger.warning(
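Likewise, the all-completed helper drains every future with ``as_completed`` and records a failure status instead of stopping early. A small sketch of that pattern (``collect_all`` and the lambda payloads are illustrative, not the package API):

    from concurrent.futures import ThreadPoolExecutor, TimeoutError, as_completed

    def collect_all(futures, timeout=1800, result_timeout=60):
        context, status = {}, 0
        for future in as_completed(futures, timeout=timeout):
            try:
                context.update(future.result(timeout=result_timeout))
            except TimeoutError:              # result took too long; try to cancel
                status = 1
                future.cancel()
            except Exception:                 # any other failure only flips the status
                status = 1
        return status, context

    with ThreadPoolExecutor(max_workers=2) as executor:
        futures = [executor.submit(lambda i=i: {f"strategy{i}": i}) for i in range(3)]
        print(collect_all(futures))           # status 0 with all three results merged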
@@ -585,14 +620,13 @@
                 )
                 future.cancel()
                 time.sleep(0.1)
-                if not future.cancelled():
-                    logger.warning(
-                        f"({self.run_id}) [JOB]: Failed to cancel the task."
-                    )
-                else:
-                    logger.warning(
-                        f"({self.run_id}) [JOB]: Task canceled successfully."
-                    )
+
+                stmt: str = (
+                    "Failed to cancel the task."
+                    if not future.cancelled()
+                    else "Task canceled successfully."
+                )
+                logger.warning(f"({self.run_id}) [JOB]: {stmt}")
             except JobException as err:
                 status = 1
                 logger.error(
@@ -600,4 +634,6 @@
                     f"fail-fast does not set;\n{err.__class__.__name__}:\n\t"
                     f"{err}"
                 )
-        return Result(status=status, context=context)
+            finally:
+                del future
+        return rs_final.catch(status=status, context=context)