ddeutil-workflow 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddeutil/workflow/log.py CHANGED
@@ -7,20 +7,18 @@ from __future__ import annotations
 
 import json
 import logging
-import os
 from abc import ABC, abstractmethod
 from datetime import datetime
 from functools import lru_cache
 from pathlib import Path
 from typing import ClassVar, Optional, Union
 
-from ddeutil.core import str2bool
 from pydantic import BaseModel, Field
 from pydantic.functional_validators import model_validator
 from typing_extensions import Self
 
 from .__types import DictData
-from .utils import load_config
+from .conf import config, load_config
 
 
 @lru_cache
@@ -42,8 +40,7 @@ def get_logger(name: str):
     stream.setFormatter(formatter)
     logger.addHandler(stream)
 
-    debug: bool = str2bool(os.getenv("WORKFLOW_LOG_DEBUG_MODE", "true"))
-    logger.setLevel(logging.DEBUG if debug else logging.INFO)
+    logger.setLevel(logging.DEBUG if config.debug else logging.INFO)
     return logger
 
 
@@ -72,7 +69,7 @@ class BaseLog(BaseModel, ABC):
 
         :rtype: Self
         """
-        if str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false")):
+        if config.enable_write_log:
             self.do_before()
         return self
 
@@ -141,7 +138,7 @@ class FileLog(BaseLog):
         future.
         """
         # NOTE: Check environ variable was set for real writing.
-        if not str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false")):
+        if not config.enable_write_log:
            return False
 
        # NOTE: create pointer path that use the same logic of pointer method.
@@ -171,7 +168,7 @@ class FileLog(BaseLog):
         :rtype: Self
         """
         # NOTE: Check environ variable was set for real writing.
-        if not str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false")):
+        if not config.enable_write_log:
             return self
 
         log_file: Path = self.pointer() / f"{self.run_id}.log"
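Note on the hunks above: every scattered `os.getenv(...)` / `str2bool(...)` read in log.py is replaced by attributes on a shared `config` object from the new `conf` module. Below is a minimal sketch of that pattern, reusing the environment variable names and defaults visible in the removed lines; the attribute names match what the new code reads, but this `Config` class is illustrative, not the package's actual `ddeutil.workflow.conf.Config`.

```python
# Illustrative sketch only: a centralized settings object replacing the
# per-call os.getenv / str2bool reads removed in this diff. The real
# ddeutil.workflow.conf.Config may expose more options.
import os
from zoneinfo import ZoneInfo


def str2bool(value: str) -> bool:
    # Stand-in for ddeutil.core.str2bool, which the old code imported.
    return value.strip().lower() in ("1", "true", "yes", "on")


class Config:
    @property
    def debug(self) -> bool:
        return str2bool(os.getenv("WORKFLOW_LOG_DEBUG_MODE", "true"))

    @property
    def enable_write_log(self) -> bool:
        return str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false"))

    @property
    def tz(self) -> ZoneInfo:
        return ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))


config = Config()
```

Centralizing the reads this way is what lets the logging, API, and scheduler modules in the remaining hunks drop their own `os` and `zoneinfo` imports.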
ddeutil/workflow/on.py CHANGED
@@ -15,8 +15,8 @@ from pydantic.functional_validators import field_validator, model_validator
 from typing_extensions import Self
 
 from .__types import DictData, DictStr, TupleStr
+from .conf import Loader
 from .cron import WEEKDAYS, CronJob, CronJobYear, CronRunner
-from .utils import Loader
 
 __all__: TupleStr = (
     "On",
@@ -6,14 +6,13 @@
 from __future__ import annotations
 
 import asyncio
-import os
 from asyncio import ensure_future
 from datetime import datetime
 from functools import wraps
-from zoneinfo import ZoneInfo
 
 from starlette.concurrency import run_in_threadpool
 
+from .conf import config
 from .cron import CronJob
 from .log import get_logger
 
@@ -24,9 +23,7 @@ def get_cronjob_delta(cron: str) -> float:
     """This function returns the time delta between now and the next cron
     execution time.
     """
-    now: datetime = datetime.now(
-        tz=ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
-    )
+    now: datetime = datetime.now(tz=config.tz)
     cron = CronJob(cron)
     return (cron.schedule(now).next - now).total_seconds()
 
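The same consolidation shows up here: `get_cronjob_delta` now takes its timezone-aware "now" from `config.tz` instead of reading `WORKFLOW_CORE_TIMEZONE` inline. A hedged sketch of the computation follows, using only the `CronJob(...).schedule(now).next` calls visible in this hunk; exact signatures in the released package may differ.

```python
# Sketch of the delta computation above: seconds from a timezone-aware
# "now" until the next cron occurrence. ZoneInfo("UTC") stands in for
# config.tz; CronJob is the class imported from .cron in this hunk.
from datetime import datetime
from zoneinfo import ZoneInfo

from ddeutil.workflow.cron import CronJob

now: datetime = datetime.now(tz=ZoneInfo("UTC"))
runner = CronJob("*/5 * * * *").schedule(now)
print((runner.next - now).total_seconds())  # at most 300 seconds away
```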
ddeutil/workflow/route.py CHANGED
@@ -6,10 +6,8 @@
 from __future__ import annotations
 
 import copy
-import os
 from datetime import datetime, timedelta
 from typing import Any
-from zoneinfo import ZoneInfo
 
 from fastapi import APIRouter, HTTPException, Request
 from fastapi import status as st
@@ -18,9 +16,10 @@ from pydantic import BaseModel
 
 from . import Workflow
 from .__types import DictData
+from .conf import Loader, config
 from .log import get_logger
 from .scheduler import Schedule
-from .utils import Loader, Result
+from .utils import Result
 
 logger = get_logger("ddeutil.workflow")
 workflow = APIRouter(
@@ -87,12 +86,7 @@ async def execute_workflow(name: str, payload: ExecutePayload) -> DictData:
     # NOTE: Start execute manually
     rs: Result = wf.execute(params=payload.params)
 
-    return rs.model_dump(
-        by_alias=True,
-        exclude_none=True,
-        exclude_unset=True,
-        exclude_defaults=True,
-    )
+    return dict(rs)
 
 
 @workflow.get("/{name}/logs")
@@ -172,8 +166,7 @@ async def add_deploy_scheduler(request: Request, name: str):
 
     request.state.scheduler.append(name)
 
-    tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
-    start_date: datetime = datetime.now(tz=tz)
+    start_date: datetime = datetime.now(tz=config.tz)
     start_date_waiting: datetime = (start_date + timedelta(minutes=1)).replace(
         second=0, microsecond=0
     )
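Worth noting on the `execute_workflow` change above: the filtered `model_dump(...)` call becomes a plain `dict(rs)`. Assuming `Result` is still a Pydantic model, the two are not equivalent; `dict()` iterates the top-level fields and leaves nested models unserialized, while `model_dump` converts recursively and honors the `by_alias` / `exclude_*` options. A small stand-alone illustration (these classes are stand-ins, not the package's own):

```python
from pydantic import BaseModel


class Inner(BaseModel):
    value: int = 1


class Result(BaseModel):  # illustrative stand-in for the package's Result
    status: int = 0
    context: Inner = Inner()


rs = Result()
print(dict(rs))         # {'status': 0, 'context': Inner(value=1)}  (shallow)
print(rs.model_dump())  # {'status': 0, 'context': {'value': 1}}    (recursive)
```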
@@ -3,13 +3,26 @@
 # Licensed under the MIT License. See LICENSE in the project root for
 # license information.
 # ------------------------------------------------------------------------------
+"""
+The main schedule running is ``workflow_runner`` function that trigger the
+multiprocess of ``workflow_control`` function for listing schedules on the
+config by ``Loader.finds(Schedule)``.
+
+The ``workflow_control`` is the scheduler function that release 2 schedule
+functions; ``workflow_task``, and ``workflow_monitor``.
+
+``workflow_control`` --- Every minute at :02 --> ``workflow_task``
+                     --- Every 5 minutes --> ``workflow_monitor``
+
+The ``workflow_task`` will run ``task.release`` method in threading object
+for multithreading strategy. This ``release`` method will run only one crontab
+value with the on field.
+"""
 from __future__ import annotations
 
 import copy
 import inspect
-import json
 import logging
-import os
 import time
 from concurrent.futures import (
     Future,
@@ -43,14 +56,13 @@ except ImportError:
     CancelJob = None
 
 from .__types import DictData, TupleStr
-from .conf import config
+from .conf import Loader, config
 from .cron import CronRunner
 from .exceptions import JobException, WorkflowException
 from .job import Job
 from .log import FileLog, Log, get_logger
 from .on import On
 from .utils import (
-    Loader,
     Param,
     Result,
     batch,
@@ -75,7 +87,7 @@ __all__: TupleStr = (
     "Schedule",
     "ScheduleWorkflow",
     "workflow_task",
-    "workflow_long_running_task",
+    "workflow_monitor",
     "workflow_control",
     "workflow_runner",
 )
@@ -184,7 +196,7 @@ class Workflow(BaseModel):
         return data
 
     @model_validator(mode="before")
-    def __prepare_params(cls, values: DictData) -> DictData:
+    def __prepare_model_before__(cls, values: DictData) -> DictData:
         """Prepare the params key."""
         # NOTE: Prepare params type if it passing with only type value.
         if params := values.pop("params", {}):
@@ -199,9 +211,10 @@ class Workflow(BaseModel):
         return values
 
     @field_validator("desc", mode="after")
-    def ___prepare_desc(cls, value: str) -> str:
+    def __dedent_desc__(cls, value: str) -> str:
         """Prepare description string that was created on a template.
 
+        :param value: A description string value that want to dedent.
         :rtype: str
         """
         return dedent(value)
@@ -458,8 +471,10 @@ class Workflow(BaseModel):
         queue: list[datetime] = []
         results: list[Result] = []
 
-        worker: int = int(os.getenv("WORKFLOW_CORE_MAX_NUM_POKING") or "4")
-        with ThreadPoolExecutor(max_workers=worker) as executor:
+        with ThreadPoolExecutor(
+            max_workers=config.max_poking_pool_worker,
+            thread_name_prefix="wf_poking_",
+        ) as executor:
             futures: list[Future] = []
             for on in self.on:
                 futures.append(
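The poking hunk above replaces the `WORKFLOW_CORE_MAX_NUM_POKING` lookup with `config.max_poking_pool_worker` and gives the pool threads a `wf_poking_` name prefix. Below is a generic sketch of that bounded fan-out pattern; `poke_one` and the `ons` list are illustrative stand-ins, not the package's API.

```python
from concurrent.futures import Future, ThreadPoolExecutor, as_completed


def poke_one(on_name: str) -> str:
    # Stand-in for whatever each submitted callable does per "on" schedule.
    return f"poked {on_name}"


ons = ["every_5_minute", "every_hour"]
with ThreadPoolExecutor(
    max_workers=4,  # the diff reads this from config.max_poking_pool_worker
    thread_name_prefix="wf_poking_",
) as executor:
    futures: list[Future] = [executor.submit(poke_one, on) for on in ons]
    results = [future.result() for future in as_completed(futures)]

print(results)
```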
@@ -513,7 +528,6 @@ class Workflow(BaseModel):
                 f"workflow."
             )
 
-        context: DictData = {}
         logger.info(f"({self.run_id}) [WORKFLOW]: Start execute: {job_id!r}")
 
         # IMPORTANT:
@@ -523,7 +537,7 @@ class Workflow(BaseModel):
             job: Job = self.jobs[job_id].get_running_id(self.run_id)
             job.set_outputs(
                 job.execute(params=params).context,
-                to=context,
+                to=params,
             )
         except JobException as err:
             logger.error(
@@ -536,7 +550,7 @@ class Workflow(BaseModel):
         else:
             raise NotImplementedError() from None
 
-        return Result(status=0, context=context)
+        return Result(status=0, context=params)
 
     def execute(
         self,
@@ -587,8 +601,14 @@ class Workflow(BaseModel):
         for job_id in self.jobs:
             jq.put(job_id)
 
-        # NOTE: Create result context that will pass this context to any
-        # execution dependency.
+        # NOTE: Create data context that will pass to any job executions
+        # on this workflow.
+        #
+        # {
+        #     'params': <input-params>,
+        #     'jobs': {},
+        # }
+        #
         context: DictData = self.parameterize(params)
         status: int = 0
         try:
@@ -657,15 +677,23 @@ class Workflow(BaseModel):
             job: Job = self.jobs[job_id]
 
             if any(need not in context["jobs"] for need in job.needs):
+                job_queue.task_done()
                 job_queue.put(job_id)
                 time.sleep(0.25)
                 continue
 
+            # NOTE: Start workflow job execution with deep copy context data
+            # before release.
+            #
+            # {
+            #     'params': <input-params>,
+            #     'jobs': {},
+            # }
             futures.append(
                 executor.submit(
                     self.execute_job,
                     job_id,
-                    params=copy.deepcopy(context),
+                    params=context,
                 ),
             )
 
@@ -677,14 +705,13 @@
 
             for future in as_completed(futures, timeout=1800):
                 if err := future.exception():
-                    logger.error(f"{err}")
+                    logger.error(f"({self.run_id}) [CORE]: {err}")
                     raise WorkflowException(f"{err}")
                 try:
-                    # NOTE: Update job result to workflow result.
-                    context["jobs"].update(future.result(timeout=60).context)
+                    future.result(timeout=60)
                 except TimeoutError as err:
                     raise WorkflowException(
-                        "Get result from future was timeout"
+                        "Timeout when getting result from future"
                     ) from err
 
         if not_time_out_flag:
@@ -731,18 +758,21 @@
             job_id: str = job_queue.get()
             job: Job = self.jobs[job_id]
 
-            # NOTE:
+            # NOTE: Waiting dependency job run successful before release.
             if any(need not in context["jobs"] for need in job.needs):
+                job_queue.task_done()
                 job_queue.put(job_id)
-                time.sleep(0.25)
+                time.sleep(0.05)
                 continue
 
-            # NOTE: Start workflow job execution.
-            job_rs = self.execute_job(
-                job_id=job_id,
-                params=copy.deepcopy(context),
-            )
-            context["jobs"].update(job_rs.context)
+            # NOTE: Start workflow job execution with deep copy context data
+            # before release.
+            #
+            # {
+            #     'params': <input-params>,
+            #     'jobs': {},
+            # }
+            self.execute_job(job_id=job_id, params=context)
 
             # NOTE: Mark this job queue done.
             job_queue.task_done()
@@ -780,7 +810,7 @@ class ScheduleWorkflow(BaseModel):
     )
 
     @model_validator(mode="before")
-    def __prepare_values(cls, values: DictData) -> DictData:
+    def __prepare_before__(cls, values: DictData) -> DictData:
         """Prepare incoming values before validating with model fields.
 
         :rtype: DictData
@@ -918,9 +948,11 @@ class Schedule(BaseModel):
         return workflow_tasks
 
 
-def catch_exceptions(
-    cancel_on_failure: bool = False,
-) -> Callable[P, Optional[CancelJob]]:
+ReturnCancelJob = Callable[P, Optional[CancelJob]]
+DecoratorCancelJob = Callable[[ReturnCancelJob], ReturnCancelJob]
+
+
+def catch_exceptions(cancel_on_failure: bool = False) -> DecoratorCancelJob:
     """Catch exception error from scheduler job that running with schedule
     package and return CancelJob if this function raise an error.
 
@@ -929,9 +961,7 @@ def catch_exceptions(
     :rtype: Callable[P, Optional[CancelJob]]
     """
 
-    def decorator(
-        func: Callable[P, Optional[CancelJob]],
-    ) -> Callable[P, Optional[CancelJob]]:
+    def decorator(func: ReturnCancelJob) -> ReturnCancelJob:
         try:
             # NOTE: Check the function that want to handle is method or not.
             if inspect.ismethod(func):
@@ -966,8 +996,8 @@ class WorkflowTaskData:
     workflow: Workflow
     on: On
     params: DictData = field(compare=False, hash=False)
-    queue: list[datetime] = field(compare=False, hash=False)
-    running: list[datetime] = field(compare=False, hash=False)
+    queue: dict[str, list[datetime]] = field(compare=False, hash=False)
+    running: dict[str, list[datetime]] = field(compare=False, hash=False)
 
     @catch_exceptions(cancel_on_failure=True)
     def release(
@@ -1047,8 +1077,9 @@ class WorkflowTaskData:
             },
         }
 
-        # WARNING: Re-create workflow object that use new running workflow
-        # ID.
+        # WARNING:
+        # Re-create workflow object that use new running workflow ID.
+        #
         runner: Workflow = wf.get_running_id(run_id=wf.new_run_id)
         rs: Result = runner.execute(
             params=param2template(self.params, release_params),
@@ -1101,6 +1132,7 @@ class WorkflowTaskData:
                 self.workflow.name == other.workflow.name
                 and self.on.cronjob == other.on.cronjob
             )
+        return NotImplemented
 
 
 @catch_exceptions(cancel_on_failure=True)
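The `return NotImplemented` added to `WorkflowTaskData.__eq__` above follows the usual comparison protocol: when the other operand is an unrelated type, returning `NotImplemented` lets Python try the reflected comparison and fall back to identity instead of silently treating the pair as unequal. A minimal stand-alone illustration (the `Task` class is hypothetical):

```python
class Task:
    def __init__(self, name: str) -> None:
        self.name = name

    def __eq__(self, other: object) -> bool:
        if isinstance(other, Task):
            return self.name == other.name
        # Signal "I do not know how to compare" rather than returning False.
        return NotImplemented


print(Task("a") == Task("a"))  # True
print(Task("a") == "a")        # False, resolved after both sides decline
```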
@@ -1112,10 +1144,10 @@ def workflow_task(
     """Workflow task generator that create release pair of workflow and on to
     the threading in background.
 
-    This workflow task will start every minute at :02 second.
+    This workflow task will start every minute at ':02' second.
 
     :param workflow_tasks:
-    :param stop:
+    :param stop: A stop datetime object that force stop running scheduler.
     :param threads:
     :rtype: CancelJob | None
     """
@@ -1130,7 +1162,7 @@ def workflow_task(
             "running in background."
         )
         time.sleep(15)
-        workflow_long_running_task(threads)
+        workflow_monitor(threads)
        return CancelJob
 
    # IMPORTANT:
@@ -1202,7 +1234,7 @@
     logger.debug(f"[WORKFLOW]: {'=' * 100}")
 
 
-def workflow_long_running_task(threads: dict[str, Thread]) -> None:
+def workflow_monitor(threads: dict[str, Thread]) -> None:
     """Workflow schedule for monitoring long running thread from the schedule
     control.
 
@@ -1260,30 +1292,29 @@ def workflow_control(
         sch: Schedule = Schedule.from_loader(name, externals=externals)
         workflow_tasks.extend(
             sch.tasks(
-                start_date_waiting, wf_queue, wf_running, externals=externals
+                start_date_waiting,
+                queue=wf_queue,
+                running=wf_running,
+                externals=externals,
             ),
         )
 
     # NOTE: This schedule job will start every minute at :02 seconds.
-    schedule.every(1).minutes.at(":02").do(
-        workflow_task,
-        workflow_tasks=workflow_tasks,
-        stop=stop
-        or (
-            start_date
-            + timedelta(
-                **json.loads(
-                    os.getenv("WORKFLOW_APP_STOP_BOUNDARY_DELTA")
-                    or '{"minutes": 5, "seconds": 20}'
-                )
-            )
-        ),
-        threads=thread_releases,
-    ).tag("control")
+    (
+        schedule.every(1)
+        .minutes.at(":02")
+        .do(
+            workflow_task,
+            workflow_tasks=workflow_tasks,
+            stop=(stop or (start_date + config.stop_boundary_delta)),
+            threads=thread_releases,
+        )
+        .tag("control")
+    )
 
     # NOTE: Checking zombie task with schedule job will start every 5 minute.
     schedule.every(5).minutes.at(":10").do(
-        workflow_long_running_task,
+        workflow_monitor,
         threads=thread_releases,
     ).tag("monitor")
 
@@ -1317,14 +1348,16 @@ def workflow_runner(
     """Workflow application that running multiprocessing schedule with chunk of
     workflows that exists in config path.
 
-    :param stop:
+    :param stop: A stop datetime object that force stop running scheduler.
     :param excluded:
     :param externals:
+
     :rtype: list[str]
 
     This function will get all workflows that include on value that was
-    created in config path and chuck it with WORKFLOW_APP_SCHEDULE_PER_PROCESS
-    value to multiprocess executor pool.
+    created in config path and chuck it with application config variable
+    ``WORKFLOW_APP_MAX_SCHEDULE_PER_PROCESS`` env var to multiprocess executor
+    pool.
 
     The current workflow logic that split to process will be below diagram:
 
@@ -1341,7 +1374,7 @@ def workflow_runner(
     excluded: list[str] = excluded or []
 
     with ProcessPoolExecutor(
-        max_workers=int(os.getenv("WORKFLOW_APP_PROCESS_WORKER") or "2"),
+        max_workers=config.max_schedule_process,
     ) as executor:
         futures: list[Future] = [
             executor.submit(
@@ -1352,7 +1385,7 @@
             )
             for loader in batch(
                 Loader.finds(Schedule, excluded=excluded),
-                n=int(os.getenv("WORKFLOW_APP_SCHEDULE_PER_PROCESS") or "100"),
+                n=config.max_schedule_per_process,
             )
         ]
 
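Taken together, the new module docstring and the reformatted `schedule.every(...)` chains describe the control-loop layout: one release job every minute at ':02' and one monitor job every 5 minutes. Below is a minimal sketch of that layout with the `schedule` package; the job bodies are placeholders, and the stop-boundary handling of the real runner (`stop or start_date + config.stop_boundary_delta`) is reduced to a comment.

```python
import time

import schedule


def workflow_task() -> None:
    print("release workflow tasks that are due")


def workflow_monitor() -> None:
    print("check long-running release threads")


schedule.every(1).minutes.at(":02").do(workflow_task).tag("control")
schedule.every(5).minutes.at(":10").do(workflow_monitor).tag("monitor")

# The real workflow_control keeps polling until its stop boundary passes,
# then clears the "control" jobs; this demo simply polls forever.
while True:
    schedule.run_pending()
    time.sleep(1)
```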