ddeutil-workflow 0.0.49__py3-none-any.whl → 0.0.51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddeutil/workflow/utils.py CHANGED
@@ -26,9 +26,11 @@ T = TypeVar("T")
 UTC: Final[ZoneInfo] = ZoneInfo("UTC")
 
 
-def get_dt_now(
-    tz: ZoneInfo | None = None, offset: float = 0.0
-) -> datetime:  # pragma: no cov
+def replace_sec(dt: datetime) -> datetime:
+    return dt.replace(second=0, microsecond=0)
+
+
+def get_dt_now(tz: ZoneInfo | None = None, offset: float = 0.0) -> datetime:
     """Return the current datetime object.
 
     :param tz: A ZoneInfo object for replace timezone of return datetime object.
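
The new `replace_sec` helper truncates a datetime to the start of its minute and is reused by the minute-level helpers below. A minimal sketch of the two functions above, assuming the package is installed:

    from datetime import datetime
    from zoneinfo import ZoneInfo

    from ddeutil.workflow.utils import get_dt_now, replace_sec

    dt = datetime(2024, 1, 1, 8, 13, 30, 500, tzinfo=ZoneInfo("UTC"))
    replace_sec(dt)        # datetime(2024, 1, 1, 8, 13, tzinfo=ZoneInfo("UTC"))
    get_dt_now(offset=60)  # current UTC time shifted back by 60 seconds
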
@@ -54,42 +56,31 @@ def get_d_now(
     return (datetime.now(tz=(tz or UTC)) - timedelta(seconds=offset)).date()
 
 
-def get_diff_sec(
-    dt: datetime, tz: ZoneInfo | None = None, offset: float = 0.0
-) -> int:  # pragma: no cov
+def get_diff_sec(dt: datetime, offset: float = 0.0) -> int:
     """Return second value that come from diff of an input datetime and the
     current datetime with specific timezone.
 
-    :param dt:
-    :param tz: A ZoneInfo object for replace timezone of return datetime object.
-    :param offset: An offset second value.
+    :param dt: (datetime) A datetime object that want to get different second value.
+    :param offset: (float) An offset second value.
 
     :rtype: int
     """
     return round(
         (
-            dt - datetime.now(tz=(tz or UTC)) - timedelta(seconds=offset)
+            dt - datetime.now(tz=dt.tzinfo) - timedelta(seconds=offset)
         ).total_seconds()
     )
 
 
-def reach_next_minute(
-    dt: datetime, tz: ZoneInfo | None = None, offset: float = 0.0
-) -> bool:
+def reach_next_minute(dt: datetime, offset: float = 0.0) -> bool:
     """Check this datetime object is not in range of minute level on the current
     datetime.
 
-    :param dt:
-    :param tz: A ZoneInfo object for replace timezone of return datetime object.
-    :param offset: An offset second value.
+    :param dt: (datetime) A datetime object that want to check.
+    :param offset: (float) An offset second value.
     """
     diff: float = (
-        dt.replace(second=0, microsecond=0)
-        - (
-            get_dt_now(tz=(tz or UTC), offset=offset).replace(
-                second=0, microsecond=0
-            )
-        )
+        replace_sec(dt) - replace_sec(get_dt_now(tz=dt.tzinfo, offset=offset))
     ).total_seconds()
     if diff >= 60:
         return True
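
Both helpers drop their `tz` argument and now read the timezone from the input value itself (`dt.tzinfo`), so the comparison clock always matches the datetime being checked. A small illustrative sketch, assuming the package is installed:

    from datetime import timedelta
    from zoneinfo import ZoneInfo

    from ddeutil.workflow.utils import get_diff_sec, get_dt_now, reach_next_minute

    next_run = get_dt_now(tz=ZoneInfo("UTC")) + timedelta(minutes=2)
    get_diff_sec(next_run)       # roughly 120 seconds from now
    reach_next_minute(next_run)  # True, it falls beyond the current minute
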
@@ -106,7 +97,7 @@ def wait_to_next_minute(
     dt: datetime, second: float = 0
 ) -> None:  # pragma: no cov
     """Wait with sleep to the next minute with an offset second value."""
-    future = dt.replace(second=0, microsecond=0) + timedelta(minutes=1)
+    future: datetime = replace_sec(dt) + timedelta(minutes=1)
     time.sleep((future - dt).total_seconds() + second)
 
 
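The sleep duration is the gap between the input datetime and the top of the next minute, plus the extra offset. A worked sketch of that arithmetic:

    from datetime import datetime, timedelta
    from zoneinfo import ZoneInfo

    dt = datetime(2024, 1, 1, 8, 13, 30, 500000, tzinfo=ZoneInfo("UTC"))
    future = dt.replace(second=0, microsecond=0) + timedelta(minutes=1)
    (future - dt).total_seconds()  # 29.5 -> sleep this long, plus `second`
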
@@ -114,7 +105,7 @@ def delay(second: float = 0) -> None: # pragma: no cov
     """Delay time that use time.sleep with random second value between
     0.00 - 0.99 seconds.
 
-    :param second: A second number that want to adds-on random value.
+    :param second: (float) A second number that want to adds-on random value.
     """
     time.sleep(second + randrange(0, 99, step=10) / 100)
 
@@ -124,32 +115,42 @@ def gen_id(
     *,
     sensitive: bool = True,
     unique: bool = False,
+    simple_mode: bool | None = None,
+    extras: DictData | None = None,
 ) -> str:
-    """Generate running ID for able to tracking. This generates process use `md5`
-    algorithm function if ``WORKFLOW_CORE_WORKFLOW_ID_SIMPLE_MODE`` set to
-    false. But it will cut this hashing value length to 10 it the setting value
-    set to true.
+    """Generate running ID for able to tracking. This generates process use
+    `md5` algorithm function if `WORKFLOW_CORE_WORKFLOW_ID_SIMPLE_MODE` set
+    to false. But it will cut this hashing value length to 10 it the setting
+    value set to true.
+
+    Simple Mode:
+
+        ... 0000 00 00 00 00 00 000000 T 0000000000
+        ... year month day hour minute second microsecond sep simple-id
 
     :param value: A value that want to add to prefix before hashing with md5.
     :param sensitive: A flag that convert the value to lower case before hashing
     :param unique: A flag that add timestamp at microsecond level to value
         before hashing.
+    :param simple_mode: A flag for generate ID by simple mode.
+    :param extras: An extra parameter that use for override config value.
 
     :rtype: str
     """
-    from .conf import config
+    from .conf import dynamic
 
     if not isinstance(value, str):
         value: str = str(value)
 
-    if config.generate_id_simple_mode:
-        return (
-            f"{datetime.now(tz=config.tz):%Y%m%d%H%M%S%f}T" if unique else ""
-        ) + hash_str(f"{(value if sensitive else value.lower())}", n=10)
+    dt: datetime = datetime.now(tz=dynamic("tz", extras=extras))
+    if dynamic("generate_id_simple_mode", f=simple_mode, extras=extras):
+        return (f"{dt:%Y%m%d%H%M%S%f}T" if unique else "") + hash_str(
+            f"{(value if sensitive else value.lower())}", n=10
+        )
 
     return md5(
         (
-            (f"{datetime.now(tz=config.tz):%Y%m%d%H%M%S%f}T" if unique else "")
+            (f"{dt}T" if unique else "")
             + f"{(value if sensitive else value.lower())}"
         ).encode()
     ).hexdigest()
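
Callers can now force or disable the simple layout per call and pass `extras` so the timezone and the mode flag come from `dynamic(...)` instead of the global config. A hedged usage sketch (the input value is a placeholder):

    from ddeutil.workflow.utils import gen_id

    # Force simple mode for this call regardless of the configured default.
    rid = gen_id("demo-workflow", unique=True, simple_mode=True)
    # Layout per the docstring above: '%Y%m%d%H%M%S%f' timestamp, then 'T',
    # then a 10-character hash, e.g. '20240101081330000000T<10-char-hash>'.
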
@@ -243,12 +244,15 @@ def cut_id(run_id: str, *, num: int = 6) -> str:
     """Cutting running ID with length.
 
     Example:
-        >>> cut_id(run_id='668931127320241228100331254567')
-        '254567'
+        >>> cut_id(run_id='20240101081330000000T1354680202')
+        '202401010813680202'
 
-    :param run_id:
+    :param run_id: A running ID That want to cut
     :param num:
 
     :rtype: str
     """
-    return run_id[-num:]
+    if "T" in run_id:
+        dt, simple = run_id.split("T", maxsplit=1)
+        return dt[:12] + simple[-num:]
+    return run_id[:12] + run_id[-num:]
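
The cut now keeps a date-time prefix instead of only the tail: IDs containing the simple-mode 'T' separator keep the first 12 timestamp characters plus the last `num` characters of the hash, and plain IDs keep their first 12 and last `num` characters. Both results below follow directly from the new code:

    from ddeutil.workflow.utils import cut_id

    cut_id("20240101081330000000T1354680202")  # '202401010813680202'
    cut_id("668931127320241228100331254567")   # '668931127320254567'
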
@@ -6,6 +6,9 @@
 # [x] Use dynamic config
 """A Workflow module that is the core module of this package. It keeps Release
 and Workflow Pydantic models.
+
+I will implement timeout on the workflow execution layer only because the
+main propose of this package in Workflow model.
 """
 from __future__ import annotations
 
@@ -36,7 +39,7 @@ from .__cron import CronJob, CronRunner
 from .__types import DictData, TupleStr
 from .conf import Loader, SimLoad, dynamic
 from .cron import On
-from .exceptions import JobException, WorkflowException
+from .exceptions import JobException, UtilException, WorkflowException
 from .job import Job
 from .logs import Audit, get_audit
 from .params import Param
@@ -636,20 +639,20 @@ class Workflow(BaseModel):
         run_id: str | None = None,
         parent_run_id: str | None = None,
         audit: type[Audit] = None,
-        queue: ReleaseQueue | None = None,
+        queue: Optional[ReleaseQueue] = None,
         override_log_name: str | None = None,
-        result: Result | None = None,
+        result: Optional[Result] = None,
+        timeout: int = 600,
     ) -> Result:
         """Release the workflow execution with overriding parameter with the
         release templating that include logical date (release date), execution
         date, or running id to the params.
 
         This method allow workflow use audit object to save the execution
-        result to audit destination like file audit to the local `/logs`
-        directory.
+        result to audit destination like file audit to the local `./logs` path.
 
         Steps:
-            - Initialize ReleaseQueue and Release if they do not pass.
+            - Initialize Release and validate ReleaseQueue.
             - Create release data for pass to parameter templating function.
             - Execute this workflow with mapping release data to its parameters.
             - Writing result audit
@@ -658,15 +661,15 @@ class Workflow(BaseModel):
 
         :param release: A release datetime or Release object.
         :param params: A workflow parameter that pass to execute method.
-        :param queue: A ReleaseQueue that use for mark complete.
-        :param run_id: A workflow running ID for this release.
-        :param parent_run_id: A parent workflow running ID for this release.
+        :param run_id: (str) A workflow running ID.
+        :param parent_run_id: (str) A parent workflow running ID.
         :param audit: An audit class that want to save the execution result.
-        :param queue: A ReleaseQueue object.
-        :param override_log_name: An override logging name that use instead
-            the workflow name.
+        :param queue: (ReleaseQueue) A ReleaseQueue object.
+        :param override_log_name: (str) An override logging name that use
+            instead the workflow name.
         :param result: (Result) A result object for keeping context and status
             data.
+        :param timeout: (int) A workflow execution time out in second unit.
 
         :raise TypeError: If a queue parameter does not match with ReleaseQueue
             type.
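
`release` now accepts a `timeout` (default 600 seconds) that is forwarded to `execute`. A hedged usage sketch, assuming the `from_conf` loader shown in the project README and a placeholder workflow name:

    from datetime import datetime

    from ddeutil.workflow import Workflow

    wf = Workflow.from_conf("wf-example")  # placeholder config name
    rs = wf.release(
        release=datetime(2024, 1, 1, 8, 0),
        params={"asat-dt": datetime(2024, 1, 1, 8, 0)},
        timeout=600,  # forwarded to Workflow.execute in this version
    )
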
@@ -683,7 +686,8 @@ class Workflow(BaseModel):
             extras=self.extras,
         )
 
-        if queue is not None and not isinstance(queue, ReleaseQueue):
+        # VALIDATE: check type of queue that valid with ReleaseQueue.
+        if queue and not isinstance(queue, ReleaseQueue):
             raise TypeError(
                 "The queue argument should be ReleaseQueue object only."
             )
@@ -693,36 +697,29 @@ class Workflow(BaseModel):
         release: Release = Release.from_dt(release, extras=self.extras)
 
         result.trace.debug(
-            f"[RELEASE]: Start release - {name!r} : "
-            f"{release.date:%Y-%m-%d %H:%M:%S}"
+            f"[RELEASE]: Start {name!r} : {release.date:%Y-%m-%d %H:%M:%S}"
         )
-
-        # NOTE: Release parameters that use to templating on the schedule
-        #   config data.
-        release_params: DictData = {
-            "release": {
-                "logical_date": release.date,
-                "execute_date": datetime.now(
-                    tz=dynamic("tz", extras=self.extras)
-                ),
-                "run_id": result.run_id,
-                "timezone": dynamic("tz", extras=self.extras),
-            }
-        }
-
-        # NOTE: Execute workflow with templating params from release mapping.
-        #   The result context that return from execution method is:
-        #
-        #   ... {"params": ..., "jobs": ...}
-        #
         self.execute(
-            params=param2template(params, release_params, extras=self.extras),
+            params=param2template(
+                params,
+                params={
+                    "release": {
+                        "logical_date": release.date,
+                        "execute_date": datetime.now(
+                            tz=dynamic("tz", extras=self.extras)
+                        ),
+                        "run_id": result.run_id,
+                        "timezone": dynamic("tz", extras=self.extras),
+                    }
+                },
+                extras=self.extras,
+            ),
             result=result,
             parent_run_id=result.parent_run_id,
+            timeout=timeout,
         )
         result.trace.debug(
-            f"[RELEASE]: End release - {name!r} : "
-            f"{release.date:%Y-%m-%d %H:%M:%S}"
+            f"[RELEASE]: End {name!r} : {release.date:%Y-%m-%d %H:%M:%S}"
         )
 
         # NOTE: Saving execution result to destination of the input audit
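
The release values are now built inline and handed to `param2template`, so workflow parameters can reference them before execution. A sketch of the mapping's shape with placeholder values (keys taken from the diff above, template syntax assumed from the project docs):

    from datetime import datetime
    from zoneinfo import ZoneInfo

    release_values = {
        "release": {
            "logical_date": datetime(2024, 1, 1, 8, 0, tzinfo=ZoneInfo("UTC")),
            "execute_date": datetime(2024, 1, 1, 8, 0, 42, tzinfo=ZoneInfo("UTC")),
            "run_id": "20240101080000000000T1354680202",
            "timezone": ZoneInfo("UTC"),
        }
    }
    # A param such as {"asat-dt": "${{ release.logical_date }}"} would be rendered
    # against this mapping before the workflow executes.
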
@@ -741,19 +738,10 @@ class Workflow(BaseModel):
                 ).save(excluded=None)
             )
 
-        # NOTE: Remove this release from running.
-        if queue is not None:
+        if queue:
             queue.remove_running(release)
             queue.mark_complete(release)
 
-        # NOTE: Remove the params key from the result context for deduplicate.
-        #   This step is prepare result context for this release method.
-        context: DictData = result.context
-        jobs: DictData = context.pop("jobs", {})
-        errors: DictData = (
-            {"errors": context.pop("errors", {})} if "errors" in context else {}
-        )
-
         return result.catch(
             status=SUCCESS,
             context={
@@ -763,8 +751,7 @@ class Workflow(BaseModel):
                     "logical_date": release.date,
                     "release": release,
                 },
-                "outputs": {"jobs": jobs},
-                **errors,
+                "outputs": {"jobs": result.context.pop("jobs", {})},
             },
         )
 
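With the pop-and-merge block above removed, the context captured by `result.catch` on release keeps only the release metadata and the job outputs; the separate `errors` mapping that 0.0.49 merged in is no longer forwarded here. A short sketch of the resulting shape (keys taken from the diff):

    # rs = wf.release(...)  -- continuing the earlier release sketch
    # rs.context["release"]["logical_date"]  -> the release datetime
    # rs.context["outputs"]["jobs"]          -> job outputs popped from the run context
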
@@ -923,15 +910,11 @@ class Workflow(BaseModel):
             # NOTE: Pop the latest Release object from the release queue.
             release: Release = heappop(q.queue)
 
-            if reach_next_minute(
-                release.date,
-                tz=dynamic("tz", extras=self.extras),
-                offset=offset,
-            ):
+            if reach_next_minute(release.date, offset=offset):
                 result.trace.debug(
-                    f"[POKING]: The latest release, "
-                    f"{release.date:%Y-%m-%d %H:%M:%S}, is not able to run "
-                    f"on this minute"
+                    f"[POKING]: Latest Release, "
+                    f"{release.date:%Y-%m-%d %H:%M:%S}, can not run on "
+                    f"this time"
                 )
                 heappush(q.queue, release)
                 wait_to_next_minute(
@@ -976,7 +959,6 @@ class Workflow(BaseModel):
         *,
         result: Result | None = None,
         event: Event | None = None,
-        raise_error: bool = True,
     ) -> Result:
         """Job execution with passing dynamic parameters from the main workflow
         execution to the target job object via job's ID.
@@ -987,7 +969,6 @@ class Workflow(BaseModel):
 
         :raise WorkflowException: If execute with not exist job's ID.
         :raise WorkflowException: If the job execution raise JobException.
-        :raise NotImplementedError: If set raise_error argument to False.
 
         :param job_id: A job ID that want to execute.
         :param params: A params that was parameterized from workflow execution.
@@ -995,8 +976,6 @@ class Workflow(BaseModel):
             data.
         :param event: (Event) An event manager that pass to the
             PoolThreadExecutor.
-        :param raise_error: A flag that raise error instead catching to result
-            if it gets exception from job execution.
 
         :rtype: Result
         :return: Return the result object that receive the job execution result
@@ -1012,6 +991,12 @@ class Workflow(BaseModel):
                 f"workflow."
             )
 
+        job: Job = self.job(name=job_id)
+        if job.is_skipped(params=params):
+            result.trace.info(f"[WORKFLOW]: Skip job: {job_id!r}")
+            job.set_outputs(output={"skipped": True}, to=params)
+            return result.catch(status=SKIP, context=params)
+
         if event and event.is_set():  # pragma: no cov
             raise WorkflowException(
                 "Workflow job was canceled from event that had set before "
@@ -1019,31 +1004,31 @@ class Workflow(BaseModel):
             )
 
         try:
-            job: Job = self.jobs[job_id]
-            if job.is_skipped(params=params):
-                result.trace.info(f"[JOB]: Skip job: {job_id!r}")
-                job.set_outputs(output={"SKIP": {"skipped": True}}, to=params)
-            else:
-                result.trace.info(f"[JOB]: Start execute job: {job_id!r}")
-                job.set_outputs(
-                    job.execute(
-                        params=params,
-                        run_id=result.run_id,
-                        parent_run_id=result.parent_run_id,
-                        event=event,
-                    ).context,
-                    to=params,
-                )
-        except JobException as e:
+            result.trace.info(f"[WORKFLOW]: Execute Job: {job_id!r}")
+            rs: Result = job.execute(
+                params=params,
+                run_id=result.run_id,
+                parent_run_id=result.parent_run_id,
+                event=event,
+            )
+            job.set_outputs(rs.context, to=params)
+        except (JobException, UtilException) as e:
             result.trace.error(f"[WORKFLOW]: {e.__class__.__name__}: {e}")
-            if raise_error:
-                raise WorkflowException(
-                    f"Get job execution error {job_id}: JobException: {e}"
-                ) from None
-            raise NotImplementedError(
-                "Handle error from the job execution does not support yet."
+            raise WorkflowException(
+                f"Get job execution error {job_id}: JobException: {e}"
             ) from None
 
+        if rs.status == FAILED:
+            error_msg: str = (
+                f"Workflow job, {job.id}, failed without raise error."
+            )
+            return result.catch(
+                status=FAILED,
+                context={
+                    "errors": WorkflowException(error_msg).to_dict(),
+                    **params,
+                },
+            )
         return result.catch(status=SUCCESS, context=params)
 
     def execute(
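
Besides widening the caught exceptions to include `UtilException`, the method now inspects the job's returned status: a FAILED result that did not raise is converted into a FAILED workflow result instead of being reported as success. A sketch of that branch (only the message text is taken from the diff; the exact `to_dict()` keys are assumed):

    # rs = wf.execute_job(job_id="first-job", params=params, result=result)
    # rs.status             -> FAILED
    # rs.context["errors"]  -> WorkflowException(
    #     "Workflow job, first-job, failed without raise error.").to_dict()
    # with the original params merged alongside the errors entry
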
@@ -1095,7 +1080,7 @@ class Workflow(BaseModel):
             extras=self.extras,
         )
 
-        result.trace.info(f"[WORKFLOW]: Start Execute: {self.name!r} ...")
+        result.trace.info(f"[WORKFLOW]: Execute: {self.name!r} ...")
         if not self.jobs:
             result.trace.warning(
                 f"[WORKFLOW]: {self.name!r} does not have any jobs"
@@ -1140,7 +1125,7 @@ class Workflow(BaseModel):
                 timeout=timeout,
                 event=event,
             )
-        except WorkflowException as e:
+        except (WorkflowException, JobException) as e:
             status: Status = FAILED
             context.update({"errors": e.to_dict()})
 
@@ -1179,7 +1164,7 @@ class Workflow(BaseModel):
             "max_job_exec_timeout", f=timeout, extras=self.extras
         )
         event: Event = event or Event()
-        result.trace.debug(f"[WORKFLOW]: Run {self.name!r} with threading.")
+        result.trace.debug(f"... Run {self.name!r} with threading.")
         with ThreadPoolExecutor(
             max_workers=dynamic("max_job_parallel", extras=self.extras),
             thread_name_prefix="wf_exec_threading_",
@@ -1204,7 +1189,7 @@ class Workflow(BaseModel):
                     )
                 elif check == SKIP:  # pragma: no cov
                     result.trace.info(f"[JOB]: Skip job: {job_id!r}")
-                    job.set_outputs({"SKIP": {"skipped": True}}, to=context)
+                    job.set_outputs(output={"skipped": True}, to=context)
                     job_queue.task_done()
                     continue
 
@@ -1271,12 +1256,12 @@ class Workflow(BaseModel):
             "max_job_exec_timeout", f=timeout, extras=self.extras
         )
         event: Event = event or Event()
-        result.trace.debug(f"[WORKFLOW]: Run {self.name!r} with non-threading.")
+        result.trace.debug(f"... Run {self.name!r} with non-threading.")
         with ThreadPoolExecutor(
             max_workers=1,
             thread_name_prefix="wf_exec_non_threading_",
         ) as executor:
-            future: Future | None = None
+            future: Optional[Future] = None
 
             while not job_queue.empty() and (
                 not_timeout_flag := ((time.monotonic() - ts) < timeout)
@@ -1296,7 +1281,7 @@ class Workflow(BaseModel):
                     )
                 elif check == SKIP:  # pragma: no cov
                     result.trace.info(f"[JOB]: Skip job: {job_id!r}")
-                    job.set_outputs({"SKIP": {"skipped": True}}, to=context)
+                    job.set_outputs(output={"skipped": True}, to=context)
                     job_queue.task_done()
                     continue
 
@@ -1309,27 +1294,33 @@ class Workflow(BaseModel):
                         event=event,
                     )
                     time.sleep(0.025)
-                elif future.done():
+                elif future.done() or future.cancelled():
                     if e := future.exception():
                         result.trace.error(f"[WORKFLOW]: {e}")
                         raise WorkflowException(str(e))
 
                     future = None
                     job_queue.put(job_id)
-                elif future.running():
+                elif future.running() or "state=pending" in str(future):
                     time.sleep(0.075)
                     job_queue.put(job_id)
                 else:  # pragma: no cov
                     job_queue.put(job_id)
-                    result.trace.debug(
-                        f"Execution non-threading does not handle case: {future} "
-                        f"that not running."
+                    result.trace.warning(
+                        f"... Execution non-threading not handle: {future}."
                     )
 
                 job_queue.task_done()
 
             if not_timeout_flag:
                 job_queue.join()
+                if future:  # pragma: no cov
+                    if e := future.exception():
+                        result.trace.error(f"[WORKFLOW]: {e}")
+                        raise WorkflowException(str(e))
+
+                    future.result()
+
                 return context
 
             result.trace.error(
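
The extra branch conditions rely on how `concurrent.futures.Future` reports its state: a future that has been created but not yet started is neither running nor done, and its repr contains the pending state string, which is what the new check uses to re-queue the job. A small runnable illustration:

    from concurrent.futures import Future

    f = Future()                 # freshly created, not yet started by an executor
    f.running(), f.done()        # (False, False)
    "state=pending" in str(f)    # True -> the branch above puts the job back
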
@@ -1352,6 +1343,12 @@ class WorkflowTask:
 
     This dataclass has the release method for itself that prepare necessary
     arguments before passing to the parent release method.
+
+    :param alias: (str) An alias name of Workflow model.
+    :param workflow: (Workflow) A Workflow model instance.
+    :param runner: (CronRunner)
+    :param values:
+    :param extras:
     """
 
     alias: str