ddeutil-workflow 0.0.15__py3-none-any.whl → 0.0.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/{cron.py → __cron.py} +12 -6
- ddeutil/workflow/__init__.py +1 -0
- ddeutil/workflow/__types.py +18 -6
- ddeutil/workflow/api.py +3 -5
- ddeutil/workflow/cli.py +2 -6
- ddeutil/workflow/conf.py +441 -3
- ddeutil/workflow/job.py +119 -62
- ddeutil/workflow/on.py +11 -8
- ddeutil/workflow/repeat.py +2 -6
- ddeutil/workflow/route.py +4 -12
- ddeutil/workflow/scheduler.py +71 -54
- ddeutil/workflow/stage.py +79 -43
- ddeutil/workflow/utils.py +96 -283
- {ddeutil_workflow-0.0.15.dist-info → ddeutil_workflow-0.0.17.dist-info}/METADATA +44 -25
- ddeutil_workflow-0.0.17.dist-info/RECORD +21 -0
- {ddeutil_workflow-0.0.15.dist-info → ddeutil_workflow-0.0.17.dist-info}/WHEEL +1 -1
- ddeutil/workflow/log.py +0 -198
- ddeutil_workflow-0.0.15.dist-info/RECORD +0 -22
- {ddeutil_workflow-0.0.15.dist-info → ddeutil_workflow-0.0.17.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.15.dist-info → ddeutil_workflow-0.0.17.dist-info}/entry_points.txt +0 -0
- {ddeutil_workflow-0.0.15.dist-info → ddeutil_workflow-0.0.17.dist-info}/top_level.txt +0 -0
ddeutil/workflow/__about__.py
CHANGED
@@ -1 +1 @@
-__version__: str = "0.0.15"
+__version__: str = "0.0.17"
ddeutil/workflow/{cron.py → __cron.py}
RENAMED
@@ -18,10 +18,7 @@ from ddeutil.core import (
     isinstance_check,
     must_split,
 )
-from ddeutil.core.dtutils import (
-    next_date,
-    replace_date,
-)
+from ddeutil.core.dtutils import next_date, replace_date

 WEEKDAYS: dict[str, int] = {
     "Sun": 0,
@@ -37,7 +34,7 @@ WEEKDAYS: dict[str, int] = {
 class CronYearLimit(Exception): ...


-def str2cron(value: str) -> str:
+def str2cron(value: str) -> str:  # pragma: no cov
     """Convert Special String with the @ prefix to Crontab value.

     :param value: A string value that want to convert to cron value.
@@ -69,6 +66,8 @@ def str2cron(value: str) -> str:

 @dataclass(frozen=True)
 class Unit:
+    """Unit dataclass for CronPart object."""
+
     name: str
     range: partial
     min: int
@@ -85,6 +84,8 @@ class Unit:

 @dataclass
 class Options:
+    """Options dataclass for config CronPart object."""
+
     output_weekday_names: bool = False
     output_month_names: bool = False
     output_hashes: bool = False
@@ -158,7 +159,12 @@ CRON_UNITS_YEAR: tuple[Unit, ...] = CRON_UNITS + (

 @total_ordering
 class CronPart:
-    """Part of Cron object that represent a collection of positive integers."""
+    """Part of Cron object that represent a collection of positive integers.
+
+    :param unit: A Unit dataclass object.
+    :param values: A crontab values that want to validate
+    :param options: A Options dataclass object.
+    """

     __slots__: tuple[str, ...] = (
         "unit",
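
Note on usage: the renamed `__cron.py` keeps `str2cron` public, now excluded from coverage. A minimal sketch of the intended call, assuming the conventional `@`-prefixed aliases such as `@daily` are among the supported inputs (the alias table lives in the function body, which this diff does not show):

    from ddeutil.workflow.__cron import str2cron

    # "@daily" is the conventional crontab alias; treat the exact set of
    # supported aliases as an assumption here.
    print(str2cron("@daily"))  # expected form: a five-field crontab string
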
ddeutil/workflow/__init__.py
CHANGED
@@ -3,6 +3,7 @@
 # Licensed under the MIT License. See LICENSE in the project root for
 # license information.
 # ------------------------------------------------------------------------------
+from .conf import Config, FileLog, Loader
 from .exceptions import (
     JobException,
     ParamValueException,
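
This one-line change re-exports the new configuration and logging objects at the package root, so downstream code can import them without the module path:

    # New in 0.0.17: re-exported at the package root by __init__.py.
    from ddeutil.workflow import Config, FileLog, Loader
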
ddeutil/workflow/__types.py
CHANGED
@@ -16,7 +16,7 @@ from re import (
     Match,
     Pattern,
 )
-from typing import Any, Optional, Union
+from typing import Any, Optional, TypedDict, Union

 from typing_extensions import Self

@@ -24,8 +24,11 @@ TupleStr = tuple[str, ...]
 DictData = dict[str, Any]
 DictStr = dict[str, str]
 Matrix = dict[str, Union[list[str], list[int]]]
-
-
+
+
+class Context(TypedDict):
+    params: dict[str, Any]
+    jobs: dict[str, Any]


 @dataclass(frozen=True)
@@ -56,20 +59,24 @@ class Re:
     # Regular expression:
     # - Version 1:
     #   \${{\s*(?P<caller>[a-zA-Z0-9_.\s'\"\[\]\(\)\-\{}]+?)\s*(?P<post_filters>(?:\|\s*(?:[a-zA-Z0-9_]{3,}[a-zA-Z0-9_.,-\\%\s'\"[\]()\{}]+)\s*)*)}}
-    # - Version 2 (2024-09-30):
+    # - Version 2: (2024-09-30):
     #   \${{\s*(?P<caller>(?P<caller_prefix>(?:[a-zA-Z_-]+\.)*)(?P<caller_last>[a-zA-Z0-9_\-.'\"(\)[\]{}]+))\s*(?P<post_filters>(?:\|\s*(?:[a-zA-Z0-9_]{3,}[a-zA-Z0-9_.,-\\%\s'\"[\]()\{}]+)\s*)*)}}
+    # - Version 3: (2024-10-05):
+    #   \${{\s*(?P<caller>(?P<caller_prefix>(?:[a-zA-Z_-]+\??\.)*)(?P<caller_last>[a-zA-Z0-9_\-.'\"(\)[\]{}]+\??))\s*(?P<post_filters>(?:\|\s*(?:[a-zA-Z0-9_]{3,}[a-zA-Z0-9_.,-\\%\s'\"[\]()\{}]+)\s*)*)}}
     #
     # Examples:
     #   - ${{ params.data_dt }}
     #   - ${{ params.source.table }}
+    #   - ${{ params.datetime | fmt('%Y-%m-%d') }}
+    #   - ${{ params.source?.schema }}
     #
     __re_caller: str = r"""
         \$
         {{
         \s*
         (?P<caller>
-            (?P<caller_prefix>(?:[a-zA-Z_-]+\.)*)
-            (?P<caller_last>[a-zA-Z0-9_\-.'\"(\)[\]{}]+)
+            (?P<caller_prefix>(?:[a-zA-Z_-]+\??\.)*)
+            (?P<caller_last>[a-zA-Z0-9_\-.'\"(\)[\]{}]+\??)
         )
         \s*
         (?P<post_filters>
@@ -109,5 +116,10 @@ class Re:

     @classmethod
     def finditer_caller(cls, value) -> Iterator[CallerRe]:
+        """Generate CallerRe object that create from matching object that
+        extract with re.finditer function.
+
+        :rtype: Iterator[CallerRe]
+        """
         for found in cls.RE_CALLER.finditer(value):
             yield CallerRe.from_regex(found)
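
The Version 3 pattern's main addition is the optional `?` marker on each dotted segment of the caller, which the new examples use for optional access (`${{ params.source?.schema }}`). A standalone re-creation of just the caller portion for illustration; the packaged `Re.RE_CALLER` additionally captures the `post_filters` group used by filters such as `| fmt('%Y-%m-%d')`:

    import re

    # Caller-only sketch of the Version 3 pattern (post_filters omitted).
    RE_CALLER = re.compile(
        r"\$\{\{\s*(?P<caller>"
        r"(?P<caller_prefix>(?:[a-zA-Z_-]+\??\.)*)"
        r"(?P<caller_last>[a-zA-Z0-9_\-.'\"(\)[\]{}]+\??)"
        r")\s*\}\}"
    )

    m = RE_CALLER.search("${{ params.source?.schema }}")
    print(m.group("caller"))         # params.source?.schema
    print(m.group("caller_prefix"))  # params.source?.
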
ddeutil/workflow/api.py
CHANGED
@@ -7,7 +7,6 @@ from __future__ import annotations

 import asyncio
 import contextlib
-import os
 import uuid
 from collections.abc import AsyncIterator
 from datetime import datetime, timedelta
@@ -15,7 +14,6 @@ from queue import Empty, Queue
 from threading import Thread
 from typing import TypedDict

-from ddeutil.core import str2bool
 from dotenv import load_dotenv
 from fastapi import FastAPI
 from fastapi.middleware.gzip import GZipMiddleware
@@ -23,7 +21,7 @@ from fastapi.responses import UJSONResponse
 from pydantic import BaseModel

 from .__about__ import __version__
-from .log import get_logger
+from .conf import config, get_logger
 from .repeat import repeat_at, repeat_every
 from .scheduler import WorkflowTaskData

@@ -131,12 +129,12 @@ async def message_upper(payload: Payload):
     return await get_result(request_id)


-if str2bool(os.getenv("WORKFLOW_API_ENABLE_ROUTE_WORKFLOW", "true")):
+if config.enable_route_workflow:
     from .route import workflow

     app.include_router(workflow)

-if str2bool(os.getenv("WORKFLOW_API_ENABLE_ROUTE_SCHEDULE", "true")):
+if config.enable_route_schedule:
     from .route import schedule
     from .scheduler import workflow_task

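
Both router guards now read the shared `config` singleton instead of calling `str2bool(os.getenv(...))` inline. A sketch of driving the flags from the environment; the values are illustrative, and they must be set before the first import because `config` snapshots the environment when `ddeutil.workflow.conf` loads:

    import os

    # Flags are read once, at import time of the conf module.
    os.environ["WORKFLOW_API_ENABLE_ROUTE_WORKFLOW"] = "false"
    os.environ["WORKFLOW_API_ENABLE_ROUTE_SCHEDULE"] = "true"

    from ddeutil.workflow.api import app  # only the schedule routes attach
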
ddeutil/workflow/cli.py
CHANGED
@@ -6,16 +6,14 @@
 from __future__ import annotations

 import json
-import os
 from datetime import datetime
 from enum import Enum
 from typing import Annotated, Optional
-from zoneinfo import ZoneInfo

 from ddeutil.core import str2list
 from typer import Argument, Option, Typer

-from .log import get_logger
+from .conf import config, get_logger

 logger = get_logger("ddeutil.workflow")
 cli: Typer = Typer()
@@ -73,9 +71,7 @@ def schedule(
     excluded: list[str] = str2list(excluded) if excluded else []
     externals: str = externals or "{}"
     if stop:
-        stop: datetime = stop.astimezone(
-            tz=ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
-        )
+        stop: datetime = stop.astimezone(tz=config.tz)

     from .scheduler import workflow_runner

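
The CLI now reuses the `ZoneInfo` already parsed on `config` rather than rebuilding it from `WORKFLOW_CORE_TIMEZONE` at the call site. Equivalent standalone usage, with an illustrative datetime:

    from datetime import datetime

    from ddeutil.workflow.conf import config

    # config.tz defaults to UTC unless WORKFLOW_CORE_TIMEZONE is set.
    stop = datetime(2024, 10, 5, 12, 0).astimezone(tz=config.tz)
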
ddeutil/workflow/conf.py
CHANGED
@@ -5,13 +5,31 @@
 # ------------------------------------------------------------------------------
 from __future__ import annotations

+import json
+import logging
 import os
+from abc import ABC, abstractmethod
+from collections.abc import Iterator
+from datetime import datetime, timedelta
+from functools import cached_property, lru_cache
+from pathlib import Path
+from typing import ClassVar, Optional, TypeVar, Union
 from zoneinfo import ZoneInfo

-from ddeutil.core import str2bool
+from ddeutil.core import import_string, str2bool
+from ddeutil.io import PathSearch, YamlFlResolve
 from dotenv import load_dotenv
+from pydantic import BaseModel, Field
+from pydantic.functional_validators import model_validator
+from typing_extensions import Self
+
+from .__types import DictData
+
+AnyModel = TypeVar("AnyModel", bound=BaseModel)
+AnyModelType = type[AnyModel]

 load_dotenv()
+
 env = os.getenv


@@ -21,25 +39,445 @@ class Config:
     """

     # NOTE: Core
+    root_path: Path = Path(os.getenv("WORKFLOW_ROOT_PATH", "."))
     tz: ZoneInfo = ZoneInfo(env("WORKFLOW_CORE_TIMEZONE", "UTC"))
+    workflow_id_simple_mode: bool = str2bool(
+        os.getenv("WORKFLOW_CORE_WORKFLOW_ID_SIMPLE_MODE", "true")
+    )
+
+    # NOTE: Register
+    regis_hook_str: str = os.getenv(
+        "WORKFLOW_CORE_REGISTRY", "ddeutil.workflow"
+    )
+    regis_filter_str: str = os.getenv(
+        "WORKFLOW_CORE_REGISTRY_FILTER", "ddeutil.workflow.utils"
+    )
+
+    # NOTE: Logging
+    debug: bool = str2bool(os.getenv("WORKFLOW_LOG_DEBUG_MODE", "true"))
+    enable_write_log: bool = str2bool(
+        os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false")
+    )

     # NOTE: Stage
     stage_raise_error: bool = str2bool(
-        env("WORKFLOW_CORE_STAGE_RAISE_ERROR", "true")
+        env("WORKFLOW_CORE_STAGE_RAISE_ERROR", "false")
     )
     stage_default_id: bool = str2bool(
         env("WORKFLOW_CORE_STAGE_DEFAULT_ID", "false")
     )

+    # NOTE: Job
+    job_raise_error: bool = str2bool(
+        env("WORKFLOW_CORE_JOB_RAISE_ERROR", "true")
+    )
+    job_default_id: bool = str2bool(
+        env("WORKFLOW_CORE_JOB_DEFAULT_ID", "false")
+    )
+
     # NOTE: Workflow
     max_job_parallel: int = int(env("WORKFLOW_CORE_MAX_JOB_PARALLEL", "2"))
+    max_poking_pool_worker: int = int(
+        os.getenv("WORKFLOW_CORE_MAX_NUM_POKING", "4")
+    )
+
+    # NOTE: Schedule App
+    max_schedule_process: int = int(env("WORKFLOW_APP_MAX_PROCESS", "2"))
+    max_schedule_per_process: int = int(
+        env("WORKFLOW_APP_MAX_SCHEDULE_PER_PROCESS", "100")
+    )
+    stop_boundary_delta_str: str = env(
+        "WORKFLOW_APP_STOP_BOUNDARY_DELTA", '{"minutes": 5, "seconds": 20}'
+    )
+
+    # NOTE: API
+    enable_route_workflow: bool = str2bool(
+        os.getenv("WORKFLOW_API_ENABLE_ROUTE_WORKFLOW", "true")
+    )
+    enable_route_schedule: bool = str2bool(
+        os.getenv("WORKFLOW_API_ENABLE_ROUTE_SCHEDULE", "true")
+    )

     def __init__(self):
         if self.max_job_parallel < 0:
             raise ValueError(
-                f"MAX_JOB_PARALLEL should more than 0 but got "
+                f"``MAX_JOB_PARALLEL`` should more than 0 but got "
                 f"{self.max_job_parallel}."
             )
+        try:
+            self.stop_boundary_delta: timedelta = timedelta(
+                **json.loads(self.stop_boundary_delta_str)
+            )
+        except Exception as err:
+            raise ValueError(
+                "Config ``WORKFLOW_APP_STOP_BOUNDARY_DELTA`` can not parsing to"
+                f"timedelta with {self.stop_boundary_delta_str}."
+            ) from err
+
+    def refresh_dotenv(self) -> Self:
+        """Reload environment variables from the current stage."""
+        self.tz: ZoneInfo = ZoneInfo(env("WORKFLOW_CORE_TIMEZONE", "UTC"))
+        self.stage_raise_error: bool = str2bool(
+            env("WORKFLOW_CORE_STAGE_RAISE_ERROR", "false")
+        )
+        return self
+
+    @property
+    def conf_path(self) -> Path:
+        """Config path that use root_path class argument for this construction.
+
+        :rtype: Path
+        """
+        return self.root_path / os.getenv("WORKFLOW_CORE_PATH_CONF", "conf")
+
+    @property
+    def regis_hook(self) -> list[str]:
+        return [r.strip() for r in self.regis_hook_str.split(",")]
+
+    @property
+    def regis_filter(self) -> list[str]:
+        return [r.strip() for r in self.regis_filter_str.split(",")]
+
+
+class SimLoad:
+    """Simple Load Object that will search config data by given some identity
+    value like name of workflow or on.
+
+    :param name: A name of config data that will read by Yaml Loader object.
+    :param conf: A Params model object.
+    :param externals: An external parameters
+
+    Noted:
+
+        The config data should have ``type`` key for modeling validation that
+        make this loader know what is config should to do pass to.
+
+        ... <identity-key>:
+        ...     type: <importable-object>
+        ...     <key-data>: <value-data>
+        ...     ...
+
+    """
+
+    def __init__(
+        self,
+        name: str,
+        conf: Config,
+        externals: DictData | None = None,
+    ) -> None:
+        self.data: DictData = {}
+        for file in PathSearch(conf.conf_path).files:
+            if data := self.filter_suffix(file, name):
+                self.data = data
+
+        # VALIDATE: check the data that reading should not empty.
+        if not self.data:
+            raise ValueError(f"Config {name!r} does not found on conf path")
+
+        self.conf: Config = conf
+        self.externals: DictData = externals or {}
+        self.data.update(self.externals)
+
+    @classmethod
+    def finds(
+        cls,
+        obj: object,
+        conf: Config,
+        *,
+        include: list[str] | None = None,
+        exclude: list[str] | None = None,
+    ) -> Iterator[tuple[str, DictData]]:
+        """Find all data that match with object type in config path. This class
+        method can use include and exclude list of identity name for filter and
+        adds-on.
+
+        :param obj: A object that want to validate matching before return.
+        :param conf: A config object.
+        :param include:
+        :param exclude:
+        :rtype: Iterator[tuple[str, DictData]]
+        """
+        exclude: list[str] = exclude or []
+        for file in PathSearch(conf.conf_path).files:
+            for key, data in cls.filter_suffix(file).items():
+
+                if key in exclude:
+                    continue
+
+                if issubclass(get_type(data["type"], conf), obj):
+                    yield key, (
+                        {k: data[k] for k in data if k in include}
+                        if include
+                        else data
+                    )
+
+    @classmethod
+    def filter_suffix(cls, file: Path, name: str | None = None) -> DictData:
+        if any(file.suffix.endswith(s) for s in (".yml", ".yaml")):
+            values: DictData = YamlFlResolve(file).read()
+            return values.get(name, {}) if name else values
+        return {}
+
+    @cached_property
+    def type(self) -> AnyModelType:
+        """Return object of string type which implement on any registry. The
+        object type.
+
+        :rtype: AnyModelType
+        """
+        if _typ := self.data.get("type"):
+            return get_type(_typ, self.conf)
+        raise ValueError(
+            f"the 'type' value: {_typ} does not exists in config data."
+        )
+
+
+class Loader(SimLoad):
+    """Loader Object that get the config `yaml` file from current path.
+
+    :param name: A name of config data that will read by Yaml Loader object.
+    :param externals: An external parameters
+    """
+
+    @classmethod
+    def finds(
+        cls,
+        obj: object,
+        *,
+        include: list[str] | None = None,
+        exclude: list[str] | None = None,
+        **kwargs,
+    ) -> DictData:
+        """Override the find class method from the Simple Loader object.
+
+        :param obj: A object that want to validate matching before return.
+        :param include:
+        :param exclude:
+        """
+        return super().finds(
+            obj=obj, conf=Config(), include=include, exclude=exclude
+        )
+
+    def __init__(self, name: str, externals: DictData) -> None:
+        super().__init__(name, conf=Config(), externals=externals)
+
+
+def get_type(t: str, params: Config) -> AnyModelType:
+    """Return import type from string importable value in the type key.
+
+    :param t: A importable type string.
+    :param params: A config parameters that use registry to search this
+        type.
+    :rtype: AnyModelType
+    """
+    try:
+        # NOTE: Auto adding module prefix if it does not set
+        return import_string(f"ddeutil.workflow.{t}")
+    except ModuleNotFoundError:
+        for registry in params.regis_hook:
+            try:
+                return import_string(f"{registry}.{t}")
+            except ModuleNotFoundError:
+                continue
+        return import_string(f"{t}")


 config = Config()
+
+
+@lru_cache
+def get_logger(name: str):
+    """Return logger object with an input module name.
+
+    :param name: A module name that want to log.
+    """
+    logger = logging.getLogger(name)
+    formatter = logging.Formatter(
+        fmt=(
+            "%(asctime)s.%(msecs)03d (%(name)-10s, %(process)-5d, "
+            "%(thread)-5d) [%(levelname)-7s] %(message)-120s "
+            "(%(filename)s:%(lineno)s)"
+        ),
+        datefmt="%Y-%m-%d %H:%M:%S",
+    )
+    stream = logging.StreamHandler()
+    stream.setFormatter(formatter)
+    logger.addHandler(stream)
+
+    logger.setLevel(logging.DEBUG if config.debug else logging.INFO)
+    return logger
+
+
+class BaseLog(BaseModel, ABC):
+    """Base Log Pydantic Model with abstraction class property that implement
+    only model fields. This model should to use with inherit to logging
+    sub-class like file, sqlite, etc.
+    """
+
+    name: str = Field(description="A workflow name.")
+    on: str = Field(description="A cronjob string of this piepline schedule.")
+    release: datetime = Field(description="A release datetime.")
+    context: DictData = Field(
+        default_factory=dict,
+        description=(
+            "A context data that receive from a workflow execution result.",
+        ),
+    )
+    parent_run_id: Optional[str] = Field(default=None)
+    run_id: str
+    update: datetime = Field(default_factory=datetime.now)
+
+    @model_validator(mode="after")
+    def __model_action(self) -> Self:
+        """Do before the Log action with WORKFLOW_LOG_ENABLE_WRITE env variable.
+
+        :rtype: Self
+        """
+        if config.enable_write_log:
+            self.do_before()
+        return self
+
+    def do_before(self) -> None:  # pragma: no cov
+        """To something before end up of initial log model."""
+
+    @abstractmethod
+    def save(self, excluded: list[str] | None) -> None:  # pragma: no cov
+        """Save this model logging to target logging store."""
+        raise NotImplementedError("Log should implement ``save`` method.")
+
+
+class FileLog(BaseLog):
+    """File Log Pydantic Model that use to saving log data from result of
+    workflow execution. It inherit from BaseLog model that implement the
+    ``self.save`` method for file.
+    """
+
+    filename_fmt: ClassVar[str] = (
+        "./logs/workflow={name}/release={release:%Y%m%d%H%M%S}"
+    )
+
+    def do_before(self) -> None:
+        """Create directory of release before saving log file."""
+        self.pointer().mkdir(parents=True, exist_ok=True)
+
+    @classmethod
+    def find_logs(cls, name: str) -> Iterator[Self]:
+        """Generate the logging data that found from logs path with specific a
+        workflow name.
+
+        :param name: A workflow name that want to search release logging data.
+        """
+        pointer: Path = config.root_path / f"./logs/workflow={name}"
+        if not pointer.exists():
+            raise FileNotFoundError(
+                f"Pointer: ./logs/workflow={name} does not found."
+            )
+
+        for file in pointer.glob("./release=*/*.log"):
+            with file.open(mode="r", encoding="utf-8") as f:
+                yield cls.model_validate(obj=json.load(f))
+
+    @classmethod
+    def find_log_latest(
+        cls,
+        name: str,
+        release: datetime | None = None,
+    ) -> Self:
+        """Return the logging data that found from logs path with specific
+        workflow name and release values. If a release does not pass to an input
+        argument, it will return the latest release from the current log path.
+
+        :raise FileNotFoundError:
+        :raise NotImplementedError:
+
+        :rtype: Self
+        """
+        if release is None:
+            raise NotImplementedError("Find latest log does not implement yet.")
+
+        pointer: Path = (
+            config.root_path
+            / f"./logs/workflow={name}/release={release:%Y%m%d%H%M%S}"
+        )
+        if not pointer.exists():
+            raise FileNotFoundError(
+                f"Pointer: ./logs/workflow={name}/"
+                f"release={release:%Y%m%d%H%M%S} does not found."
+            )
+
+        with max(pointer.glob("./*.log"), key=os.path.getctime).open(
+            mode="r", encoding="utf-8"
+        ) as f:
+            return cls.model_validate(obj=json.load(f))
+
+    @classmethod
+    def is_pointed(
+        cls,
+        name: str,
+        release: datetime,
+        *,
+        queue: list[datetime] | None = None,
+    ) -> bool:
+        """Check this log already point in the destination.
+
+        :param name: A workflow name.
+        :param release: A release datetime.
+        :param queue: A list of queue of datetime that already run in the
+            future.
+        """
+        # NOTE: Check environ variable was set for real writing.
+        if not config.enable_write_log:
+            return False
+
+        # NOTE: create pointer path that use the same logic of pointer method.
+        pointer: Path = config.root_path / cls.filename_fmt.format(
+            name=name, release=release
+        )
+
+        if not queue:
+            return pointer.exists()
+        return pointer.exists() or (release in queue)
+
+    def pointer(self) -> Path:
+        """Return release directory path that was generated from model data.
+
+        :rtype: Path
+        """
+        return config.root_path / self.filename_fmt.format(
+            name=self.name, release=self.release
+        )
+
+    def save(self, excluded: list[str] | None) -> Self:
+        """Save logging data that receive a context data from a workflow
+        execution result.
+
+        :param excluded: An excluded list of key name that want to pass in the
+            model_dump method.
+        :rtype: Self
+        """
+        # NOTE: Check environ variable was set for real writing.
+        if not config.enable_write_log:
+            return self
+
+        log_file: Path = self.pointer() / f"{self.run_id}.log"
+        log_file.write_text(
+            json.dumps(
+                self.model_dump(exclude=excluded),
+                default=str,
+                indent=2,
+            ),
+            encoding="utf-8",
+        )
+        return self
+
+
+class SQLiteLog(BaseLog):  # pragma: no cov
+
+    def save(self, excluded: list[str] | None) -> None:
+        raise NotImplementedError("SQLiteLog does not implement yet.")
+
+
+Log = Union[
+    FileLog,
+    SQLiteLog,
+]
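
Taken together, `conf.py` now owns runtime configuration, YAML config loading, logger construction, and the log models that replace the removed `log.py`. A sketch of the two most user-facing pieces, `Loader` and `FileLog`; the workflow name, cron string, and run id below are hypothetical:

    from datetime import datetime

    from ddeutil.workflow import FileLog, Loader

    # Loader scans *.yml / *.yaml files under Config().conf_path for a
    # top-level "wf-example" key whose mapping carries a ``type`` field.
    loader = Loader("wf-example", externals={})
    print(loader.type)  # model class resolved from the ``type`` value
    print(loader.data)  # raw config mapping, updated with ``externals``

    # FileLog.save writes a JSON .log file only when WORKFLOW_LOG_ENABLE_WRITE
    # is truthy; otherwise it returns the model untouched.
    log = FileLog(
        name="wf-example",
        on="* * * * *",
        release=datetime(2024, 10, 5),
        run_id="20241005120000",
        context={},
    )
    log.save(excluded=None)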