ddeutil-workflow 0.0.78__py3-none-any.whl → 0.0.80__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. ddeutil/workflow/__about__.py +1 -1
  2. ddeutil/workflow/__init__.py +2 -6
  3. ddeutil/workflow/api/routes/job.py +2 -2
  4. ddeutil/workflow/api/routes/logs.py +5 -5
  5. ddeutil/workflow/api/routes/workflows.py +3 -3
  6. ddeutil/workflow/audits.py +547 -176
  7. ddeutil/workflow/cli.py +19 -1
  8. ddeutil/workflow/conf.py +10 -20
  9. ddeutil/workflow/event.py +15 -6
  10. ddeutil/workflow/job.py +147 -74
  11. ddeutil/workflow/params.py +172 -58
  12. ddeutil/workflow/plugins/__init__.py +0 -0
  13. ddeutil/workflow/plugins/providers/__init__.py +0 -0
  14. ddeutil/workflow/plugins/providers/aws.py +908 -0
  15. ddeutil/workflow/plugins/providers/az.py +1003 -0
  16. ddeutil/workflow/plugins/providers/container.py +703 -0
  17. ddeutil/workflow/plugins/providers/gcs.py +826 -0
  18. ddeutil/workflow/result.py +6 -4
  19. ddeutil/workflow/reusables.py +151 -95
  20. ddeutil/workflow/stages.py +28 -28
  21. ddeutil/workflow/traces.py +1697 -541
  22. ddeutil/workflow/utils.py +109 -67
  23. ddeutil/workflow/workflow.py +42 -30
  24. {ddeutil_workflow-0.0.78.dist-info → ddeutil_workflow-0.0.80.dist-info}/METADATA +39 -19
  25. ddeutil_workflow-0.0.80.dist-info/RECORD +36 -0
  26. ddeutil_workflow-0.0.78.dist-info/RECORD +0 -30
  27. {ddeutil_workflow-0.0.78.dist-info → ddeutil_workflow-0.0.80.dist-info}/WHEEL +0 -0
  28. {ddeutil_workflow-0.0.78.dist-info → ddeutil_workflow-0.0.80.dist-info}/entry_points.txt +0 -0
  29. {ddeutil_workflow-0.0.78.dist-info → ddeutil_workflow-0.0.80.dist-info}/licenses/LICENSE +0 -0
  30. {ddeutil_workflow-0.0.78.dist-info → ddeutil_workflow-0.0.80.dist-info}/top_level.txt +0 -0
ddeutil/workflow/cli.py CHANGED
@@ -73,6 +73,10 @@ def init() -> None:
       uses: tasks/say-hello-func@example
       with:
         name: ${{ params.name }}
+  second-job:
+
+    - name: "Hello Env"
+      echo: "Start say hi with ${ WORKFLOW_DEMO_HELLO }"
     """
     ).lstrip("\n")
 )
@@ -98,8 +102,22 @@ def init() -> None:
 
     init_path = task_path / "__init__.py"
     init_path.write_text("from .example import hello_world_task\n")
+
+    dotenv_file = Path(".env")
+    mode: str = "a" if dotenv_file.exists() else "w"
+    with dotenv_file.open(mode=mode) as f:
+        f.write("\n# Workflow env vars\n")
+        f.write(
+            "WORKFLOW_DEMO_HELLO=foo\n"
+            "WORKFLOW_CORE_DEBUG_MODE=true\n"
+            "WORKFLOW_LOG_TIMEZONE=Asia/Bangkok\n"
+            "WORKFLOW_LOG_TRACE_ENABLE_WRITE=false\n"
+            "WORKFLOW_LOG_AUDIT_ENABLE_WRITE=true\n"
+        )
+
+    typer.echo("Starter command:")
     typer.echo(
-        "Starter command: `workflow-cli workflows execute --name=wf-example`"
+        "> `source .env && workflow-cli workflows execute --name=wf-example`"
     )
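The demo `second-job` added above reads `WORKFLOW_DEMO_HELLO` from the environment, so the variables appended to `.env` must be exported (for example with `source .env`) before the starter command runs. A hypothetical Python equivalent of that export step, for illustration only:

    import os
    from pathlib import Path

    # Hypothetical stand-in for `source .env`: export the variables written by
    # `workflow-cli init` so ${ WORKFLOW_DEMO_HELLO } can resolve at runtime.
    for line in Path(".env").read_text().splitlines():
        if line and not line.startswith("#") and "=" in line:
            key, _, value = line.partition("=")
            os.environ.setdefault(key.strip(), value.strip())

    print(os.environ.get("WORKFLOW_DEMO_HELLO"))  # foo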
ddeutil/workflow/conf.py CHANGED
@@ -40,12 +40,12 @@ Note:
     ${VAR_NAME} syntax and provide extensive validation capabilities.
 """
 import copy
+import json
 import os
 from collections.abc import Iterator
 from functools import cached_property
 from pathlib import Path
-from typing import Final, Optional, TypeVar, Union
-from urllib.parse import ParseResult, urlparse
+from typing import Any, Final, Optional, TypeVar, Union
 from zoneinfo import ZoneInfo
 
 from ddeutil.core import str2bool
@@ -122,8 +122,8 @@ class Config:  # pragma: no cov
         return [r.strip() for r in regis_filter_str.split(",")]
 
     @property
-    def trace_url(self) -> ParseResult:
-        return urlparse(env("LOG_TRACE_URL", "file:./logs"))
+    def trace_handlers(self) -> list[dict[str, Any]]:
+        return json.loads(env("LOG_TRACE_HANDLERS", '[{"type": "console"}]'))
 
     @property
     def debug(self) -> bool:
@@ -155,23 +155,11 @@ class Config:  # pragma: no cov
         )
 
     @property
-    def log_format_file(self) -> str:
-        return env(
-            "LOG_FORMAT_FILE",
-            (
-                "{datetime} ({process:5d}, {thread:5d}) ({cut_id}) "
-                "{message:120s} ({filename}:{lineno})"
-            ),
+    def audit_conf(self) -> str:
+        return json.loads(
+            env("LOG_AUDIT_URL", '{"type": "file", "path": "./audits"}')
         )
 
-    @property
-    def enable_write_log(self) -> bool:
-        return str2bool(env("LOG_TRACE_ENABLE_WRITE", "false"))
-
-    @property
-    def audit_url(self) -> ParseResult:
-        return urlparse(env("LOG_AUDIT_URL", "file:./audits"))
-
     @property
     def enable_write_audit(self) -> bool:
         return str2bool(env("LOG_AUDIT_ENABLE_WRITE", "false"))
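Both logging settings above now take JSON values instead of URL strings. A minimal sketch of supplying and parsing them, assuming the `WORKFLOW_` prefix used by the `.env` that `workflow-cli init` writes; the defaults are the ones shown in this diff:

    import json
    import os

    # Assumed environment variable names (WORKFLOW_ prefix) with the defaults
    # from the diff above: a console trace handler and a file-based audit store.
    os.environ.setdefault("WORKFLOW_LOG_TRACE_HANDLERS", '[{"type": "console"}]')
    os.environ.setdefault(
        "WORKFLOW_LOG_AUDIT_URL", '{"type": "file", "path": "./audits"}'
    )

    handlers = json.loads(os.environ["WORKFLOW_LOG_TRACE_HANDLERS"])
    audit = json.loads(os.environ["WORKFLOW_LOG_AUDIT_URL"])
    print(handlers[0]["type"], audit["path"])  # console ./audits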
@@ -464,7 +452,9 @@ def dynamic(
     conf: Optional[T] = getattr(config, key, None) if f is None else f
     if extra is None:
         return conf
-    if not isinstance(extra, type(conf)):
+    # NOTE: Fix type checking for boolean value and int type like
+    # `isinstance(False, int)` which return True.
+    if type(extra) is not type(conf):
         raise TypeError(
             f"Type of config {key!r} from extras: {extra!r} does not valid "
             f"as config {type(conf)}."
ddeutil/workflow/event.py CHANGED
@@ -19,6 +19,9 @@ Classes:
     Crontab: Main cron-based event scheduler.
     CrontabYear: Enhanced cron scheduler with year constraints.
     ReleaseEvent: Release-based event triggers.
+    FileEvent: File system monitoring triggers.
+    WebhookEvent: API/webhook-based triggers.
+    DatabaseEvent: Database change monitoring triggers.
     SensorEvent: Sensor-based event monitoring.
 
 Example:
@@ -312,11 +315,9 @@ class CrontabYear(Crontab):
         cronjob: CronJobYear instance for year-aware schedule validation and generation.
     """
 
-    cronjob: CronJobYear = (
-        Field(
-            description=(
-                "A Cronjob object that use for validate and generate datetime."
-            ),
+    cronjob: CronJobYear = Field(
+        description=(
+            "A Cronjob object that use for validate and generate datetime."
         ),
     )
 
@@ -369,7 +370,15 @@ Cron = Annotated[
 
 
 class Event(BaseModel):
-    """Event model."""
+    """Event model with comprehensive trigger support.
+
+    Supports multiple types of event triggers including cron scheduling,
+    file monitoring, webhooks, database changes, sensor-based triggers,
+    polling-based triggers, message queue events, stream processing events,
+    batch processing events, data quality events, API rate limiting events,
+    data lineage events, ML pipeline events, data catalog events,
+    infrastructure events, compliance events, and business events.
+    """
 
     schedule: list[Cron] = Field(
         default_factory=list,
ddeutil/workflow/job.py CHANGED
@@ -72,7 +72,7 @@ from .result import (
 )
 from .reusables import has_template, param2template
 from .stages import Stage
-from .traces import Trace, get_trace
+from .traces import TraceManager, get_trace
 from .utils import cross_product, filter_func, gen_id
 
 MatrixFilter = list[dict[str, Union[str, int]]]
@@ -249,14 +249,21 @@ class RunsOn(str, Enum):
     SELF_HOSTED = "self_hosted"
     AZ_BATCH = "azure_batch"
     AWS_BATCH = "aws_batch"
+    GCP_BATCH = "gcp_batch"
     CLOUD_BATCH = "cloud_batch"
     DOCKER = "docker"
+    CONTAINER = "container"
 
 
+# Import constants for backward compatibility
 LOCAL = RunsOn.LOCAL
 SELF_HOSTED = RunsOn.SELF_HOSTED
 AZ_BATCH = RunsOn.AZ_BATCH
+AWS_BATCH = RunsOn.AWS_BATCH
+GCP_BATCH = RunsOn.GCP_BATCH
+CLOUD_BATCH = RunsOn.CLOUD_BATCH
 DOCKER = RunsOn.DOCKER
+CONTAINER = RunsOn.CONTAINER
 
 
 class BaseRunsOn(BaseModel):  # pragma: no cov
@@ -328,6 +335,98 @@ class OnDocker(BaseRunsOn):  # pragma: no cov
     args: DockerArgs = Field(default_factory=DockerArgs, alias="with")
 
 
+class ContainerArgs(BaseModel):
+    """Container arguments."""
+
+    image: str = Field(description="Docker image to use")
+    container_name: Optional[str] = Field(
+        default=None, description="Container name"
+    )
+    volumes: Optional[list[dict[str, str]]] = Field(
+        default=None, description="Volume mounts"
+    )
+    environment: Optional[dict[str, str]] = Field(
+        default=None, description="Environment variables"
+    )
+    network: Optional[dict[str, Any]] = Field(
+        default=None, description="Network configuration"
+    )
+    resources: Optional[dict[str, Any]] = Field(
+        default=None, description="Resource limits"
+    )
+    working_dir: Optional[str] = Field(
+        default="/app", description="Working directory"
+    )
+    user: Optional[str] = Field(default=None, description="User to run as")
+    command: Optional[str] = Field(
+        default=None, description="Override default command"
+    )
+    timeout: int = Field(
+        default=3600, description="Execution timeout in seconds"
+    )
+    remove: bool = Field(
+        default=True, description="Remove container after execution"
+    )
+    docker_host: Optional[str] = Field(
+        default=None, description="Docker host URL"
+    )
+
+
+class OnContainer(BaseRunsOn):  # pragma: no cov
+    """Runs-on Container."""
+
+    type: RunsOn = CONTAINER
+    args: ContainerArgs = Field(default_factory=ContainerArgs, alias="with")
+
+
+class AWSBatchArgs(BaseModel):
+    """AWS Batch arguments."""
+
+    job_queue_arn: str = Field(description="AWS Batch job queue ARN")
+    s3_bucket: str = Field(description="S3 bucket for file storage")
+    region_name: str = Field(default="us-east-1", description="AWS region")
+    aws_access_key_id: Optional[str] = Field(
+        default=None, description="AWS access key ID"
+    )
+    aws_secret_access_key: Optional[str] = Field(
+        default=None, description="AWS secret access key"
+    )
+    aws_session_token: Optional[str] = Field(
+        default=None, description="AWS session token"
+    )
+
+
+class OnAWSBatch(BaseRunsOn):  # pragma: no cov
+    """Runs-on AWS Batch."""
+
+    type: RunsOn = AWS_BATCH
+    args: AWSBatchArgs = Field(alias="with")
+
+
+class GCPBatchArgs(BaseModel):
+    """Google Cloud Batch arguments."""
+
+    project_id: str = Field(description="Google Cloud project ID")
+    region: str = Field(description="Google Cloud region")
+    gcs_bucket: str = Field(description="Google Cloud Storage bucket")
+    credentials_path: Optional[str] = Field(
+        default=None, description="Path to service account credentials"
+    )
+    machine_type: str = Field(
+        default="e2-standard-4", description="Machine type"
+    )
+    max_parallel_tasks: int = Field(
+        default=1, description="Maximum parallel tasks"
+    )
+
+
+class OnGCPBatch(BaseRunsOn):  # pragma: no cov
+    """Runs-on Google Cloud Batch."""
+
+    type: RunsOn = GCP_BATCH
+    args: GCPBatchArgs = Field(alias="with")
+
+
 def get_discriminator_runs_on(model: dict[str, Any]) -> RunsOn:
     """Get discriminator of the RunsOn models."""
     t: str = model.get("type")
@@ -339,6 +438,9 @@ RunsOnModel = Annotated[
         Annotated[OnSelfHosted, Tag(SELF_HOSTED)],
         Annotated[OnDocker, Tag(DOCKER)],
         Annotated[OnLocal, Tag(LOCAL)],
+        Annotated[OnContainer, Tag(CONTAINER)],
+        Annotated[OnAWSBatch, Tag(AWS_BATCH)],
+        Annotated[OnGCPBatch, Tag(GCP_BATCH)],
     ],
     Discriminator(get_discriminator_runs_on),
 ]
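As a rough sketch of how the extended union resolves a `runs-on` block: the `type` key drives the discriminator, and the `with` mapping is validated against the matching args model. The values below are invented, and the Pydantic v2 `model_validate` call is an assumption based on the models defined in this file:

    # Hypothetical runs-on block for the new container target; "with" is
    # aliased onto ContainerArgs by OnContainer above.
    runs_on_block = {
        "type": "container",
        "with": {"image": "python:3.11-slim", "timeout": 600},
    }
    on = OnContainer.model_validate(runs_on_block)
    print(on.type, on.args.image, on.args.timeout)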
@@ -482,7 +584,8 @@ class Job(BaseModel):
         return self
 
     @field_serializer("runs_on")
-    def __serialize_runs_on(self, value: RunsOnModel):
+    def __serialize_runs_on(self, value: RunsOnModel) -> DictData:
+        """Serialize the runs_on field."""
         return value.model_dump(by_alias=True)
 
     def stage(self, stage_id: str) -> Stage:
@@ -776,7 +879,7 @@ class Job(BaseModel):
         ts: float = time.monotonic()
         parent_run_id: str = run_id
         run_id: str = gen_id((self.id or "EMPTY"), unique=True)
-        trace: Trace = get_trace(
+        trace: TraceManager = get_trace(
             run_id, parent_run_id=parent_run_id, extras=self.extras
         )
         trace.info(
@@ -795,7 +898,14 @@ class Job(BaseModel):
         elif self.runs_on.type == SELF_HOSTED:  # pragma: no cov
             pass
         elif self.runs_on.type == AZ_BATCH:  # pragma: no cov
-            pass
+            from .plugins.providers.az import azure_batch_execute
+
+            return azure_batch_execute(
+                self,
+                params,
+                run_id=parent_run_id,
+                event=event,
+            ).make_info({"execution_time": time.monotonic() - ts})
         elif self.runs_on.type == DOCKER:  # pragma: no cov
             return docker_execution(
                 self,
@@ -803,6 +913,33 @@ class Job(BaseModel):
                 run_id=parent_run_id,
                 event=event,
             ).make_info({"execution_time": time.monotonic() - ts})
+        elif self.runs_on.type == CONTAINER:  # pragma: no cov
+            from .plugins.providers.container import container_execute
+
+            return container_execute(
+                self,
+                params,
+                run_id=parent_run_id,
+                event=event,
+            ).make_info({"execution_time": time.monotonic() - ts})
+        elif self.runs_on.type == AWS_BATCH:  # pragma: no cov
+            from .plugins.providers.aws import aws_batch_execute
+
+            return aws_batch_execute(
+                self,
+                params,
+                run_id=parent_run_id,
+                event=event,
+            ).make_info({"execution_time": time.monotonic() - ts})
+        elif self.runs_on.type == GCP_BATCH:  # pragma: no cov
+            from .plugins.providers.gcs import gcp_batch_execute
+
+            return gcp_batch_execute(
+                self,
+                params,
+                run_id=parent_run_id,
+                event=event,
+            ).make_info({"execution_time": time.monotonic() - ts})
 
         trace.error(
             f"[JOB]: Execution not support runs-on: {self.runs_on.type.value!r} "
@@ -879,7 +1016,7 @@ def local_execute_strategy(
 
     :rtype: tuple[Status, DictData]
     """
-    trace: Trace = get_trace(
+    trace: TraceManager = get_trace(
         run_id, parent_run_id=parent_run_id, extras=job.extras
     )
     if strategy:
@@ -1015,7 +1152,7 @@ def local_execute(
     ts: float = time.monotonic()
     parent_run_id: StrOrNone = run_id
     run_id: str = gen_id((job.id or "EMPTY"), unique=True)
-    trace: Trace = get_trace(
+    trace: TraceManager = get_trace(
         run_id, parent_run_id=parent_run_id, extras=job.extras
     )
     context: DictData = {"status": WAIT}
@@ -1158,7 +1295,7 @@ def self_hosted_execute(
     """
     parent_run_id: StrOrNone = run_id
     run_id: str = gen_id((job.id or "EMPTY"), unique=True)
-    trace: Trace = get_trace(
+    trace: TraceManager = get_trace(
         run_id, parent_run_id=parent_run_id, extras=job.extras
     )
     context: DictData = {"status": WAIT}
@@ -1221,71 +1358,8 @@ def self_hosted_execute(
     )
 
 
-def azure_batch_execute(
-    job: Job,
-    params: DictData,
-    *,
-    run_id: StrOrNone = None,
-    event: Optional[Event] = None,
-) -> Result:  # pragma: no cov
-    """Azure Batch job execution that will run all job's stages on the Azure
-    Batch Node and extract the result file to be returning context result.
-
-    Steps:
-        - Create a Batch account and a Batch pool.
-        - Create a Batch job and add tasks to the job. Each task represents a
-          command to run on a compute node.
-        - Specify the command to run the Python script in the task. You can use
-          the cmd /c command to run the script with the Python interpreter.
-        - Upload the Python script and any required input files to Azure Storage
-          Account.
-        - Configure the task to download the input files from Azure Storage to
-          the compute node before running the script.
-        - Monitor the job and retrieve the output files from Azure Storage.
-
-    References:
-        - https://docs.azure.cn/en-us/batch/tutorial-parallel-python
-
-    :param job:
-    :param params:
-    :param run_id:
-    :param event:
-
-    :rtype: Result
-    """
-    parent_run_id: StrOrNone = run_id
-    run_id: str = gen_id((job.id or "EMPTY"), unique=True)
-    trace: Trace = get_trace(
-        run_id, parent_run_id=parent_run_id, extras=job.extras
-    )
-    context: DictData = {"status": WAIT}
-    trace.info("[JOB]: Start Azure Batch executor.")
-
-    if event and event.is_set():
-        return Result(
-            run_id=run_id,
-            parent_run_id=parent_run_id,
-            status=CANCEL,
-            context=catch(
-                context,
-                status=CANCEL,
-                updated={
-                    "errors": JobCancelError(
-                        "Execution was canceled from the event before start "
-                        "self-hosted execution."
-                    ).to_dict()
-                },
-            ),
-            extras=job.extras,
-        )
-    print(params)
-    return Result(
-        run_id=run_id,
-        parent_run_id=parent_run_id,
-        status=SUCCESS,
-        context=catch(context, status=SUCCESS),
-        extras=job.extras,
-    )
+# Azure Batch execution is now handled by the Azure Batch provider
+# See src/ddeutil/workflow/plugins/providers/az.py for implementation
 
 
 def docker_execution(
@@ -1304,7 +1378,7 @@ def docker_execution(
     """
     parent_run_id: StrOrNone = run_id
    run_id: str = gen_id((job.id or "EMPTY"), unique=True)
-    trace: Trace = get_trace(
+    trace: TraceManager = get_trace(
         run_id, parent_run_id=parent_run_id, extras=job.extras
     )
     context: DictData = {"status": WAIT}
@@ -1327,7 +1401,6 @@ def docker_execution(
             ),
         extras=job.extras,
     )
-    print(params)
     return Result(
         run_id=run_id,
         parent_run_id=parent_run_id,