ddeutil-workflow 0.0.77__py3-none-any.whl → 0.0.79__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +1 -5
- ddeutil/workflow/api/routes/job.py +2 -2
- ddeutil/workflow/audits.py +554 -112
- ddeutil/workflow/cli.py +25 -3
- ddeutil/workflow/conf.py +16 -28
- ddeutil/workflow/errors.py +13 -15
- ddeutil/workflow/event.py +37 -41
- ddeutil/workflow/job.py +161 -92
- ddeutil/workflow/params.py +172 -58
- ddeutil/workflow/plugins/__init__.py +0 -0
- ddeutil/workflow/plugins/providers/__init__.py +0 -0
- ddeutil/workflow/plugins/providers/aws.py +908 -0
- ddeutil/workflow/plugins/providers/az.py +1003 -0
- ddeutil/workflow/plugins/providers/container.py +703 -0
- ddeutil/workflow/plugins/providers/gcs.py +826 -0
- ddeutil/workflow/result.py +35 -37
- ddeutil/workflow/reusables.py +153 -96
- ddeutil/workflow/stages.py +84 -60
- ddeutil/workflow/traces.py +1660 -521
- ddeutil/workflow/utils.py +111 -69
- ddeutil/workflow/workflow.py +74 -47
- {ddeutil_workflow-0.0.77.dist-info → ddeutil_workflow-0.0.79.dist-info}/METADATA +52 -20
- ddeutil_workflow-0.0.79.dist-info/RECORD +36 -0
- ddeutil_workflow-0.0.77.dist-info/RECORD +0 -30
- {ddeutil_workflow-0.0.77.dist-info → ddeutil_workflow-0.0.79.dist-info}/WHEEL +0 -0
- {ddeutil_workflow-0.0.77.dist-info → ddeutil_workflow-0.0.79.dist-info}/entry_points.txt +0 -0
- {ddeutil_workflow-0.0.77.dist-info → ddeutil_workflow-0.0.79.dist-info}/licenses/LICENSE +0 -0
- {ddeutil_workflow-0.0.77.dist-info → ddeutil_workflow-0.0.79.dist-info}/top_level.txt +0 -0
ddeutil/workflow/job.py CHANGED
```diff
@@ -48,10 +48,11 @@ from enum import Enum
 from functools import lru_cache
 from textwrap import dedent
 from threading import Event
-from typing import Annotated, Any,
+from typing import Annotated, Any, Optional, Union
 
 from ddeutil.core import freeze_args
 from pydantic import BaseModel, Discriminator, Field, SecretStr, Tag
+from pydantic.functional_serializers import field_serializer
 from pydantic.functional_validators import field_validator, model_validator
 from typing_extensions import Self
 
@@ -71,7 +72,7 @@ from .result import (
 )
 from .reusables import has_template, param2template
 from .stages import Stage
-from .traces import
+from .traces import TraceManager, get_trace
 from .utils import cross_product, filter_func, gen_id
 
 MatrixFilter = list[dict[str, Union[str, int]]]
@@ -248,14 +249,21 @@ class RunsOn(str, Enum):
     SELF_HOSTED = "self_hosted"
     AZ_BATCH = "azure_batch"
     AWS_BATCH = "aws_batch"
+    GCP_BATCH = "gcp_batch"
     CLOUD_BATCH = "cloud_batch"
     DOCKER = "docker"
+    CONTAINER = "container"
 
 
+# Import constants for backward compatibility
 LOCAL = RunsOn.LOCAL
 SELF_HOSTED = RunsOn.SELF_HOSTED
 AZ_BATCH = RunsOn.AZ_BATCH
+AWS_BATCH = RunsOn.AWS_BATCH
+GCP_BATCH = RunsOn.GCP_BATCH
+CLOUD_BATCH = RunsOn.CLOUD_BATCH
 DOCKER = RunsOn.DOCKER
+CONTAINER = RunsOn.CONTAINER
 
 
 class BaseRunsOn(BaseModel):  # pragma: no cov
```
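For orientation, a minimal sketch (mine, not part of the diff) of the new targets: each enum member resolves from its declared string value, and the module-level backward-compatibility aliases added above point at the same members.

```python
# Minimal check of the new runs-on targets and their module aliases.
from ddeutil.workflow.job import CONTAINER, GCP_BATCH, RunsOn

assert RunsOn("gcp_batch") is RunsOn.GCP_BATCH is GCP_BATCH
assert RunsOn("container") is RunsOn.CONTAINER is CONTAINER
```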
```diff
@@ -263,24 +271,20 @@ class BaseRunsOn(BaseModel): # pragma: no cov
     object and override execute method.
     """
 
-    type: RunsOn =
+    type: RunsOn = LOCAL
     args: DictData = Field(
         default_factory=dict,
-        alias="with",
         description=(
             "An argument that pass to the runs-on execution function. This "
             "args will override by this child-model with specific args model."
         ),
+        alias="with",
     )
 
 
 class OnLocal(BaseRunsOn):  # pragma: no cov
     """Runs-on local."""
 
-    type: Literal[RunsOn.LOCAL] = Field(
-        default=RunsOn.LOCAL, validate_default=True
-    )
-
 
 class SelfHostedArgs(BaseModel):
     """Self-Hosted arguments."""
@@ -292,9 +296,7 @@ class SelfHostedArgs(BaseModel):
 class OnSelfHosted(BaseRunsOn):  # pragma: no cov
     """Runs-on self-hosted."""
 
-    type:
-        default=RunsOn.SELF_HOSTED, validate_default=True
-    )
+    type: RunsOn = SELF_HOSTED
     args: SelfHostedArgs = Field(alias="with")
 
 
@@ -310,9 +312,7 @@ class AzBatchArgs(BaseModel):
 
 class OnAzBatch(BaseRunsOn):  # pragma: no cov
 
-    type:
-        default=RunsOn.AZ_BATCH, validate_default=True
-    )
+    type: RunsOn = AZ_BATCH
     args: AzBatchArgs = Field(alias="with")
 
 
@@ -331,23 +331,116 @@ class DockerArgs(BaseModel):
 class OnDocker(BaseRunsOn):  # pragma: no cov
     """Runs-on Docker container."""
 
-    type:
-
+    type: RunsOn = DOCKER
+    args: DockerArgs = Field(default_factory=DockerArgs, alias="with")
+
+
+class ContainerArgs(BaseModel):
+    """Container arguments."""
+
+    image: str = Field(description="Docker image to use")
+    container_name: Optional[str] = Field(
+        default=None, description="Container name"
+    )
+    volumes: Optional[list[dict[str, str]]] = Field(
+        default=None, description="Volume mounts"
+    )
+    environment: Optional[dict[str, str]] = Field(
+        default=None, description="Environment variables"
+    )
+    network: Optional[dict[str, Any]] = Field(
+        default=None, description="Network configuration"
+    )
+    resources: Optional[dict[str, Any]] = Field(
+        default=None, description="Resource limits"
+    )
+    working_dir: Optional[str] = Field(
+        default="/app", description="Working directory"
+    )
+    user: Optional[str] = Field(default=None, description="User to run as")
+    command: Optional[str] = Field(
+        default=None, description="Override default command"
+    )
+    timeout: int = Field(
+        default=3600, description="Execution timeout in seconds"
+    )
+    remove: bool = Field(
+        default=True, description="Remove container after execution"
+    )
+    docker_host: Optional[str] = Field(
+        default=None, description="Docker host URL"
+    )
+
+
+class OnContainer(BaseRunsOn):  # pragma: no cov
+    """Runs-on Container."""
+
+    type: RunsOn = CONTAINER
+    args: ContainerArgs = Field(default_factory=ContainerArgs, alias="with")
+
+
+class AWSBatchArgs(BaseModel):
+    """AWS Batch arguments."""
+
+    job_queue_arn: str = Field(description="AWS Batch job queue ARN")
+    s3_bucket: str = Field(description="S3 bucket for file storage")
+    region_name: str = Field(default="us-east-1", description="AWS region")
+    aws_access_key_id: Optional[str] = Field(
+        default=None, description="AWS access key ID"
+    )
+    aws_secret_access_key: Optional[str] = Field(
+        default=None, description="AWS secret access key"
+    )
+    aws_session_token: Optional[str] = Field(
+        default=None, description="AWS session token"
+    )
+
+
+class OnAWSBatch(BaseRunsOn):  # pragma: no cov
+    """Runs-on AWS Batch."""
+
+    type: RunsOn = AWS_BATCH
+    args: AWSBatchArgs = Field(alias="with")
+
+
+class GCPBatchArgs(BaseModel):
+    """Google Cloud Batch arguments."""
+
+    project_id: str = Field(description="Google Cloud project ID")
+    region: str = Field(description="Google Cloud region")
+    gcs_bucket: str = Field(description="Google Cloud Storage bucket")
+    credentials_path: Optional[str] = Field(
+        default=None, description="Path to service account credentials"
     )
-
+    machine_type: str = Field(
+        default="e2-standard-4", description="Machine type"
+    )
+    max_parallel_tasks: int = Field(
+        default=1, description="Maximum parallel tasks"
+    )
+
+
+class OnGCPBatch(BaseRunsOn):  # pragma: no cov
+    """Runs-on Google Cloud Batch."""
+
+    type: RunsOn = GCP_BATCH
+    args: GCPBatchArgs = Field(alias="with")
 
 
 def get_discriminator_runs_on(model: dict[str, Any]) -> RunsOn:
     """Get discriminator of the RunsOn models."""
     t: str = model.get("type")
-    return RunsOn(t) if t else
+    return RunsOn(t) if t else LOCAL
 
 
 RunsOnModel = Annotated[
     Union[
-        Annotated[OnSelfHosted, Tag(
-        Annotated[OnDocker, Tag(
-        Annotated[OnLocal, Tag(
+        Annotated[OnSelfHosted, Tag(SELF_HOSTED)],
+        Annotated[OnDocker, Tag(DOCKER)],
+        Annotated[OnLocal, Tag(LOCAL)],
+        Annotated[OnContainer, Tag(CONTAINER)],
+        Annotated[OnAWSBatch, Tag(AWS_BATCH)],
+        Annotated[OnGCPBatch, Tag(GCP_BATCH)],
     ],
     Discriminator(get_discriminator_runs_on),
 ]
```
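A usage sketch (mine, not from the package; it assumes these names are importable from ddeutil.workflow.job as defined above): the callable discriminator lets Pydantic route a raw mapping to the matching runs-on model, with "type" feeding get_discriminator_runs_on and "with" aliasing args.

```python
# Sketch: validate a raw mapping against the discriminated union above.
from pydantic import TypeAdapter

from ddeutil.workflow.job import OnContainer, RunsOnModel

model = TypeAdapter(RunsOnModel).validate_python(
    # The image tag is illustrative only.
    {"type": "container", "with": {"image": "python:3.11-slim"}}
)
assert isinstance(model, OnContainer)
assert model.args.image == "python:3.11-slim"
```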
```diff
@@ -490,6 +583,11 @@ class Job(BaseModel):
 
         return self
 
+    @field_serializer("runs_on")
+    def __serialize_runs_on(self, value: RunsOnModel) -> DictData:
+        """Serialize the runs_on field."""
+        return value.model_dump(by_alias=True)
+
     def stage(self, stage_id: str) -> Stage:
         """Return stage instance that exists in this job via passing an input
         stage ID.
```
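The effect of the new serializer, sketched (my example, not from the diff): dumping by alias emits the nested args under "with", matching the input schema, so a dumped runs-on configuration round-trips.

```python
# Sketch: by_alias=True keys the nested args as "with", not "args".
from ddeutil.workflow.job import OnContainer

on = OnContainer(**{"with": {"image": "python:3.11-slim"}})  # illustrative image
dumped = on.model_dump(by_alias=True)
assert "with" in dumped and "args" not in dumped
```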
```diff
@@ -781,7 +879,7 @@ class Job(BaseModel):
         ts: float = time.monotonic()
         parent_run_id: str = run_id
         run_id: str = gen_id((self.id or "EMPTY"), unique=True)
-        trace:
+        trace: TraceManager = get_trace(
             run_id, parent_run_id=parent_run_id, extras=self.extras
         )
         trace.info(
@@ -800,7 +898,14 @@ class Job(BaseModel):
         elif self.runs_on.type == SELF_HOSTED:  # pragma: no cov
             pass
         elif self.runs_on.type == AZ_BATCH:  # pragma: no cov
-
+            from .plugins.providers.az import azure_batch_execute
+
+            return azure_batch_execute(
+                self,
+                params,
+                run_id=parent_run_id,
+                event=event,
+            ).make_info({"execution_time": time.monotonic() - ts})
         elif self.runs_on.type == DOCKER:  # pragma: no cov
             return docker_execution(
                 self,
@@ -808,6 +913,33 @@ class Job(BaseModel):
                 run_id=parent_run_id,
                 event=event,
             ).make_info({"execution_time": time.monotonic() - ts})
+        elif self.runs_on.type == CONTAINER:  # pragma: no cov
+            from .plugins.providers.container import container_execute
+
+            return container_execute(
+                self,
+                params,
+                run_id=parent_run_id,
+                event=event,
+            ).make_info({"execution_time": time.monotonic() - ts})
+        elif self.runs_on.type == AWS_BATCH:  # pragma: no cov
+            from .plugins.providers.aws import aws_batch_execute
+
+            return aws_batch_execute(
+                self,
+                params,
+                run_id=parent_run_id,
+                event=event,
+            ).make_info({"execution_time": time.monotonic() - ts})
+        elif self.runs_on.type == GCP_BATCH:  # pragma: no cov
+            from .plugins.providers.gcs import gcp_batch_execute
+
+            return gcp_batch_execute(
+                self,
+                params,
+                run_id=parent_run_id,
+                event=event,
+            ).make_info({"execution_time": time.monotonic() - ts})
 
         trace.error(
             f"[JOB]: Execution not support runs-on: {self.runs_on.type.value!r} "
```
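Every provider branch above shares one call shape: the job, its params, the parent run id, and a cancel event go in, and a Result comes back. A hedged sketch of calling a provider directly (the wrapper function and argument values are mine; run_id=None is an assumption modeled on local_execute's default):

```python
# Sketch: call a provider entry point the way Job's dispatch does.
from threading import Event

from ddeutil.workflow.job import Job
from ddeutil.workflow.plugins.providers.container import container_execute


def run_on_container(job: Job, params: dict) -> None:
    """Mirror the CONTAINER branch of Job's runs-on dispatch."""
    result = container_execute(
        job,            # expects job.runs_on.type == RunsOn.CONTAINER
        params,         # parameters forwarded to the job's stages
        run_id=None,    # assumption: None, as in local_execute's default
        event=Event(),  # cooperative cancellation signal
    )
    print(result.make_info({"execution_time": 0.0}))  # 0.0 is illustrative
```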
```diff
@@ -884,7 +1016,7 @@ def local_execute_strategy(
 
     :rtype: tuple[Status, DictData]
     """
-    trace:
+    trace: TraceManager = get_trace(
         run_id, parent_run_id=parent_run_id, extras=job.extras
     )
     if strategy:
@@ -1020,7 +1152,7 @@ def local_execute(
     ts: float = time.monotonic()
     parent_run_id: StrOrNone = run_id
     run_id: str = gen_id((job.id or "EMPTY"), unique=True)
-    trace:
+    trace: TraceManager = get_trace(
         run_id, parent_run_id=parent_run_id, extras=job.extras
     )
     context: DictData = {"status": WAIT}
@@ -1163,7 +1295,7 @@ def self_hosted_execute(
    """
     parent_run_id: StrOrNone = run_id
     run_id: str = gen_id((job.id or "EMPTY"), unique=True)
-    trace:
+    trace: TraceManager = get_trace(
         run_id, parent_run_id=parent_run_id, extras=job.extras
     )
     context: DictData = {"status": WAIT}
```
```diff
@@ -1226,71 +1358,8 @@ def self_hosted_execute(
     )
 
 
-
-
-    params: DictData,
-    *,
-    run_id: StrOrNone = None,
-    event: Optional[Event] = None,
-) -> Result:  # pragma: no cov
-    """Azure Batch job execution that will run all job's stages on the Azure
-    Batch Node and extract the result file to be returning context result.
-
-    Steps:
-        - Create a Batch account and a Batch pool.
-        - Create a Batch job and add tasks to the job. Each task represents a
-          command to run on a compute node.
-        - Specify the command to run the Python script in the task. You can use
-          the cmd /c command to run the script with the Python interpreter.
-        - Upload the Python script and any required input files to Azure Storage
-          Account.
-        - Configure the task to download the input files from Azure Storage to
-          the compute node before running the script.
-        - Monitor the job and retrieve the output files from Azure Storage.
-
-    References:
-        - https://docs.azure.cn/en-us/batch/tutorial-parallel-python
-
-    :param job:
-    :param params:
-    :param run_id:
-    :param event:
-
-    :rtype: Result
-    """
-    parent_run_id: StrOrNone = run_id
-    run_id: str = gen_id((job.id or "EMPTY"), unique=True)
-    trace: Trace = get_trace(
-        run_id, parent_run_id=parent_run_id, extras=job.extras
-    )
-    context: DictData = {"status": WAIT}
-    trace.info("[JOB]: Start Azure Batch executor.")
-
-    if event and event.is_set():
-        return Result(
-            run_id=run_id,
-            parent_run_id=parent_run_id,
-            status=CANCEL,
-            context=catch(
-                context,
-                status=CANCEL,
-                updated={
-                    "errors": JobCancelError(
-                        "Execution was canceled from the event before start "
-                        "self-hosted execution."
-                    ).to_dict()
-                },
-            ),
-            extras=job.extras,
-        )
-    print(params)
-    return Result(
-        run_id=run_id,
-        parent_run_id=parent_run_id,
-        status=SUCCESS,
-        context=catch(context, status=SUCCESS),
-        extras=job.extras,
-    )
+# Azure Batch execution is now handled by the Azure Batch provider
+# See src/ddeutil/workflow/plugins/providers/az.py for implementation
 
 
 def docker_execution(
```
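The removed inline executor now lives in the provider package; per the AZ_BATCH dispatch branch earlier in this file, its entry point imports as follows (a sketch using the installed package path):

```python
from ddeutil.workflow.plugins.providers.az import azure_batch_execute
```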
```diff
@@ -1309,7 +1378,7 @@ def docker_execution(
     """
     parent_run_id: StrOrNone = run_id
     run_id: str = gen_id((job.id or "EMPTY"), unique=True)
-    trace:
+    trace: TraceManager = get_trace(
         run_id, parent_run_id=parent_run_id, extras=job.extras
     )
     context: DictData = {"status": WAIT}
```
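The recurring `trace:` annotation fix in these hunks tracks the reworked traces.py (+1660 -521 in the summary above). A minimal sketch of the handle it produces (the run ids and empty extras are illustrative):

```python
# Sketch: get_trace returns a TraceManager bound to the run ids, and the
# job module logs through it (trace.info / trace.error, as shown above).
from ddeutil.workflow.traces import TraceManager, get_trace

trace: TraceManager = get_trace(
    "demo-run-id", parent_run_id="demo-parent-id", extras={}
)
trace.info("[JOB]: Message routed through the trace manager.")
```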