PyPI - pydagu - Versions diffs - 0.1.0__py3-none-any.whl - Mend

pydagu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

pydagu/__init__.py +7 -0
pydagu/builder.py +508 -0
pydagu/http.py +88 -0
pydagu/models/__init__.py +59 -0
pydagu/models/base.py +12 -0
pydagu/models/dag.py +219 -0
pydagu/models/executor.py +183 -0
pydagu/models/handlers.py +30 -0
pydagu/models/infrastructure.py +71 -0
pydagu/models/notifications.py +26 -0
pydagu/models/request.py +14 -0
pydagu/models/response.py +82 -0
pydagu/models/step.py +144 -0
pydagu/models/types.py +16 -0
pydagu-0.1.0.dist-info/METADATA +196 -0
pydagu-0.1.0.dist-info/RECORD +18 -0
pydagu-0.1.0.dist-info/WHEEL +4 -0
pydagu-0.1.0.dist-info/licenses/LICENSE +21 -0

pydagu/models/dag.py ADDED Viewed

@@ -0,0 +1,219 @@
+"""Main DAG model"""
+import re
+from typing import Self
+from pydantic import Field, field_validator, model_validator, BaseModel
+from .base import Precondition
+from .step import Step
+from .handlers import HandlerOn
+from .notifications import MailOn, SMTPConfig
+from .infrastructure import ContainerConfig, SSHConfig
+class Dag(BaseModel):
+    """Dagu DAG (Directed Acyclic Graph) definition"""
+    name: str = Field(
+        description="DAG name",
+        pattern=r"^[a-zA-Z0-9][a-zA-Z0-9._-]*$",
+        examples=["production-etl", "daily-backup", "data-pipeline"],
+    )
+    description: str | None = Field(
+        "",
+        description="DAG description",
+        examples=["Daily ETL pipeline for production data"],
+    )
+    tags: list[str] | None = Field(
+        None,
+        description="Tags for categorization",
+        examples=[["production", "etl", "critical"]],
+    )
+    schedule: str | None = Field(
+        None,
+        description="Cron expression for scheduling",
+        pattern=r"^[\w*,/-]+\s+[\w*,/-]+\s+[\w*,/-]+\s+[\w*,/-]+\s+[\w*,/-]+(\s+[\w*,/-]+)?$",
+        examples=["0 2 * * *", "*/5 * * * *", "0 0 1 * *", "0 9-17 * * MON-FRI"],
+    )
+    # Execution settings
+    maxActiveRuns: int | None = Field(
+        None, ge=1, description="Maximum concurrent DAG runs", examples=[1, 3, 5]
+    )
+    maxActiveSteps: int | None = Field(
+        None, ge=1, description="Maximum concurrent steps", examples=[3, 5, 10]
+    )
+    timeoutSec: int | None = Field(
+        None, ge=0, description="Timeout in seconds", examples=[3600, 7200, 14400]
+    )
+    delay: int | None = Field(
+        None, ge=0, description="Delay before execution", examples=[0, 30, 60]
+    )
+    histRetentionDays: int | None = Field(
+        None, ge=0, description="History retention in days", examples=[30, 90, 365]
+    )
+    # Parameters and environment
+    params: list[str | dict[str, str]] | None = Field(
+        None,
+        description="DAG parameters",
+        examples=[[{"DATE": "`date +%Y-%m-%d`"}, {"ENVIRONMENT": "production"}]],
+    )
+    env: list[str | dict[str, str]] | None = Field(
+        None,
+        description="Environment variables",
+        examples=[[{"DATA_DIR": "/data/etl"}, {"LOG_LEVEL": "info"}]],
+    )
+    dotenv: list[str] | None = Field(
+        None,
+        description="Paths to .env files",
+        examples=[["/etc/dagu/production.env", ".env"]],
+    )
+    # Container configuration
+    container: ContainerConfig | None = Field(
+        None, description="Default container configuration"
+    )
+    # Preconditions
+    preconditions: list[Precondition] | None = Field(
+        None, description="DAG-level preconditions"
+    )
+    # Steps
+    steps: list[str | Step] = Field(
+        ...,
+        min_length=1,
+        description="DAG steps (at least one required)",
+        examples=[["./scripts/validate.sh", "python process.py"]],
+    )
+    # Handlers
+    handlerOn: HandlerOn | None = Field(None, description="Event handlers")
+    # Notifications
+    mailOn: MailOn | None = Field(None, description="Email notification triggers")
+    smtp: SMTPConfig | None = Field(None, description="SMTP configuration")
+    # SSH configuration
+    ssh: SSHConfig | None = Field(
+        None, description="SSH configuration for remote execution"
+    )
+    # Additional settings
+    logDir: str | None = Field(
+        None, description="Log directory", examples=["/var/log/dagu", "./logs"]
+    )
+    restartWaitSec: int | None = Field(
+        None, ge=0, description="Wait time before restart", examples=[10, 30, 60]
+    )
+    @field_validator("schedule")
+    @classmethod
+    def validate_cron_expression(cls, v: str | None) -> str | None:
+        """Validate cron expression format (5 or 6 fields) - permissive sanity check"""
+        if v is None:
+            return v
+        # Split the cron expression into fields
+        fields = v.split()
+        # Must have 5 or 6 fields
+        if len(fields) not in (5, 6):
+            raise ValueError(
+                f"Invalid cron expression: '{v}'. "
+                "Expected format: 'minute hour day month weekday [year]' "
+                "(e.g., '0 2 * * *' for daily at 2 AM, or '*/5 * * * *' for every 5 minutes)"
+            )
+        # Permissive patterns - just check for reasonable structure
+        # Allows: *, numbers, ranges, steps, lists, and named values
+        # This is a sanity check, not exhaustive validation
+        # Basic pattern: anything with numbers, *, /, -, , and letters (for named values)
+        basic_pattern = re.compile(r"^[\w*,/-]+$")
+        # More specific patterns for better validation
+        # Pattern that matches: *, single values, ranges, steps, lists, and combinations
+        # Examples: *, 5, 1-5, */5, 1-10/2, 1,5,10, MON-FRI
+        field_pattern = re.compile(r"^(\*|[\w-]+)(\/\d+)?$|^[\w-]+(,[\w-]+)+$")
+        # Validate each field with permissive pattern
+        for i, field in enumerate(fields):
+            # First check basic structure
+            if not basic_pattern.match(field):
+                field_names = ["minute", "hour", "day", "month", "weekday", "year"]
+                raise ValueError(
+                    f"Invalid cron expression: '{v}'. "
+                    f"Invalid {field_names[i]} field: '{field}'. "
+                    "Expected format: 'minute hour day month weekday [year]'"
+                )
+            # Then check field-specific pattern
+            if not field_pattern.match(field):
+                field_names = ["minute", "hour", "day", "month", "weekday", "year"]
+                raise ValueError(
+                    f"Invalid cron expression: '{v}'. "
+                    f"Invalid {field_names[i]} field: '{field}'. "
+                    "Expected format: 'minute hour day month weekday [year]'"
+                )
+        return v
+    @model_validator(mode="after")
+    def validate_unique_step_names(self: Self) -> Self:
+        """Validate that all named steps have unique names"""
+        step_names = []
+        for i, step in enumerate(self.steps):
+            if isinstance(step, Step) and step.name:
+                step_names.append((step.name, i))
+        # Check for duplicates
+        seen = set()
+        for name, index in step_names:
+            if name in seen:
+                raise ValueError(
+                    f"Step name must be unique. Duplicate name found: '{name}'"
+                )
+            seen.add(name)
+        return self
+    @model_validator(mode="after")
+    def validate_step_dependencies(self: Self) -> Self:
+        """Validate that all step dependencies reference defined steps"""
+        # Build a set of valid step names
+        step_names = set()
+        for i, step in enumerate(self.steps):
+            if isinstance(step, str):
+                # String steps don't have explicit names, they're auto-numbered
+                step_names.add(str(i + 1))
+            elif isinstance(step, Step):
+                if step.name:
+                    step_names.add(step.name)
+                else:
+                    # If no name, dagu auto-generates based on position
+                    step_names.add(str(i + 1))
+        # Check each step's dependencies
+        for i, step in enumerate(self.steps):
+            if isinstance(step, str):
+                continue
+            if not step.depends:
+                continue
+            # depends can be a string or list of strings
+            depends_list = (
+                [step.depends] if isinstance(step.depends, str) else step.depends
+            )
+            for dep in depends_list:
+                if dep not in step_names:
+                    step_identifier = step.name if step.name else f"step at index {i}"
+                    raise ValueError(
+                        f"Step '{step_identifier}' has invalid dependency '{dep}'. "
+                        f"Available steps: {', '.join(sorted(step_names))}"
+                    )
+        return self

pydagu/models/executor.py ADDED Viewed

@@ -0,0 +1,183 @@
+"""Executor configuration models"""
+import json
+from typing import Any, Literal
+from pydantic import BaseModel, Field, field_validator
+class HTTPExecutorConfig(BaseModel):
+    """Configuration for HTTP executor"""
+    headers: dict[str, str] | None = Field(
+        None,
+        description="Request headers",
+        examples=[
+            {"Authorization": "Bearer token", "Content-Type": "application/json"}
+        ],
+    )
+    query: dict[str, str] | None = Field(
+        None,
+        description="URL query parameters",
+        examples=[{"page": "1", "limit": "100"}],
+    )
+    body: str | dict[str, Any] | None = Field(
+        None, description="Request body", examples=[{"name": "value", "id": 123}]
+    )
+    timeout: int | None = Field(
+        None, ge=0, description="Timeout in seconds", examples=[30, 60, 120]
+    )
+    silent: bool | None = Field(
+        None, description="Return body only without status info"
+    )
+    skipTLSVerify: bool | None = Field(
+        None, description="Skip TLS certificate verification"
+    )
+    @field_validator("body", mode="before")
+    @classmethod
+    def serialize_body_to_json(cls, v: Any) -> str | None:
+        """Convert dict body to JSON string automatically for Dagu compatibility"""
+        if v is None:
+            return None
+        if isinstance(v, dict):
+            return json.dumps(v)
+        if isinstance(v, str):
+            return v
+        # For other types, try to serialize them
+        return json.dumps(v)
+class SSHExecutorConfig(BaseModel):
+    """Configuration for SSH executor"""
+    user: str | None = Field(
+        None, description="SSH username", examples=["deploy", "admin", "ubuntu"]
+    )
+    host: str | None = Field(
+        None,
+        description="SSH host",
+        examples=["production.example.com", "192.168.1.100"],
+    )
+    port: int | None = Field(22, description="SSH port", examples=[22, 2222])
+    key: str | None = Field(
+        None,
+        description="Path to SSH private key",
+        examples=["~/.ssh/deploy_key", "/etc/ssh/id_rsa"],
+    )
+    password: str | None = Field(
+        None, description="SSH password", examples=["${SSH_PASSWORD}"]
+    )
+    strictHostKey: bool | None = Field(True, description="Strict host key checking")
+    knownHostFile: str | None = Field(
+        None, description="Path to known_hosts file", examples=["~/.ssh/known_hosts"]
+    )
+class MailExecutorConfig(BaseModel):
+    """Configuration for mail executor"""
+    to: str | list[str] | None = Field(
+        None,
+        description="Email recipient(s)",
+        examples=["data-team@example.com", ["admin@example.com", "alerts@example.com"]],
+    )
+    from_: str | None = Field(
+        None,
+        alias="from",
+        description="Email sender",
+        examples=["etl-notifications@company.com"],
+    )
+    subject: str | None = Field(
+        None,
+        description="Email subject",
+        examples=["ETL Failed - ${DATE}", "Pipeline Alert"],
+    )
+    body: str | None = Field(
+        None, description="Email body", examples=["Check logs at ${DAG_RUN_LOG_FILE}"]
+    )
+    attachLogs: bool | None = Field(None, description="Attach execution logs to email")
+    smtp: dict[str, Any] | None = Field(None, description="SMTP configuration override")
+class DockerExecutorConfig(BaseModel):
+    """Configuration for Docker executor"""
+    image: str | None = Field(
+        None,
+        description="Docker image to use",
+        examples=["postgres:16", "python:3.11-slim"],
+    )
+    container: str | None = Field(
+        None, description="Container name", examples=["etl-worker", "db-backup"]
+    )
+    pull: bool | None = Field(None, description="Pull image before running")
+    autoRemove: bool | None = Field(
+        None, description="Automatically remove container after execution"
+    )
+    env: list[str] | dict[str, str] | None = Field(
+        None,
+        description="Environment variables",
+        examples=[{"PGPASSWORD": "${DB_PASSWORD}"}, ["DEBUG=1", "LOG_LEVEL=info"]],
+    )
+    volumes: list[str] | None = Field(
+        None,
+        description="Volume mounts",
+        examples=[["./data:/data", "./scripts:/scripts:ro"]],
+    )
+    network: str | None = Field(
+        None,
+        description="Docker network",
+        examples=["bridge", "host", "custom-network"],
+    )
+    user: str | None = Field(
+        None, description="User to run as", examples=["1000:1000", "nobody"]
+    )
+    workdir: str | None = Field(
+        None, description="Working directory", examples=["/app", "/data"]
+    )
+class JQExecutorConfig(BaseModel):
+    """Configuration for jq (JSON processor) executor"""
+    query: str | None = Field(
+        None,
+        description="jq query expression",
+        examples=[".data[] | select(.active)", ".results[0].name"],
+    )
+    raw: bool | None = Field(None, description="Output raw strings, not JSON")
+    compact: bool | None = Field(None, description="Compact output")
+class ShellExecutorConfig(BaseModel):
+    """Configuration for shell executor"""
+    shell: str | None = Field(
+        None,
+        description="Shell to use (e.g., bash, sh, zsh)",
+        examples=["bash", "sh", "zsh"],
+    )
+    env: dict[str, str] | None = Field(
+        None,
+        description="Environment variables",
+        examples=[{"PATH": "/usr/local/bin:$PATH", "DEBUG": "1"}],
+    )
+class ExecutorConfig(BaseModel):
+    """Executor configuration for a step"""
+    type: Literal["docker", "http", "jq", "mail", "shell", "ssh"] = Field(
+        description="Executor type",
+        examples=["docker", "http", "ssh", "mail", "shell", "jq"],
+    )
+    config: (
+        HTTPExecutorConfig
+        | SSHExecutorConfig
+        | MailExecutorConfig
+        | DockerExecutorConfig
+        | JQExecutorConfig
+        | ShellExecutorConfig
+        | dict[str, Any]
+        | None
+    ) = Field(None, description="Executor-specific configuration")

pydagu/models/handlers.py ADDED Viewed

@@ -0,0 +1,30 @@
+"""Handler configuration models"""
+from pydantic import BaseModel, Field
+from pydagu.models.executor import ExecutorConfig
+class HandlerConfig(BaseModel):
+    """Handler configuration for DAG events"""
+    command: str | None = Field(
+        None,
+        description="Command to execute",
+        examples=[
+            "./scripts/notify-success.sh",
+            "echo 'ETL completed successfully for ${DATE}'",
+        ],
+    )
+    executor: ExecutorConfig | None = Field(
+        None, description="Executor for the handler"
+    )
+class HandlerOn(BaseModel):
+    """Handlers for different DAG lifecycle events"""
+    success: HandlerConfig | None = Field(None, description="Handler on success")
+    failure: HandlerConfig | None = Field(None, description="Handler on failure")
+    cancel: HandlerConfig | None = Field(None, description="Handler on cancel")
+    exit: HandlerConfig | None = Field(None, description="Handler on exit")

pydagu/models/infrastructure.py ADDED Viewed

@@ -0,0 +1,71 @@
+"""Infrastructure configuration models"""
+from typing import Literal
+from pydantic import BaseModel, Field
+class ContainerConfig(BaseModel):
+    """Container configuration for steps"""
+    image: str = Field(
+        description="Container image to use",
+        examples=["python:3.11-slim", "postgres:16", "alpine:latest"],
+    )
+    pullPolicy: Literal["always", "missing", "never"] | None = Field(
+        None,
+        description="Image pull policy",
+        examples=["always", "missing", "never"],
+    )
+    env: list[str] | None = Field(
+        None,
+        description="Environment variables",
+        examples=[["PYTHONUNBUFFERED=1", "DEBUG=true"]],
+    )
+    volumes: list[str] | None = Field(
+        None,
+        description="Volume mounts",
+        examples=[["./data:/data", "./scripts:/scripts:ro"]],
+    )
+class SSHConfig(BaseModel):
+    """SSH configuration for remote execution"""
+    user: str = Field(
+        description="SSH username", examples=["deploy", "admin", "ubuntu"]
+    )
+    host: str = Field(
+        description="SSH host", examples=["production.example.com", "192.168.1.100"]
+    )
+    port: int | None = Field(
+        22, description="SSH port (default: 22)", examples=[22, 2222]
+    )
+    key: str | None = Field(
+        None,
+        description="Path to SSH private key file",
+        examples=["~/.ssh/deploy_key", "/etc/ssh/id_rsa"],
+    )
+    password: str | None = Field(
+        None,
+        description="SSH password (prefer keys for security)",
+        examples=["${SSH_PASSWORD}"],
+    )
+    strictHostKey: bool | None = Field(
+        True, description="Enable strict host key checking (default: true)"
+    )
+    knownHostFile: str | None = Field(
+        "~/.ssh/known_hosts",
+        description="Path to known_hosts file",
+        examples=["~/.ssh/known_hosts", "/etc/ssh/known_hosts"],
+    )
+class LogConfig(BaseModel):
+    """Logging configuration"""
+    dir: str | None = Field(
+        None, description="Log directory", examples=["/var/log/dagu", "./logs"]
+    )
+    prefix: str | None = Field(
+        None, description="Log file prefix", examples=["dag-", "etl-"]
+    )

pydagu/models/notifications.py ADDED Viewed

@@ -0,0 +1,26 @@
+"""Notification configuration models"""
+from pydantic import BaseModel, Field
+class MailOn(BaseModel):
+    """Email notification configuration"""
+    failure: bool | None = Field(None, description="Send email on failure")
+    success: bool | None = Field(None, description="Send email on success")
+class SMTPConfig(BaseModel):
+    """SMTP configuration for email notifications"""
+    host: str = Field(
+        description="SMTP server host",
+        examples=["smtp.gmail.com", "smtp.company.com", "localhost"],
+    )
+    port: str = Field(description="SMTP server port", examples=["587", "465", "25"])
+    username: str | None = Field(
+        None, description="SMTP username", examples=["user@example.com"]
+    )
+    password: str | None = Field(
+        None, description="SMTP password", examples=["${SMTP_PASSWORD}"]
+    )

pydagu/models/request.py ADDED Viewed

@@ -0,0 +1,14 @@
+"""
+Pydantic models for requests to the Dagu HTTP API.
+"""
+from pydantic import BaseModel
+class StartDagRun(BaseModel):
+    """Model for starting a DAG run via the Dagu HTTP API.
+    Every field is optional and defaults to ``None``.
+    """
+    # Parameters for the run, passed as a single string.
+    # NOTE(review): the expected string format is not visible here; confirm
+    # against the Dagu API.
+    params: str | None = None
+    # Identifier for the DAG run.
+    dagRunId: str | None = None
+    # Name of the DAG.
+    dagName: str | None = None
+    # NOTE(review): presumably limits the DAG to a single active run; confirm
+    # against the Dagu API.
+    singleton: bool | None = None

pydagu/models/response.py ADDED Viewed

@@ -0,0 +1,82 @@
+"""
+Pydantic models for Dagu HTTP API responses.
+"""
+from datetime import datetime
+from pydantic import BaseModel
+from .types import EmptyStrToNone
+class DagRunId(BaseModel):
+    """Model for DAG run ID response from the Dagu HTTP API."""
+    # Identifier of the DAG run (required).
+    dagRunId: str
+class DagResponseMessage(BaseModel):
+    """Model for DAG start response from the Dagu HTTP API.
+    Both fields are required strings.
+    """
+    # Response code, as a string.
+    code: str
+    # Message text accompanying the code.
+    message: str
+class DagSubRun(BaseModel):
+    """
+    Model for DAG run sub-run response from the Dagu HTTP API.
+    All four fields are required.
+    """
+    # Identifier of the sub-run.
+    dagRunId: str
+    name: str
+    # Status as an integer, with its label alongside.
+    # NOTE(review): the code-to-label mapping is not defined in this file.
+    status: int
+    statusLabel: str
+class DagNodeStep(BaseModel):
+    """
+    Model for DAG run node step response from the Dagu HTTP API.
+    Only ``name`` is required; the other fields default to ``None``.
+    """
+    name: str
+    command: str | None = None
+    # NOTE(review): presumably the name of a DAG this step runs; confirm
+    # against the Dagu API.
+    run: str | None = None
+    params: str | None = None
+class DagRunNode(BaseModel):
+    """
+    Model for DAG run node response from the Dagu HTTP API.
+    ``step``, ``status`` and ``statusLabel`` are required; every other field
+    defaults to ``None``.
+    """
+    # The step this node belongs to.
+    step: DagNodeStep
+    status: int
+    statusLabel: str
+    # EmptyStrToNone is imported from .types and not shown here.
+    # NOTE(review): presumably it maps an empty string to None; confirm.
+    startedAt: datetime | EmptyStrToNone = None
+    finishedAt: datetime | EmptyStrToNone = None
+    retryCount: int | None = None
+    # NOTE(review): whether these hold output text or log file paths is not
+    # visible here; confirm against the Dagu API.
+    stdout: str | None = None
+    stderr: str | None = None
+    # Sub-runs attached to this node, if any.
+    subRuns: list[DagSubRun] | None = None
+class DagRunResult(BaseModel):
+    """
+    Model for DAG run result response from the Dagu HTTP API.
+    ``dagRunId``, ``name``, ``status``, ``statusLabel`` and ``nodes`` are
+    required; the timestamps and ``params`` default to ``None``.
+    """
+    dagRunId: str
+    name: str
+    status: int
+    statusLabel: str
+    # EmptyStrToNone is imported from .types and not shown here.
+    # NOTE(review): presumably it maps an empty string to None; confirm.
+    startedAt: datetime | EmptyStrToNone = None
+    finishedAt: datetime | EmptyStrToNone = None
+    params: str | None = None
+    # One entry per node of the run (required).
+    nodes: list[DagRunNode]
+__all__ = ["DagRunId", "DagResponseMessage", "DagRunResult"]