pydagu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pydagu/models/dag.py ADDED
@@ -0,0 +1,219 @@
1
+ """Main DAG model"""
2
+
3
+ import re
4
+ from typing import Self
5
+
6
+ from pydantic import Field, field_validator, model_validator, BaseModel
7
+
8
+ from .base import Precondition
9
+ from .step import Step
10
+ from .handlers import HandlerOn
11
+ from .notifications import MailOn, SMTPConfig
12
+ from .infrastructure import ContainerConfig, SSHConfig
13
+
14
+
15
class Dag(BaseModel):
    """Dagu DAG (Directed Acyclic Graph) definition"""

    # --- Identity and scheduling ---
    # The name must start with an alphanumeric character; the remaining
    # characters may additionally be ".", "_" or "-".
    name: str = Field(
        description="DAG name",
        pattern=r"^[a-zA-Z0-9][a-zA-Z0-9._-]*$",
        examples=["production-etl", "daily-backup", "data-pipeline"],
    )
    description: str | None = Field(
        "",
        description="DAG description",
        examples=["Daily ETL pipeline for production data"],
    )
    tags: list[str] | None = Field(
        None,
        description="Tags for categorization",
        examples=[["production", "etl", "critical"]],
    )
    # The pattern only enforces "5 or 6 whitespace-separated fields made of
    # word characters, '*', ',', '/' and '-'"; the per-field structure is
    # checked by validate_cron_expression below.
    schedule: str | None = Field(
        None,
        description="Cron expression for scheduling",
        pattern=r"^[\w*,/-]+\s+[\w*,/-]+\s+[\w*,/-]+\s+[\w*,/-]+\s+[\w*,/-]+(\s+[\w*,/-]+)?$",
        examples=["0 2 * * *", "*/5 * * * *", "0 0 1 * *", "0 9-17 * * MON-FRI"],
    )

    # Execution settings
    maxActiveRuns: int | None = Field(
        None, ge=1, description="Maximum concurrent DAG runs", examples=[1, 3, 5]
    )
    maxActiveSteps: int | None = Field(
        None, ge=1, description="Maximum concurrent steps", examples=[3, 5, 10]
    )
    timeoutSec: int | None = Field(
        None, ge=0, description="Timeout in seconds", examples=[3600, 7200, 14400]
    )
    delay: int | None = Field(
        None, ge=0, description="Delay before execution", examples=[0, 30, 60]
    )
    histRetentionDays: int | None = Field(
        None, ge=0, description="History retention in days", examples=[30, 90, 365]
    )

    # Parameters and environment
    params: list[str | dict[str, str]] | None = Field(
        None,
        description="DAG parameters",
        examples=[[{"DATE": "`date +%Y-%m-%d`"}, {"ENVIRONMENT": "production"}]],
    )
    env: list[str | dict[str, str]] | None = Field(
        None,
        description="Environment variables",
        examples=[[{"DATA_DIR": "/data/etl"}, {"LOG_LEVEL": "info"}]],
    )
    dotenv: list[str] | None = Field(
        None,
        description="Paths to .env files",
        examples=[["/etc/dagu/production.env", ".env"]],
    )

    # Container configuration
    container: ContainerConfig | None = Field(
        None, description="Default container configuration"
    )

    # Preconditions
    preconditions: list[Precondition] | None = Field(
        None, description="DAG-level preconditions"
    )

    # Steps: each entry is either a bare command string or a full Step model.
    steps: list[str | Step] = Field(
        ...,
        min_length=1,
        description="DAG steps (at least one required)",
        examples=[["./scripts/validate.sh", "python process.py"]],
    )

    # Handlers
    handlerOn: HandlerOn | None = Field(None, description="Event handlers")

    # Notifications
    mailOn: MailOn | None = Field(None, description="Email notification triggers")
    smtp: SMTPConfig | None = Field(None, description="SMTP configuration")

    # SSH configuration
    ssh: SSHConfig | None = Field(
        None, description="SSH configuration for remote execution"
    )

    # Additional settings
    logDir: str | None = Field(
        None, description="Log directory", examples=["/var/log/dagu", "./logs"]
    )
    restartWaitSec: int | None = Field(
        None, ge=0, description="Wait time before restart", examples=[10, 30, 60]
    )

    @field_validator("schedule")
    @classmethod
    def validate_cron_expression(cls, v: str | None) -> str | None:
        """Sanity-check the structure of a cron expression.

        This is a permissive structural check, not exhaustive cron
        validation: value ranges (e.g. minute <= 59) are not verified.

        Args:
            v: The schedule string, or None when no schedule is set.

        Returns:
            The value unchanged when it is None or passes the check.

        Raises:
            ValueError: If the expression does not have 5 or 6 fields, or a
                field contains an item that is not ``*`` or a value/range,
                optionally followed by ``/<step>``.
        """
        if v is None:
            return v

        fields = v.split()

        # Must have 5 or 6 fields
        if len(fields) not in (5, 6):
            raise ValueError(
                f"Invalid cron expression: '{v}'. "
                "Expected format: 'minute hour day month weekday [year]' "
                "(e.g., '0 2 * * *' for daily at 2 AM, or '*/5 * * * *' for every 5 minutes)"
            )

        field_names = ("minute", "hour", "day", "month", "weekday", "year")

        # A single list item: "*" or a value/range/name (5, 1-5, MON-FRI),
        # optionally followed by a numeric step (*/5, 1-10/2).
        item_pattern = re.compile(r"(\*|[\w-]+)(/\d+)?")

        for field_name, field in zip(field_names, fields):
            # Validate every comma-separated item on its own so that lists
            # may mix plain values with stepped or wildcard items
            # (e.g. "0-10/2,30"). Empty items ("1,,2", "1,") are rejected
            # because the pattern cannot match an empty string.
            if not all(item_pattern.fullmatch(item) for item in field.split(",")):
                raise ValueError(
                    f"Invalid cron expression: '{v}'. "
                    f"Invalid {field_name} field: '{field}'. "
                    "Expected format: 'minute hour day month weekday [year]'"
                )

        return v

    @model_validator(mode="after")
    def validate_unique_step_names(self: Self) -> Self:
        """Ensure that no two named steps share the same name.

        String steps and Step objects without a name are ignored here.

        Raises:
            ValueError: On the first step whose name was already used by an
                earlier step.
        """
        seen: set[str] = set()
        for step in self.steps:
            if not (isinstance(step, Step) and step.name):
                continue
            if step.name in seen:
                raise ValueError(
                    f"Step name must be unique. Duplicate name found: '{step.name}'"
                )
            seen.add(step.name)

        return self

    @model_validator(mode="after")
    def validate_step_dependencies(self: Self) -> Self:
        """Ensure every entry in a step's ``depends`` refers to a known step.

        Raises:
            ValueError: If a dependency does not match any step name; the
                message lists the names that are available.
        """
        # Named steps are known by their name; string steps and unnamed Step
        # objects are registered under their 1-based position.
        # NOTE(review): the positional naming mirrors the original comment
        # ("dagu auto-generates based on position") -- confirm against the
        # Dagu version being targeted.
        step_names = {
            step.name if isinstance(step, Step) and step.name else str(i + 1)
            for i, step in enumerate(self.steps)
        }

        for i, step in enumerate(self.steps):
            # Bare command strings cannot declare dependencies.
            if isinstance(step, str) or not step.depends:
                continue

            # depends can be a string or list of strings
            depends_list = (
                [step.depends] if isinstance(step.depends, str) else step.depends
            )

            for dep in depends_list:
                if dep not in step_names:
                    step_identifier = step.name if step.name else f"step at index {i}"
                    raise ValueError(
                        f"Step '{step_identifier}' has invalid dependency '{dep}'. "
                        f"Available steps: {', '.join(sorted(step_names))}"
                    )

        return self
@@ -0,0 +1,183 @@
1
+ """Executor configuration models"""
2
+
3
+ import json
4
+ from typing import Any, Literal
5
+ from pydantic import BaseModel, Field, field_validator
6
+
7
+
8
class HTTPExecutorConfig(BaseModel):
    """Configuration for HTTP executor"""

    headers: dict[str, str] | None = Field(
        None,
        description="Request headers",
        examples=[
            {"Authorization": "Bearer token", "Content-Type": "application/json"}
        ],
    )
    query: dict[str, str] | None = Field(
        None,
        description="URL query parameters",
        examples=[{"page": "1", "limit": "100"}],
    )
    # Dicts (and other non-string values) are accepted as input but are
    # turned into a JSON string by serialize_body_to_json before validation.
    body: str | dict[str, Any] | None = Field(
        None, description="Request body", examples=[{"name": "value", "id": 123}]
    )
    timeout: int | None = Field(
        None, ge=0, description="Timeout in seconds", examples=[30, 60, 120]
    )
    silent: bool | None = Field(
        None, description="Return body only without status info"
    )
    skipTLSVerify: bool | None = Field(
        None, description="Skip TLS certificate verification"
    )

    @field_validator("body", mode="before")
    @classmethod
    def serialize_body_to_json(cls, v: Any) -> str | None:
        """Convert a non-string body to a JSON string for Dagu compatibility.

        Args:
            v: The raw input for ``body``.

        Returns:
            None or the string unchanged; any other value serialized with
            ``json.dumps``.

        Raises:
            ValueError: If the value cannot be serialized to JSON.
        """
        if v is None or isinstance(v, str):
            return v
        try:
            return json.dumps(v)
        except TypeError as exc:
            # A TypeError raised inside a validator is not turned into a
            # pydantic validation error, so re-raise it as ValueError.
            raise ValueError(f"Request body is not JSON serializable: {exc}") from exc
48
+
49
+
50
class SSHExecutorConfig(BaseModel):
    """Configuration for SSH executor"""

    # Connection target. Every field of this model is optional.
    user: str | None = Field(
        default=None,
        description="SSH username",
        examples=["deploy", "admin", "ubuntu"],
    )
    host: str | None = Field(
        default=None,
        description="SSH host",
        examples=["production.example.com", "192.168.1.100"],
    )
    port: int | None = Field(
        default=22,
        description="SSH port",
        examples=[22, 2222],
    )

    # Credentials: a private-key path and/or a password.
    key: str | None = Field(
        default=None,
        description="Path to SSH private key",
        examples=["~/.ssh/deploy_key", "/etc/ssh/id_rsa"],
    )
    password: str | None = Field(
        default=None,
        description="SSH password",
        examples=["${SSH_PASSWORD}"],
    )

    # Host-key verification; strict checking is on by default.
    strictHostKey: bool | None = Field(
        default=True,
        description="Strict host key checking",
    )
    knownHostFile: str | None = Field(
        default=None,
        description="Path to known_hosts file",
        examples=["~/.ssh/known_hosts"],
    )
74
+
75
+
76
class MailExecutorConfig(BaseModel):
    """Configuration for mail executor"""

    # Either a single address or a list of addresses.
    to: str | list[str] | None = Field(
        default=None,
        description="Email recipient(s)",
        examples=["data-team@example.com", ["admin@example.com", "alerts@example.com"]],
    )
    # "from" is a Python keyword, so the attribute is named from_ and
    # exposed under the alias "from".
    from_: str | None = Field(
        default=None,
        alias="from",
        description="Email sender",
        examples=["etl-notifications@company.com"],
    )
    subject: str | None = Field(
        default=None,
        description="Email subject",
        examples=["ETL Failed - ${DATE}", "Pipeline Alert"],
    )
    body: str | None = Field(
        default=None,
        description="Email body",
        examples=["Check logs at ${DAG_RUN_LOG_FILE}"],
    )
    attachLogs: bool | None = Field(
        default=None,
        description="Attach execution logs to email",
    )
    # Free-form mapping; its keys are not validated by this model.
    smtp: dict[str, Any] | None = Field(
        default=None,
        description="SMTP configuration override",
    )
100
+
101
+
102
class DockerExecutorConfig(BaseModel):
    """Configuration for Docker executor"""

    # Image / container selection.
    image: str | None = Field(
        default=None,
        description="Docker image to use",
        examples=["postgres:16", "python:3.11-slim"],
    )
    container: str | None = Field(
        default=None,
        description="Container name",
        examples=["etl-worker", "db-backup"],
    )
    pull: bool | None = Field(
        default=None,
        description="Pull image before running",
    )
    autoRemove: bool | None = Field(
        default=None,
        description="Automatically remove container after execution",
    )

    # Runtime environment. env accepts either a list of strings or a
    # name -> value mapping.
    env: list[str] | dict[str, str] | None = Field(
        default=None,
        description="Environment variables",
        examples=[{"PGPASSWORD": "${DB_PASSWORD}"}, ["DEBUG=1", "LOG_LEVEL=info"]],
    )
    volumes: list[str] | None = Field(
        default=None,
        description="Volume mounts",
        examples=[["./data:/data", "./scripts:/scripts:ro"]],
    )
    network: str | None = Field(
        default=None,
        description="Docker network",
        examples=["bridge", "host", "custom-network"],
    )
    user: str | None = Field(
        default=None,
        description="User to run as",
        examples=["1000:1000", "nobody"],
    )
    workdir: str | None = Field(
        default=None,
        description="Working directory",
        examples=["/app", "/data"],
    )
138
+
139
+
140
class JQExecutorConfig(BaseModel):
    """Configuration for jq (JSON processor) executor"""

    # The jq filter expression itself.
    query: str | None = Field(
        default=None,
        description="jq query expression",
        examples=[".data[] | select(.active)", ".results[0].name"],
    )
    # Output-formatting switches.
    raw: bool | None = Field(
        default=None,
        description="Output raw strings, not JSON",
    )
    compact: bool | None = Field(
        default=None,
        description="Compact output",
    )
150
+
151
+
152
class ShellExecutorConfig(BaseModel):
    """Configuration for shell executor"""

    shell: str | None = Field(
        default=None,
        description="Shell to use (e.g., bash, sh, zsh)",
        examples=["bash", "sh", "zsh"],
    )
    # Unlike the Docker executor's env, only the mapping form is accepted.
    env: dict[str, str] | None = Field(
        default=None,
        description="Environment variables",
        examples=[{"PATH": "/usr/local/bin:$PATH", "DEBUG": "1"}],
    )
165
+
166
+
167
class ExecutorConfig(BaseModel):
    """Executor configuration for a step"""

    # Required: one of the six supported executor kinds.
    type: Literal["docker", "http", "jq", "mail", "shell", "ssh"] = Field(
        description="Executor type",
        examples=["docker", "http", "ssh", "mail", "shell", "jq"],
    )
    # Any of the typed executor configs, or a plain dict as a fallback.
    # Nothing in this model ties the chosen config class to ``type``.
    config: (
        HTTPExecutorConfig
        | SSHExecutorConfig
        | MailExecutorConfig
        | DockerExecutorConfig
        | JQExecutorConfig
        | ShellExecutorConfig
        | dict[str, Any]
        | None
    ) = Field(
        default=None,
        description="Executor-specific configuration",
    )
@@ -0,0 +1,30 @@
1
+ """Handler configuration models"""
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+ from pydagu.models.executor import ExecutorConfig
6
+
7
+
8
class HandlerConfig(BaseModel):
    """Handler configuration for DAG events"""

    # Both fields are optional; the model does not require either one.
    command: str | None = Field(
        default=None,
        description="Command to execute",
        examples=[
            "./scripts/notify-success.sh",
            "echo 'ETL completed successfully for ${DATE}'",
        ],
    )
    executor: ExecutorConfig | None = Field(
        default=None,
        description="Executor for the handler",
    )
22
+
23
+
24
class HandlerOn(BaseModel):
    """Handlers for different DAG lifecycle events"""

    # One optional handler per lifecycle event.
    success: HandlerConfig | None = Field(
        default=None,
        description="Handler on success",
    )
    failure: HandlerConfig | None = Field(
        default=None,
        description="Handler on failure",
    )
    cancel: HandlerConfig | None = Field(
        default=None,
        description="Handler on cancel",
    )
    exit: HandlerConfig | None = Field(
        default=None,
        description="Handler on exit",
    )
@@ -0,0 +1,71 @@
1
+ """Infrastructure configuration models"""
2
+
3
+ from typing import Literal
4
+ from pydantic import BaseModel, Field
5
+
6
+
7
class ContainerConfig(BaseModel):
    """Container configuration for steps"""

    # The image is the only required field.
    image: str = Field(
        description="Container image to use",
        examples=["python:3.11-slim", "postgres:16", "alpine:latest"],
    )
    pullPolicy: Literal["always", "missing", "never"] | None = Field(
        default=None,
        description="Image pull policy",
        examples=["always", "missing", "never"],
    )
    # Environment variables are given as a list of strings.
    env: list[str] | None = Field(
        default=None,
        description="Environment variables",
        examples=[["PYTHONUNBUFFERED=1", "DEBUG=true"]],
    )
    volumes: list[str] | None = Field(
        default=None,
        description="Volume mounts",
        examples=[["./data:/data", "./scripts:/scripts:ro"]],
    )
29
+
30
+
31
class SSHConfig(BaseModel):
    """SSH configuration for remote execution"""

    # user and host are required.
    user: str = Field(
        description="SSH username",
        examples=["deploy", "admin", "ubuntu"],
    )
    host: str = Field(
        description="SSH host",
        examples=["production.example.com", "192.168.1.100"],
    )
    port: int | None = Field(
        default=22,
        description="SSH port (default: 22)",
        examples=[22, 2222],
    )

    # Credentials: a private-key path and/or a password.
    key: str | None = Field(
        default=None,
        description="Path to SSH private key file",
        examples=["~/.ssh/deploy_key", "/etc/ssh/id_rsa"],
    )
    password: str | None = Field(
        default=None,
        description="SSH password (prefer keys for security)",
        examples=["${SSH_PASSWORD}"],
    )

    # Host-key verification; strict by default, using the user's known_hosts.
    strictHostKey: bool | None = Field(
        default=True,
        description="Enable strict host key checking (default: true)",
    )
    knownHostFile: str | None = Field(
        default="~/.ssh/known_hosts",
        description="Path to known_hosts file",
        examples=["~/.ssh/known_hosts", "/etc/ssh/known_hosts"],
    )
61
+
62
+
63
class LogConfig(BaseModel):
    """Logging configuration"""

    # Both settings are optional.
    dir: str | None = Field(
        default=None,
        description="Log directory",
        examples=["/var/log/dagu", "./logs"],
    )
    prefix: str | None = Field(
        default=None,
        description="Log file prefix",
        examples=["dag-", "etl-"],
    )
@@ -0,0 +1,26 @@
1
+ """Notification configuration models"""
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+
6
class MailOn(BaseModel):
    """Email notification configuration"""

    # Each trigger is optional and unset (None) by default.
    failure: bool | None = Field(
        default=None,
        description="Send email on failure",
    )
    success: bool | None = Field(
        default=None,
        description="Send email on success",
    )
11
+
12
+
13
class SMTPConfig(BaseModel):
    """SMTP configuration for email notifications"""

    # host and port are required. Note that port is typed as a string.
    host: str = Field(
        description="SMTP server host",
        examples=["smtp.gmail.com", "smtp.company.com", "localhost"],
    )
    port: str = Field(
        description="SMTP server port",
        examples=["587", "465", "25"],
    )
    # Optional credentials.
    username: str | None = Field(
        default=None,
        description="SMTP username",
        examples=["user@example.com"],
    )
    password: str | None = Field(
        default=None,
        description="SMTP password",
        examples=["${SMTP_PASSWORD}"],
    )
@@ -0,0 +1,14 @@
1
+ """
2
+ Pydantic models for requests to the Dagu HTTP API.
3
+ """
4
+
5
+ from pydantic import BaseModel
6
+
7
+
8
class StartDagRun(BaseModel):
    """Model for starting a DAG run via the Dagu HTTP API."""

    # Every field is optional and defaults to None.
    # NOTE(review): field names are presumably the API's JSON keys -- confirm
    # against the Dagu HTTP API specification.
    params: str | None = None
    dagRunId: str | None = None
    dagName: str | None = None
    singleton: bool | None = None
@@ -0,0 +1,82 @@
1
+ """
2
+ Pydantic models for Dagu HTTP API responses.
3
+ """
4
+
5
+ from datetime import datetime
6
+
7
+ from pydantic import BaseModel
8
+
9
+ from .types import EmptyStrToNone
10
+
11
+
12
class DagRunId(BaseModel):
    """Model for DAG run ID response from the Dagu HTTP API."""

    # Required identifier of the DAG run.
    dagRunId: str
16
+
17
+
18
class DagResponseMessage(BaseModel):
    """Model for DAG start response from the Dagu HTTP API."""

    # Both fields are required strings.
    code: str
    message: str
23
+
24
+
25
class DagSubRun(BaseModel):
    """
    Model for DAG run sub-run response from the Dagu HTTP API.

    """

    # All fields are required.
    dagRunId: str
    name: str
    # Numeric status code together with its label.
    status: int
    statusLabel: str
35
+
36
+
37
class DagNodeStep(BaseModel):
    """
    Model for DAG run node step response from the Dagu HTTP API.

    """

    # Only the step name is required; the remaining fields default to None.
    name: str
    command: str | None = None
    run: str | None = None
    params: str | None = None
47
+
48
+
49
class DagRunNode(BaseModel):
    """
    Model for DAG run node response from the Dagu HTTP API.

    """

    # Required: the step definition and its status (code plus label).
    step: DagNodeStep
    status: int
    statusLabel: str

    # Timestamps default to None.
    # NOTE(review): EmptyStrToNone comes from .types; presumably it maps an
    # empty string to None -- confirm against that module.
    startedAt: datetime | EmptyStrToNone = None
    finishedAt: datetime | EmptyStrToNone = None

    # Optional execution details.
    retryCount: int | None = None
    stdout: str | None = None
    stderr: str | None = None
    subRuns: list[DagSubRun] | None = None
64
+
65
+
66
class DagRunResult(BaseModel):
    """
    Model for DAG run result response from the Dagu HTTP API.

    """

    # Required identification and status (code plus label) of the run.
    dagRunId: str
    name: str
    status: int
    statusLabel: str

    # Timestamps default to None.
    # NOTE(review): EmptyStrToNone comes from .types; presumably it maps an
    # empty string to None -- confirm against that module.
    startedAt: datetime | EmptyStrToNone = None
    finishedAt: datetime | EmptyStrToNone = None

    params: str | None = None
    # Required list of per-step results.
    nodes: list[DagRunNode]
80
+
81
+
82
# Public API of this module. The nested models are exported as well because
# they are the element types of DagRunResult.nodes, DagRunNode.step and
# DagRunNode.subRuns.
__all__ = [
    "DagRunId",
    "DagResponseMessage",
    "DagSubRun",
    "DagNodeStep",
    "DagRunNode",
    "DagRunResult",
]