highway-dsl 0.0.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 Farseed Ashouri
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
@@ -0,0 +1,203 @@
+ Metadata-Version: 2.4
+ Name: highway_dsl
+ Version: 0.0.1
+ Summary: A domain specific language (DSL) for defining and managing data processing pipelines.
+ Author-email: Farseed Ashouri <farseed.ashouri@gmail.com>
+ License: MIT
+ Project-URL: Homepage, https://github.com/rodmena-limited/highway_dsl
+ Project-URL: Issues, https://github.com/rodmena-limited/highway_dsl/issues
+ Classifier: Programming Language :: Python :: 3
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: pydantic>=2.12.3
+ Requires-Dist: pyyaml>=6.0
+ Provides-Extra: dev
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
+ Requires-Dist: types-PyYAML>=6.0.0; extra == "dev"
+ Requires-Dist: pytest-cov>=2.12.1; extra == "dev"
+ Dynamic: license-file
+
+ # Highway DSL
+
+ Highway DSL is a Python-based Domain Specific Language (DSL) for defining and managing complex workflows. It allows users to declaratively specify tasks, dependencies, and execution parameters, supporting various control flow mechanisms like conditions, parallel execution, and retries.
+
+ ## Features
+
+ * **Declarative Workflow Definition:** Define workflows using a clear and concise Python API or through YAML/JSON configurations.
+ * **Pydantic Models:** Leverages Pydantic for robust data validation and serialization/deserialization of workflow definitions.
+ * **Rich Task Types:** Supports various operators including:
+     * `TaskOperator`: Executes a Python function.
+     * `ConditionOperator`: Enables conditional branching based on expressions.
+     * `WaitOperator`: Pauses workflow execution for a specified duration or until a specific datetime.
+     * `ParallelOperator`: Executes multiple branches of tasks concurrently.
+     * `ForEachOperator`: Iterates over a collection, executing a chain of tasks for each item.
+ * **Retry and Timeout Policies:** Define retry strategies and timeout limits for individual tasks.
+ * **Serialization/Deserialization:** Seamless conversion of workflow definitions between Python objects, YAML, and JSON formats.
+ * **Workflow Builder:** A fluent API for constructing workflows programmatically.
+
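+ These operators all compose through the fluent builder shown under Usage below. A brief illustrative sketch (the `checks.*` function paths and the branch task ids are placeholders, not part of this package):
+
+ ```python
+ from highway_dsl.workflow_dsl import WorkflowBuilder
+
+ workflow = (
+     WorkflowBuilder("branching_example")
+     .task("ingest", "checks.load_input", result_key="data")
+     # Route to one of two follow-up tasks based on an expression.
+     .condition("size_check", "{{data.size}} > 100", if_true="fan_out", if_false="small_path")
+     # Declare two branches whose task chains may run concurrently.
+     .parallel("fan_out", {"left": ["clean_left"], "right": ["clean_right"]})
+     .build()
+ )
+ ```
+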
+ ## Installation
+
+ To install Highway DSL, you can use pip:
+
+ ```bash
+ pip install highway-dsl
+ ```
+
+ If you want to install it for development, including testing dependencies:
+
+ ```bash
+ pip install "highway-dsl[dev]"
+ ```
+
+ ## Usage
+
+ ### Defining a Simple Workflow
+
+ ```python
+ from datetime import timedelta
+ from highway_dsl.workflow_dsl import WorkflowBuilder
+
+ def demonstrate_basic_workflow():
+     """Show a simple complete workflow using just the builder"""
+
+     workflow = (
+         WorkflowBuilder("simple_etl")
+         .task("extract", "etl.extract_data", result_key="raw_data")
+         .task(
+             "transform",
+             "etl.transform_data",
+             args=["{{raw_data}}"],
+             result_key="transformed_data",
+         )
+         .retry(max_retries=3, delay=timedelta(seconds=10))
+         .task("load", "etl.load_data", args=["{{transformed_data}}"])
+         .timeout(timeout=timedelta(minutes=30))
+         .wait("wait_next", timedelta(hours=24))
+         .task("cleanup", "etl.cleanup")
+         .build()
+     )
+
+     workflow.set_variables(
+         {"database_url": "postgresql://localhost/mydb", "chunk_size": 1000}
+     )
+
+     return workflow
+
+ if __name__ == "__main__":
+     basic_workflow = demonstrate_basic_workflow()
+     print(basic_workflow.to_yaml())
+ ```
+
+ ### Defining a Complex Workflow
+
+ Refer to `example_usage.py` for a more complex example demonstrating conditional logic, parallel execution, and iteration.
+
+ ### YAML Configuration
+
+ You can also define workflows directly in YAML:
+
+ ```yaml
+ name: simple_etl
+ version: 1.0.0
+ description: Simple ETL workflow with retry and timeout
+ variables:
+   database_url: postgresql://localhost/mydb
+   chunk_size: 1000
+ start_task: extract
+ tasks:
+   extract:
+     task_id: extract
+     operator_type: task
+     function: etl.extract_data
+     result_key: raw_data
+     dependencies: []
+     metadata: {}
+
+   transform:
+     task_id: transform
+     operator_type: task
+     function: etl.transform_data
+     args: ["{{raw_data}}"]
+     result_key: transformed_data
+     dependencies: ["extract"]
+     retry_policy:
+       max_retries: 3
+       delay: PT10S
+       backoff_factor: 2.0
+     metadata: {}
+
+   load:
+     task_id: load
+     operator_type: task
+     function: etl.load_data
+     args: ["{{transformed_data}}"]
+     dependencies: ["transform"]
+     timeout_policy:
+       timeout: PT30M
+       kill_on_timeout: true
+     metadata: {}
+
+   wait_next:
+     task_id: wait_next
+     operator_type: wait
+     wait_for: "P1D"
+     dependencies: ["load"]
+     metadata: {}
+
+   cleanup:
+     task_id: cleanup
+     operator_type: task
+     function: etl.cleanup
+     dependencies: ["wait_next"]
+     metadata: {}
+ ```
+
+ To load this YAML:
+
+ ```python
+ from highway_dsl.workflow_dsl import Workflow
+
+ yaml_content = """
+ # ... (yaml content from above)
+ """
+
+ workflow = Workflow.from_yaml(yaml_content)
+ print(workflow.name)
+ ```
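+
+ The same object round-trips through JSON as well, via `to_json` and `from_json`:
+
+ ```python
+ json_text = workflow.to_json()
+ restored = Workflow.from_json(json_text)
+ assert restored.name == workflow.name
+ ```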
+
+ ## Development
+
+ ### Running Tests
+
+ To run the unit tests, navigate to the project root and execute:
+
+ ```bash
+ pytest
+ ```
+
+ ### Type Checking
+
+ To perform static type checking with MyPy:
+
+ ```bash
+ mypy .
+ ```
+
+ ## Project Structure
+
+ ```
+ .highway/
+ ├── workflow_dsl.py          # Core DSL definitions (Pydantic models)
+ ├── example_usage.py         # Examples of how to use the DSL
+ ├── tests/
+ │   ├── __init__.py
+ │   ├── conftest.py          # Pytest configuration
+ │   └── test_workflow_dsl.py # Unit and integration tests
+ ├── pyproject.toml           # Project metadata and dependencies
+ ├── README.md                # This file
+ └── SUMMARY.md               # Summary of changes and future instructions
+ ```
@@ -0,0 +1,180 @@
+ # Highway DSL
+
+ Highway DSL is a Python-based Domain Specific Language (DSL) for defining and managing complex workflows. It allows users to declaratively specify tasks, dependencies, and execution parameters, supporting various control flow mechanisms like conditions, parallel execution, and retries.
+
+ ## Features
+
+ * **Declarative Workflow Definition:** Define workflows using a clear and concise Python API or through YAML/JSON configurations.
+ * **Pydantic Models:** Leverages Pydantic for robust data validation and serialization/deserialization of workflow definitions.
+ * **Rich Task Types:** Supports various operators including:
+     * `TaskOperator`: Executes a Python function.
+     * `ConditionOperator`: Enables conditional branching based on expressions.
+     * `WaitOperator`: Pauses workflow execution for a specified duration or until a specific datetime.
+     * `ParallelOperator`: Executes multiple branches of tasks concurrently.
+     * `ForEachOperator`: Iterates over a collection, executing a chain of tasks for each item.
+ * **Retry and Timeout Policies:** Define retry strategies and timeout limits for individual tasks.
+ * **Serialization/Deserialization:** Seamless conversion of workflow definitions between Python objects, YAML, and JSON formats.
+ * **Workflow Builder:** A fluent API for constructing workflows programmatically.
+
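+ For instance, `WaitOperator` accepts a `timedelta`, a `datetime`, or a tagged string, and serializes the first two back to tagged strings. A minimal sketch based on the model in `workflow_dsl.py`:
+
+ ```python
+ from datetime import timedelta
+ from highway_dsl.workflow_dsl import WaitOperator
+
+ wait = WaitOperator(task_id="cool_down", wait_for=timedelta(minutes=30))
+ print(wait.model_dump()["wait_for"])  # -> "duration:1800.0"
+
+ # Tagged strings are parsed back into rich types on validation.
+ restored = WaitOperator.model_validate({"task_id": "cool_down", "wait_for": "duration:1800"})
+ assert restored.wait_for == timedelta(minutes=30)
+ ```
+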
+ ## Installation
+
+ To install Highway DSL, you can use pip:
+
+ ```bash
+ pip install highway-dsl
+ ```
+
+ If you want to install it for development, including testing dependencies:
+
+ ```bash
+ pip install "highway-dsl[dev]"
+ ```
+
+ ## Usage
+
+ ### Defining a Simple Workflow
+
+ ```python
+ from datetime import timedelta
+ from highway_dsl.workflow_dsl import WorkflowBuilder
+
+ def demonstrate_basic_workflow():
+     """Show a simple complete workflow using just the builder"""
+
+     workflow = (
+         WorkflowBuilder("simple_etl")
+         .task("extract", "etl.extract_data", result_key="raw_data")
+         .task(
+             "transform",
+             "etl.transform_data",
+             args=["{{raw_data}}"],
+             result_key="transformed_data",
+         )
+         .retry(max_retries=3, delay=timedelta(seconds=10))
+         .task("load", "etl.load_data", args=["{{transformed_data}}"])
+         .timeout(timeout=timedelta(minutes=30))
+         .wait("wait_next", timedelta(hours=24))
+         .task("cleanup", "etl.cleanup")
+         .build()
+     )
+
+     workflow.set_variables(
+         {"database_url": "postgresql://localhost/mydb", "chunk_size": 1000}
+     )
+
+     return workflow
+
+ if __name__ == "__main__":
+     basic_workflow = demonstrate_basic_workflow()
+     print(basic_workflow.to_yaml())
+ ```
+
+ ### Defining a Complex Workflow
+
+ Refer to `example_usage.py` for a more complex example demonstrating conditional logic, parallel execution, and iteration.
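+
+ As a taste of the iteration support, `foreach` takes the name of a variable holding a collection plus a chain of task ids to run per item (a sketch; the `inventory.*` function paths are placeholders):
+
+ ```python
+ from highway_dsl.workflow_dsl import WorkflowBuilder
+
+ workflow = (
+     WorkflowBuilder("per_item_example")
+     .task("list_items", "inventory.fetch_items", result_key="items_list")
+     # Run the "process_item" chain once per element of items_list.
+     .foreach("each_item", "items_list", ["process_item"])
+     .build()
+ )
+ ```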
+
+ ### YAML Configuration
+
+ You can also define workflows directly in YAML:
+
+ ```yaml
+ name: simple_etl
+ version: 1.0.0
+ description: Simple ETL workflow with retry and timeout
+ variables:
+   database_url: postgresql://localhost/mydb
+   chunk_size: 1000
+ start_task: extract
+ tasks:
+   extract:
+     task_id: extract
+     operator_type: task
+     function: etl.extract_data
+     result_key: raw_data
+     dependencies: []
+     metadata: {}
+
+   transform:
+     task_id: transform
+     operator_type: task
+     function: etl.transform_data
+     args: ["{{raw_data}}"]
+     result_key: transformed_data
+     dependencies: ["extract"]
+     retry_policy:
+       max_retries: 3
+       delay: PT10S
+       backoff_factor: 2.0
+     metadata: {}
+
+   load:
+     task_id: load
+     operator_type: task
+     function: etl.load_data
+     args: ["{{transformed_data}}"]
+     dependencies: ["transform"]
+     timeout_policy:
+       timeout: PT30M
+       kill_on_timeout: true
+     metadata: {}
+
+   wait_next:
+     task_id: wait_next
+     operator_type: wait
+     wait_for: "P1D"
+     dependencies: ["load"]
+     metadata: {}
+
+   cleanup:
+     task_id: cleanup
+     operator_type: task
+     function: etl.cleanup
+     dependencies: ["wait_next"]
+     metadata: {}
+ ```
+
+ To load this YAML:
+
+ ```python
+ from highway_dsl.workflow_dsl import Workflow
+
+ yaml_content = """
+ # ... (yaml content from above)
+ """
+
+ workflow = Workflow.from_yaml(yaml_content)
+ print(workflow.name)
+ ```
+
+ ## Development
+
+ ### Running Tests
+
+ To run the unit tests, navigate to the project root and execute:
+
+ ```bash
+ pytest
+ ```
+
+ ### Type Checking
+
+ To perform static type checking with MyPy:
+
+ ```bash
+ mypy .
+ ```
+
+ ## Project Structure
+
+ ```
+ .highway/
+ ├── workflow_dsl.py          # Core DSL definitions (Pydantic models)
+ ├── example_usage.py         # Examples of how to use the DSL
+ ├── tests/
+ │   ├── __init__.py
+ │   ├── conftest.py          # Pytest configuration
+ │   └── test_workflow_dsl.py # Unit and integration tests
+ ├── pyproject.toml           # Project metadata and dependencies
+ ├── README.md                # This file
+ └── SUMMARY.md               # Summary of changes and future instructions
+ ```
File without changes
@@ -0,0 +1,252 @@
+ # workflow_dsl.py
+ from typing import Any, Dict, List, Optional, Union, Callable, Type
+ from enum import Enum
+ from datetime import datetime, timedelta
+ import yaml
+ import json
+ from abc import ABC, abstractmethod
+ from pydantic import BaseModel, Field, model_validator, ConfigDict
+
+
+ class OperatorType(Enum):
+     TASK = "task"
+     CONDITION = "condition"
+     WAIT = "wait"
+     PARALLEL = "parallel"
+     FOREACH = "foreach"
+     SWITCH = "switch"
+     TRY_CATCH = "try_catch"
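+
+     # NOTE (editorial): SWITCH and TRY_CATCH are declared here, but this release
+     # defines no matching operator classes, and Workflow.validate_tasks below
+     # dispatches only the first five types.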
+
+
+ class RetryPolicy(BaseModel):
+     max_retries: int = Field(3, description="Maximum number of retries")
+     delay: timedelta = Field(timedelta(seconds=5), description="Delay between retries")
+     backoff_factor: float = Field(2.0, description="Factor by which to increase delay")
+
+
+ class TimeoutPolicy(BaseModel):
+     timeout: timedelta = Field(..., description="Timeout duration")
+     kill_on_timeout: bool = Field(True, description="Whether to kill the task on timeout")
+
+
+ class BaseOperator(BaseModel, ABC):
+     task_id: str
+     operator_type: OperatorType
+     dependencies: List[str] = Field(default_factory=list)
+     retry_policy: Optional[RetryPolicy] = None
+     timeout_policy: Optional[TimeoutPolicy] = None
+     metadata: Dict[str, Any] = Field(default_factory=dict)
+
+     model_config = ConfigDict(use_enum_values=True, arbitrary_types_allowed=True)
+
+
+ class TaskOperator(BaseOperator):
+     function: str
+     args: List[Any] = Field(default_factory=list)
+     kwargs: Dict[str, Any] = Field(default_factory=dict)
+     result_key: Optional[str] = None
+     operator_type: OperatorType = Field(OperatorType.TASK, frozen=True)
+
+
+ class ConditionOperator(BaseOperator):
+     condition: str
+     if_true: str
+     if_false: str
+     operator_type: OperatorType = Field(OperatorType.CONDITION, frozen=True)
+
+
+ class WaitOperator(BaseOperator):
+     wait_for: Union[timedelta, datetime, str]
+     operator_type: OperatorType = Field(OperatorType.WAIT, frozen=True)
+
+     @model_validator(mode='before')
+     @classmethod
+     def parse_wait_for(cls, data: Any) -> Any:
+         if isinstance(data, dict) and "wait_for" in data:
+             wait_for = data["wait_for"]
+             if isinstance(wait_for, str):
+                 if wait_for.startswith("duration:"):
+                     data["wait_for"] = timedelta(seconds=float(wait_for.split(":")[1]))
+                 elif wait_for.startswith("datetime:"):
+                     data["wait_for"] = datetime.fromisoformat(wait_for.split(":", 1)[1])
+         return data
+
+     def model_dump(self, **kwargs) -> Dict[str, Any]:
+         data = super().model_dump(**kwargs)
+         wait_for = data["wait_for"]
+         if isinstance(wait_for, timedelta):
+             data["wait_for"] = f"duration:{wait_for.total_seconds()}"
+         elif isinstance(wait_for, datetime):
+             data["wait_for"] = f"datetime:{wait_for.isoformat()}"
+         return data
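+
+     # Illustrative round-trip: WaitOperator(task_id="w", wait_for=timedelta(hours=1))
+     # dumps wait_for as "duration:3600.0", and validating {"task_id": "w",
+     # "wait_for": "duration:3600"} restores the timedelta via parse_wait_for above.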
+
+
+ class ParallelOperator(BaseOperator):
+     branches: Dict[str, List[str]] = Field(default_factory=dict)
+     operator_type: OperatorType = Field(OperatorType.PARALLEL, frozen=True)
+
+
+ class ForEachOperator(BaseOperator):
+     items: str
+     task_chain: List[str] = Field(default_factory=list)
+     operator_type: OperatorType = Field(OperatorType.FOREACH, frozen=True)
+
+
+ class Workflow(BaseModel):
+     name: str
+     version: str = "1.0.0"
+     description: str = ""
+     tasks: Dict[str, Union[
+         TaskOperator,
+         ConditionOperator,
+         WaitOperator,
+         ParallelOperator,
+         ForEachOperator,
+     ]] = Field(default_factory=dict)
+     variables: Dict[str, Any] = Field(default_factory=dict)
+     start_task: Optional[str] = None
+
+     @model_validator(mode='before')
+     @classmethod
+     def validate_tasks(cls, data: Any) -> Any:
+         if isinstance(data, dict) and "tasks" in data:
+             validated_tasks = {}
+             operator_classes: Dict[str, Type[BaseOperator]] = {
+                 OperatorType.TASK.value: TaskOperator,
+                 OperatorType.CONDITION.value: ConditionOperator,
+                 OperatorType.WAIT.value: WaitOperator,
+                 OperatorType.PARALLEL.value: ParallelOperator,
+                 OperatorType.FOREACH.value: ForEachOperator,
+             }
+             for task_id, task_data in data["tasks"].items():
+                 operator_type = task_data.get("operator_type")
+                 if operator_type and operator_type in operator_classes:
+                     operator_class = operator_classes[operator_type]
+                     validated_tasks[task_id] = operator_class.model_validate(task_data)
+                 else:
+                     raise ValueError(f"Unknown operator type: {operator_type}")
+             data["tasks"] = validated_tasks
+         return data
+
+     def add_task(self, task: Union[
+         TaskOperator,
+         ConditionOperator,
+         WaitOperator,
+         ParallelOperator,
+         ForEachOperator,
+     ]) -> "Workflow":
+         self.tasks[task.task_id] = task
+         return self
+
+     def set_variables(self, variables: Dict[str, Any]) -> "Workflow":
+         self.variables.update(variables)
+         return self
+
+     def set_start_task(self, task_id: str) -> "Workflow":
+         self.start_task = task_id
+         return self
+
+     def to_yaml(self) -> str:
+         data = self.model_dump(mode='json', by_alias=True, exclude_none=True)
+         return yaml.dump(data, default_flow_style=False)
+
+     def to_json(self) -> str:
+         return self.model_dump_json(indent=2)
+
+     @classmethod
+     def from_yaml(cls, yaml_str: str) -> "Workflow":
+         data = yaml.safe_load(yaml_str)
+         return cls.model_validate(data)
+
+     @classmethod
+     def from_json(cls, json_str: str) -> "Workflow":
+         return cls.model_validate_json(json_str)
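+
+     # Typical usage (sketch): round-trip a workflow through YAML.
+     #   wf = Workflow(name="example")
+     #   assert Workflow.from_yaml(wf.to_yaml()).name == "example"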
+
+
+ class WorkflowBuilder:
+     def __init__(self, name: str, existing_workflow: Optional[Workflow] = None):
+         if existing_workflow:
+             self.workflow = existing_workflow
+         else:
+             self.workflow = Workflow(name=name)
+         self._current_task: Optional[str] = None
+
+     def task(self, task_id: str, function: str, **kwargs) -> "WorkflowBuilder":
+         task = TaskOperator(task_id=task_id, function=function, **kwargs)
+         if self._current_task:
+             task.dependencies.append(self._current_task)
+         self.workflow.add_task(task)
+         self._current_task = task_id
+         return self
+
+     def condition(
+         self, task_id: str, condition: str, if_true: str, if_false: str, **kwargs
+     ) -> "WorkflowBuilder":
+         task = ConditionOperator(
+             task_id=task_id, condition=condition, if_true=if_true, if_false=if_false, **kwargs
+         )
+         if self._current_task:
+             task.dependencies.append(self._current_task)
+         self.workflow.add_task(task)
+         self._current_task = task_id
+         return self
+
+     def wait(
+         self, task_id: str, wait_for: Union[timedelta, datetime, str], **kwargs
+     ) -> "WorkflowBuilder":
+         task = WaitOperator(task_id=task_id, wait_for=wait_for, **kwargs)
+         if self._current_task:
+             task.dependencies.append(self._current_task)
+         self.workflow.add_task(task)
+         self._current_task = task_id
+         return self
+
+     def parallel(
+         self, task_id: str, branches: Dict[str, List[str]], **kwargs
+     ) -> "WorkflowBuilder":
+         task = ParallelOperator(task_id=task_id, branches=branches, **kwargs)
+         if self._current_task:
+             task.dependencies.append(self._current_task)
+         self.workflow.add_task(task)
+         self._current_task = task_id
+         return self
+
+     def foreach(
+         self, task_id: str, items: str, task_chain: List[str], **kwargs
+     ) -> "WorkflowBuilder":
+         task = ForEachOperator(task_id=task_id, items=items, task_chain=task_chain, **kwargs)
+         if self._current_task:
+             task.dependencies.append(self._current_task)
+         self.workflow.add_task(task)
+         self._current_task = task_id
+         return self
+
+     def retry(
+         self,
+         max_retries: int = 3,
+         delay: timedelta = timedelta(seconds=5),
+         backoff_factor: float = 2.0,
+     ) -> "WorkflowBuilder":
+         if self._current_task and isinstance(
+             self.workflow.tasks[self._current_task], TaskOperator
+         ):
+             self.workflow.tasks[self._current_task].retry_policy = RetryPolicy(
+                 max_retries=max_retries, delay=delay, backoff_factor=backoff_factor
+             )
+         return self
+
+     def timeout(
+         self, timeout: timedelta, kill_on_timeout: bool = True
+     ) -> "WorkflowBuilder":
+         if self._current_task and isinstance(
+             self.workflow.tasks[self._current_task], TaskOperator
+         ):
+             self.workflow.tasks[self._current_task].timeout_policy = TimeoutPolicy(
+                 timeout=timeout, kill_on_timeout=kill_on_timeout
+             )
+         return self
+
+     def build(self) -> Workflow:
+         if not self.workflow.start_task and self.workflow.tasks:
+             self.workflow.start_task = next(iter(self.workflow.tasks.keys()))
+         return self.workflow
@@ -0,0 +1,203 @@
+ Metadata-Version: 2.4
+ Name: highway_dsl
+ Version: 0.0.1
+ Summary: A domain specific language (DSL) for defining and managing data processing pipelines.
+ Author-email: Farseed Ashouri <farseed.ashouri@gmail.com>
+ License: MIT
+ Project-URL: Homepage, https://github.com/rodmena-limited/highway_dsl
+ Project-URL: Issues, https://github.com/rodmena-limited/highway_dsl/issues
+ Classifier: Programming Language :: Python :: 3
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: pydantic>=2.12.3
+ Requires-Dist: pyyaml>=6.0
+ Provides-Extra: dev
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
+ Requires-Dist: types-PyYAML>=6.0.0; extra == "dev"
+ Requires-Dist: pytest-cov>=2.12.1; extra == "dev"
+ Dynamic: license-file
+
+ # Highway DSL
+
+ Highway DSL is a Python-based Domain Specific Language (DSL) for defining and managing complex workflows. It allows users to declaratively specify tasks, dependencies, and execution parameters, supporting various control flow mechanisms like conditions, parallel execution, and retries.
+
+ ## Features
+
+ * **Declarative Workflow Definition:** Define workflows using a clear and concise Python API or through YAML/JSON configurations.
+ * **Pydantic Models:** Leverages Pydantic for robust data validation and serialization/deserialization of workflow definitions.
+ * **Rich Task Types:** Supports various operators including:
+     * `TaskOperator`: Executes a Python function.
+     * `ConditionOperator`: Enables conditional branching based on expressions.
+     * `WaitOperator`: Pauses workflow execution for a specified duration or until a specific datetime.
+     * `ParallelOperator`: Executes multiple branches of tasks concurrently.
+     * `ForEachOperator`: Iterates over a collection, executing a chain of tasks for each item.
+ * **Retry and Timeout Policies:** Define retry strategies and timeout limits for individual tasks.
+ * **Serialization/Deserialization:** Seamless conversion of workflow definitions between Python objects, YAML, and JSON formats.
+ * **Workflow Builder:** A fluent API for constructing workflows programmatically.
+
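+ Workflows can also be assembled without the builder, since every operator is a plain Pydantic model. A minimal sketch (`jobs.say_hello` is a placeholder function path):
+
+ ```python
+ from highway_dsl.workflow_dsl import TaskOperator, Workflow
+
+ workflow = Workflow(name="hello")
+ workflow.add_task(TaskOperator(task_id="greet", function="jobs.say_hello"))
+ workflow.set_start_task("greet")
+ print(workflow.to_json())
+ ```
+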
+ ## Installation
+
+ To install Highway DSL, you can use pip:
+
+ ```bash
+ pip install highway-dsl
+ ```
+
+ If you want to install it for development, including testing dependencies:
+
+ ```bash
+ pip install "highway-dsl[dev]"
+ ```
+
+ ## Usage
+
+ ### Defining a Simple Workflow
+
+ ```python
+ from datetime import timedelta
+ from highway_dsl.workflow_dsl import WorkflowBuilder
+
+ def demonstrate_basic_workflow():
+     """Show a simple complete workflow using just the builder"""
+
+     workflow = (
+         WorkflowBuilder("simple_etl")
+         .task("extract", "etl.extract_data", result_key="raw_data")
+         .task(
+             "transform",
+             "etl.transform_data",
+             args=["{{raw_data}}"],
+             result_key="transformed_data",
+         )
+         .retry(max_retries=3, delay=timedelta(seconds=10))
+         .task("load", "etl.load_data", args=["{{transformed_data}}"])
+         .timeout(timeout=timedelta(minutes=30))
+         .wait("wait_next", timedelta(hours=24))
+         .task("cleanup", "etl.cleanup")
+         .build()
+     )
+
+     workflow.set_variables(
+         {"database_url": "postgresql://localhost/mydb", "chunk_size": 1000}
+     )
+
+     return workflow
+
+ if __name__ == "__main__":
+     basic_workflow = demonstrate_basic_workflow()
+     print(basic_workflow.to_yaml())
+ ```
+
+ ### Defining a Complex Workflow
+
+ Refer to `example_usage.py` for a more complex example demonstrating conditional logic, parallel execution, and iteration.
+
+ ### YAML Configuration
+
+ You can also define workflows directly in YAML:
+
+ ```yaml
+ name: simple_etl
+ version: 1.0.0
+ description: Simple ETL workflow with retry and timeout
+ variables:
+   database_url: postgresql://localhost/mydb
+   chunk_size: 1000
+ start_task: extract
+ tasks:
+   extract:
+     task_id: extract
+     operator_type: task
+     function: etl.extract_data
+     result_key: raw_data
+     dependencies: []
+     metadata: {}
+
+   transform:
+     task_id: transform
+     operator_type: task
+     function: etl.transform_data
+     args: ["{{raw_data}}"]
+     result_key: transformed_data
+     dependencies: ["extract"]
+     retry_policy:
+       max_retries: 3
+       delay: PT10S
+       backoff_factor: 2.0
+     metadata: {}
+
+   load:
+     task_id: load
+     operator_type: task
+     function: etl.load_data
+     args: ["{{transformed_data}}"]
+     dependencies: ["transform"]
+     timeout_policy:
+       timeout: PT30M
+       kill_on_timeout: true
+     metadata: {}
+
+   wait_next:
+     task_id: wait_next
+     operator_type: wait
+     wait_for: "P1D"
+     dependencies: ["load"]
+     metadata: {}
+
+   cleanup:
+     task_id: cleanup
+     operator_type: task
+     function: etl.cleanup
+     dependencies: ["wait_next"]
+     metadata: {}
+ ```
+
+ To load this YAML:
+
+ ```python
+ from highway_dsl.workflow_dsl import Workflow
+
+ yaml_content = """
+ # ... (yaml content from above)
+ """
+
+ workflow = Workflow.from_yaml(yaml_content)
+ print(workflow.name)
+ ```
+
+ ## Development
+
+ ### Running Tests
+
+ To run the unit tests, navigate to the project root and execute:
+
+ ```bash
+ pytest
+ ```
+
+ ### Type Checking
+
+ To perform static type checking with MyPy:
+
+ ```bash
+ mypy .
+ ```
+
+ ## Project Structure
+
+ ```
+ .highway/
+ ├── workflow_dsl.py          # Core DSL definitions (Pydantic models)
+ ├── example_usage.py         # Examples of how to use the DSL
+ ├── tests/
+ │   ├── __init__.py
+ │   ├── conftest.py          # Pytest configuration
+ │   └── test_workflow_dsl.py # Unit and integration tests
+ ├── pyproject.toml           # Project metadata and dependencies
+ ├── README.md                # This file
+ └── SUMMARY.md               # Summary of changes and future instructions
+ ```
@@ -0,0 +1,11 @@
+ LICENSE
+ README.md
+ pyproject.toml
+ highway_dsl/__init__.py
+ highway_dsl/workflow_dsl.py
+ highway_dsl.egg-info/PKG-INFO
+ highway_dsl.egg-info/SOURCES.txt
+ highway_dsl.egg-info/dependency_links.txt
+ highway_dsl.egg-info/requires.txt
+ highway_dsl.egg-info/top_level.txt
+ tests/test_workflow_dsl.py
@@ -0,0 +1,8 @@
+ pydantic>=2.12.3
+ pyyaml>=6.0
+
+ [dev]
+ pytest>=7.0.0
+ mypy>=1.0.0
+ types-PyYAML>=6.0.0
+ pytest-cov>=2.12.1
@@ -0,0 +1 @@
+ highway_dsl
@@ -0,0 +1,37 @@
+ [build-system]
+ requires = ["setuptools>=61.0"]
+ build-backend = "setuptools.build_meta"
+
+ [project]
+ name = "highway_dsl"
+ version = "0.0.1"
+ authors = [
+     { name = "Farseed Ashouri", email = "farseed.ashouri@gmail.com" },
+ ]
+ description = "A domain specific language (DSL) for defining and managing data processing pipelines."
+ readme = "README.md"
+ requires-python = ">=3.9"
+ license = { text = "MIT" }
+ classifiers = [
+     "Programming Language :: Python :: 3",
+     "License :: OSI Approved :: MIT License",
+     "Operating System :: OS Independent",
+ ]
+
+ # add dependencies here
+ dependencies = [
+     "pydantic>=2.12.3",
+     "pyyaml>=6.0"
+ ]
+ # dev dependencies
+ [project.optional-dependencies]
+ dev = [
+     "pytest>=7.0.0",
+     "mypy>=1.0.0",
+     "types-PyYAML>=6.0.0",
+     "pytest-cov>=2.12.1",
+ ]
+
+ [project.urls]
+ Homepage = "https://github.com/rodmena-limited/highway_dsl"
+ Issues = "https://github.com/rodmena-limited/highway_dsl/issues"
@@ -0,0 +1,4 @@
+ [egg_info]
+ tag_build =
+ tag_date = 0
+
@@ -0,0 +1,352 @@
+ import pytest
+ import json
+ from datetime import timedelta, datetime
+ from highway_dsl.workflow_dsl import (
+     Workflow,
+     WorkflowBuilder,
+     TaskOperator,
+     ConditionOperator,
+     ParallelOperator,
+     WaitOperator,
+     ForEachOperator,
+     RetryPolicy,
+     TimeoutPolicy,
+     OperatorType,
+ )
+
+ def sort_dict_recursively(d):
+     if not isinstance(d, dict):
+         return d
+     return {k: sort_dict_recursively(v) for k, v in sorted(d.items())}
+
+ def test_workflow_creation():
+     workflow = Workflow(name="test_workflow", version="1.0.0", description="A test workflow")
+     assert workflow.name == "test_workflow"
+     assert workflow.version == "1.0.0"
+     assert workflow.description == "A test workflow"
+     assert workflow.tasks == {}
+     assert workflow.variables == {}
+     assert workflow.start_task is None
+
+ def test_add_task_to_workflow():
+     workflow = Workflow(name="test_workflow")
+     task = TaskOperator(task_id="task1", function="func1")
+     workflow.add_task(task)
+     assert "task1" in workflow.tasks
+     assert workflow.tasks["task1"] == task
+
+ def test_set_variables():
+     workflow = Workflow(name="test_workflow")
+     workflow.set_variables({"key1": "value1"})
+     assert workflow.variables == {"key1": "value1"}
+     workflow.set_variables({"key2": "value2"})
+     assert workflow.variables == {"key1": "value1", "key2": "value2"}
+
+ def test_set_start_task():
+     workflow = Workflow(name="test_workflow")
+     workflow.set_start_task("task1")
+     assert workflow.start_task == "task1"
+
+ def test_retry_policy_model():
+     policy = RetryPolicy(max_retries=5, delay=timedelta(seconds=10), backoff_factor=2.5)
+     assert policy.max_retries == 5
+     assert policy.delay == timedelta(seconds=10)
+     assert policy.backoff_factor == 2.5
+
+ def test_timeout_policy_model():
+     policy = TimeoutPolicy(timeout=timedelta(minutes=5), kill_on_timeout=False)
+     assert policy.timeout == timedelta(minutes=5)
+     assert policy.kill_on_timeout is False
+
+ def test_task_operator_model():
+     task = TaskOperator(
+         task_id="task1",
+         function="func1",
+         args=["arg1"],
+         kwargs={"kwarg1": "value1"},
+         result_key="res1",
+         dependencies=["dep1"],
+         retry_policy=RetryPolicy(max_retries=1),
+         timeout_policy=TimeoutPolicy(timeout=timedelta(seconds=30)),
+         metadata={"meta1": "data1"},
+     )
+     assert task.task_id == "task1"
+     assert task.operator_type == OperatorType.TASK
+     assert task.function == "func1"
+     assert task.args == ["arg1"]
+     assert task.kwargs == {"kwarg1": "value1"}
+     assert task.result_key == "res1"
+     assert task.dependencies == ["dep1"]
+     assert task.retry_policy.max_retries == 1
+     assert task.timeout_policy.timeout == timedelta(seconds=30)
+     assert task.metadata == {"meta1": "data1"}
+
+ def test_condition_operator_model():
+     condition = ConditionOperator(
+         task_id="cond1",
+         condition="x > 5",
+         if_true="task_true",
+         if_false="task_false",
+         dependencies=["prev_task"],
+     )
+     assert condition.task_id == "cond1"
+     assert condition.operator_type == OperatorType.CONDITION
+     assert condition.condition == "x > 5"
+     assert condition.if_true == "task_true"
+     assert condition.if_false == "task_false"
+     assert condition.dependencies == ["prev_task"]
+
+ def test_wait_operator_model():
+     wait_duration = WaitOperator(task_id="wait1", wait_for=timedelta(hours=1))
+     assert wait_duration.wait_for == timedelta(hours=1)
+     assert wait_duration.operator_type == OperatorType.WAIT
+
+     now = datetime.now().replace(microsecond=0)
+     wait_datetime = WaitOperator(task_id="wait2", wait_for=now)
+     assert wait_datetime.wait_for == now
+
+     wait_string = WaitOperator(task_id="wait3", wait_for="event_name")
+     assert wait_string.wait_for == "event_name"
+
+ def test_parallel_operator_model():
+     parallel = ParallelOperator(
+         task_id="parallel1",
+         branches={"branch_a": ["task_a1", "task_a2"], "branch_b": ["task_b1"]},
+     )
+     assert parallel.task_id == "parallel1"
+     assert parallel.operator_type == OperatorType.PARALLEL
+     assert parallel.branches == {"branch_a": ["task_a1", "task_a2"], "branch_b": ["task_b1"]}
+
+ def test_foreach_operator_model():
+     foreach = ForEachOperator(
+         task_id="foreach1", items="data_list", task_chain=["process_item"]
+     )
+     assert foreach.task_id == "foreach1"
+     assert foreach.operator_type == OperatorType.FOREACH
+     assert foreach.items == "data_list"
+     assert foreach.task_chain == ["process_item"]
+
+ def test_wait_operator_serialization():
+     # Test with timedelta
+     wait_duration = WaitOperator(task_id="wait1", wait_for=timedelta(hours=1))
+     dump = wait_duration.model_dump()
+     assert dump["wait_for"] == "duration:3600.0"
+
+     # Test with datetime
+     now = datetime.now().replace(microsecond=0)
+     wait_datetime = WaitOperator(task_id="wait2", wait_for=now)
+     dump = wait_datetime.model_dump()
+     assert dump["wait_for"] == f"datetime:{now.isoformat()}"
+
+     # Test with string (no conversion)
+     wait_string = WaitOperator(task_id="wait3", wait_for="event_name")
+     dump = wait_string.model_dump()
+     assert dump["wait_for"] == "event_name"
+
+     # Test parsing of different data types
+     assert WaitOperator.model_validate({"task_id": "t", "wait_for": "duration:60"}).wait_for == timedelta(seconds=60)
+     now_iso = now.isoformat()
+     assert WaitOperator.model_validate({"task_id": "t", "wait_for": f"datetime:{now_iso}"}).wait_for == now
+     assert WaitOperator.model_validate({"task_id": "t", "wait_for": "event"}).wait_for == "event"
+
+ def test_workflow_builder_simple_chain():
+     workflow = (
+         WorkflowBuilder("simple_chain")
+         .task("start", "func_start", result_key="start_res")
+         .task("middle", "func_middle", args=["{{start_res}}"])
+         .build()
+     )
+     assert workflow.name == "simple_chain"
+     assert "start" in workflow.tasks
+     assert "middle" in workflow.tasks
+     assert workflow.tasks["middle"].dependencies == ["start"]
+     assert workflow.start_task == "start"
+
+ def test_workflow_builder_with_retry_and_timeout():
+     workflow = (
+         WorkflowBuilder("retry_timeout_workflow")
+         .task("step1", "func1")
+         .retry(max_retries=5, delay=timedelta(seconds=15))
+         .timeout(timeout=timedelta(minutes=1))
+         .build()
+     )
+     assert workflow.tasks["step1"].retry_policy.max_retries == 5
+     assert workflow.tasks["step1"].retry_policy.delay == timedelta(seconds=15)
+     assert workflow.tasks["step1"].timeout_policy.timeout == timedelta(minutes=1)
+
+ def test_workflow_builder_condition():
+     workflow = (
+         WorkflowBuilder("conditional_workflow")
+         .task("initial", "init_func")
+         .condition("check", "val > 10", "high", "low")
+         .build()
+     )
+     assert "check" in workflow.tasks
+     assert workflow.tasks["check"].dependencies == ["initial"]
+     assert workflow.tasks["check"].if_true == "high"
+
+ def test_workflow_builder_parallel():
+     workflow = (
+         WorkflowBuilder("parallel_workflow")
+         .task("init", "init_func")
+         .parallel("parallel_step", {"b1": ["t1"], "b2": ["t2"]})
+         .build()
+     )
+     assert "parallel_step" in workflow.tasks
+     assert workflow.tasks["parallel_step"].dependencies == ["init"]
+
+ def test_workflow_builder_foreach():
+     workflow = (
+         WorkflowBuilder("foreach_workflow")
+         .task("fetch_items", "fetch_func")
+         .foreach("loop_items", "items_list", ["process_item"])
+         .build()
+     )
+     assert "loop_items" in workflow.tasks
+     assert workflow.tasks["loop_items"].dependencies == ["fetch_items"]
+
+ def test_workflow_yaml_round_trip():
+     original_workflow = (
+         WorkflowBuilder("yaml_test")
+         .task("start", "func_start", result_key="start_res")
+         .retry(max_retries=2, delay=timedelta(seconds=5))
+         .wait("wait_step", timedelta(minutes=1))
+         .task("end", "func_end", args=["{{start_res}}"])
+         .build()
+     )
+     original_workflow.set_variables({"env": "dev"})
+
+     yaml_output = original_workflow.to_yaml()
+     loaded_workflow = Workflow.from_yaml(yaml_output)
+     assert sort_dict_recursively(json.loads(original_workflow.model_dump_json())) == sort_dict_recursively(json.loads(loaded_workflow.model_dump_json()))
+
+ def test_workflow_json_round_trip():
+     original_workflow = (
+         WorkflowBuilder("json_test")
+         .task("stepA", "funcA")
+         .timeout(timeout=timedelta(seconds=60), kill_on_timeout=False)
+         .condition("check_val", "val == 'ok'", "success", "fail")
+         .build()
+     )
+     original_workflow.set_variables({"user": "test"})
+
+     json_output = original_workflow.to_json()
+     loaded_workflow = Workflow.from_json(json_output)
+
+     assert sort_dict_recursively(json.loads(original_workflow.model_dump_json())) == sort_dict_recursively(json.loads(loaded_workflow.model_dump_json()))
+
+ def test_complex_workflow_creation_and_serialization():
+     # This test re-uses the logic from example_usage.py's create_complex_workflow
+     # to ensure it works with the new Pydantic models and can be serialized/deserialized.
+     workflow = (
+         WorkflowBuilder("data_processing_pipeline")
+         .task("start", "workflows.tasks.initialize", result_key="init_data")
+         .task(
+             "validate",
+             "workflows.tasks.validate_data",
+             args=["{{init_data}}"],
+             result_key="validated_data",
+         )
+         .condition(
+             "check_quality",
+             condition="{{validated_data.quality_score}} > 0.8",
+             if_true="high_quality_processing",
+             if_false="standard_processing",
+         )
+         .build()
+     )
+
+     workflow.add_task(
+         TaskOperator(
+             task_id="high_quality_processing",
+             function="workflows.tasks.advanced_processing",
+             args=["{{validated_data}}"],
+             dependencies=["check_quality"],
+             retry_policy=RetryPolicy(max_retries=5, delay=timedelta(seconds=10)),
+         )
+     )
+
+     workflow.add_task(
+         TaskOperator(
+             task_id="standard_processing",
+             function="workflows.tasks.basic_processing",
+             args=["{{validated_data}}"],
+             dependencies=["check_quality"],
+         )
+     )
+
+     workflow.add_task(
+         ParallelOperator(
+             task_id="parallel_processing",
+             branches={
+                 "branch_a": ["transform_a", "enrich_a"],
+                 "branch_b": ["transform_b", "enrich_b"],
+             },
+             dependencies=["high_quality_processing", "standard_processing"],
+         )
+     )
+
+     for branch in ["a", "b"]:
+         workflow.add_task(
+             TaskOperator(
+                 task_id=f"transform_{branch}",
+                 function=f"workflows.tasks.transform_{branch}",
+                 dependencies=["parallel_processing"],
+                 result_key=f"transformed_{branch}",
+             )
+         )
+
+         workflow.add_task(
+             TaskOperator(
+                 task_id=f"enrich_{branch}",
+                 function="workflows.tasks.enrich_data",
+                 args=[f"{{{{transformed_{branch}}}}}",],
+                 dependencies=[f"transform_{branch}"],
+                 result_key=f"enriched_{branch}",
+             )
+         )
+
+     builder = WorkflowBuilder(workflow.name, existing_workflow=workflow)
+     builder._current_task = "enrich_b"  # Manually set current task for builder continuation
+
+     workflow = (
+         builder.task(
+             "aggregate",
+             "workflows.tasks.aggregate_results",
+             dependencies=[
+                 "enrich_a",
+                 "enrich_b",
+             ],
+             result_key="final_result",
+         )
+         .wait("wait_notification", timedelta(hours=1))
+         .task("notify", "workflows.tasks.send_notification", args=["{{final_result}}"])
+         .build()
+     )
+
+     workflow.set_variables(
+         {
+             "environment": "production",
+             "batch_size": 1000,
+             "notify_email": "team@company.com",
+         }
+     )
+
+     # Test serialization and deserialization
+     yaml_output = workflow.to_yaml()
+     loaded_workflow_from_yaml = Workflow.from_yaml(yaml_output)
+     assert sort_dict_recursively(json.loads(workflow.model_dump_json())) == sort_dict_recursively(json.loads(loaded_workflow_from_yaml.model_dump_json()))
+
+     json_output = workflow.to_json()
+     loaded_workflow_from_json = Workflow.from_json(json_output)
+     assert sort_dict_recursively(json.loads(workflow.model_dump_json())) == sort_dict_recursively(json.loads(loaded_workflow_from_json.model_dump_json()))
+
+ def test_unknown_operator_type_raises_error():
+     yaml_content = """
+ name: test
+ tasks:
+   task1:
+     operator_type: unknown_operator
+ """
+     with pytest.raises(ValueError, match="Unknown operator type: unknown_operator"):
+         Workflow.from_yaml(yaml_content)