highway-dsl 0.0.1 (tar.gz)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of highway-dsl might be problematic.
- highway_dsl-0.0.1/LICENSE +21 -0
- highway_dsl-0.0.1/PKG-INFO +203 -0
- highway_dsl-0.0.1/README.md +180 -0
- highway_dsl-0.0.1/highway_dsl/__init__.py +0 -0
- highway_dsl-0.0.1/highway_dsl/workflow_dsl.py +252 -0
- highway_dsl-0.0.1/highway_dsl.egg-info/PKG-INFO +203 -0
- highway_dsl-0.0.1/highway_dsl.egg-info/SOURCES.txt +11 -0
- highway_dsl-0.0.1/highway_dsl.egg-info/dependency_links.txt +1 -0
- highway_dsl-0.0.1/highway_dsl.egg-info/requires.txt +8 -0
- highway_dsl-0.0.1/highway_dsl.egg-info/top_level.txt +1 -0
- highway_dsl-0.0.1/pyproject.toml +37 -0
- highway_dsl-0.0.1/setup.cfg +4 -0
- highway_dsl-0.0.1/tests/test_workflow_dsl.py +352 -0
highway_dsl-0.0.1/LICENSE
@@ -0,0 +1,21 @@

MIT License

Copyright (c) 2025 Farseed Ashouri

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
highway_dsl-0.0.1/PKG-INFO
@@ -0,0 +1,203 @@

Metadata-Version: 2.4
Name: highway_dsl
Version: 0.0.1
Summary: A domain specific language (DSL) for defining and managing data processing pipelines.
Author-email: Farseed Ashouri <farseed.ashouri@gmail.com>
License: MIT
Project-URL: Homepage, https://github.com/rodmena-limited/highway_dsl
Project-URL: Issues, https://github.com/rodmena-limited/highway_dsl/issues
Classifier: Programming Language :: Python :: 3
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Requires-Python: >=3.9
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: pydantic>=2.12.3
Requires-Dist: pyyaml>=6.0
Provides-Extra: dev
Requires-Dist: pytest>=7.0.0; extra == "dev"
Requires-Dist: mypy>=1.0.0; extra == "dev"
Requires-Dist: types-PyYAML>=6.0.0; extra == "dev"
Requires-Dist: pytest-cov>=2.12.1; extra == "dev"
Dynamic: license-file

# Highway DSL

Highway DSL is a Python-based Domain Specific Language (DSL) for defining and managing complex workflows. It allows users to declaratively specify tasks, dependencies, and execution parameters, and supports control-flow mechanisms such as conditions, parallel execution, and retries.

## Features

* **Declarative Workflow Definition:** Define workflows using a clear and concise Python API or through YAML/JSON configurations.
* **Pydantic Models:** Leverages Pydantic for robust data validation and serialization/deserialization of workflow definitions.
* **Rich Task Types:** Supports various operators, including:
    * `TaskOperator`: Executes a Python function.
    * `ConditionOperator`: Enables conditional branching based on expressions.
    * `WaitOperator`: Pauses workflow execution for a specified duration or until a specific datetime.
    * `ParallelOperator`: Executes multiple branches of tasks concurrently.
    * `ForEachOperator`: Iterates over a collection, executing a chain of tasks for each item.
* **Retry and Timeout Policies:** Define retry strategies and timeout limits for individual tasks.
* **Serialization/Deserialization:** Seamless conversion of workflow definitions between Python objects, YAML, and JSON formats.
* **Workflow Builder:** A fluent API for constructing workflows programmatically.

## Installation

To install Highway DSL, use pip:

```bash
pip install highway-dsl
```

To install it for development, including testing dependencies:

```bash
pip install "highway-dsl[dev]"
```

## Usage

### Defining a Simple Workflow

```python
from datetime import timedelta
from highway_dsl.workflow_dsl import WorkflowBuilder


def demonstrate_basic_workflow():
    """Show a simple complete workflow using just the builder."""

    workflow = (
        WorkflowBuilder("simple_etl")
        .task("extract", "etl.extract_data", result_key="raw_data")
        .task(
            "transform",
            "etl.transform_data",
            args=["{{raw_data}}"],
            result_key="transformed_data",
        )
        .retry(max_retries=3, delay=timedelta(seconds=10))  # applies to "transform"
        .task("load", "etl.load_data", args=["{{transformed_data}}"])
        .timeout(timeout=timedelta(minutes=30))  # applies to "load"
        .wait("wait_next", timedelta(hours=24))
        .task("cleanup", "etl.cleanup")
        .build()
    )

    workflow.set_variables(
        {"database_url": "postgresql://localhost/mydb", "chunk_size": 1000}
    )

    return workflow


if __name__ == "__main__":
    basic_workflow = demonstrate_basic_workflow()
    print(basic_workflow.to_yaml())
```

### Defining a Complex Workflow

Refer to `example_usage.py` for a more complex example demonstrating conditional logic, parallel execution, and iteration.
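`example_usage.py` is referenced above but is not listed in the sdist's SOURCES.txt, so it does not ship with this release. As a stand-in, here is a minimal, hypothetical sketch of conditional branching with the builder API defined in `workflow_dsl.py` below; the task names and dotted function paths (`etl.fetch_data`, `etl.bulk_load`, `etl.row_load`) are illustrative, not part of the package:

```python
from highway_dsl.workflow_dsl import TaskOperator, WorkflowBuilder

# Chain a fetch task into a condition; .condition() names the two branch targets.
workflow = (
    WorkflowBuilder("branching_etl")
    .task("fetch", "etl.fetch_data", result_key="data")
    .condition(
        "check_size",
        condition="{{data.row_count}} > 1000",
        if_true="bulk_load",
        if_false="row_load",
    )
    .build()
)

# Branch targets are ordinary tasks that declare the condition as a dependency.
workflow.add_task(
    TaskOperator(
        task_id="bulk_load",
        function="etl.bulk_load",
        args=["{{data}}"],
        dependencies=["check_size"],
    )
)
workflow.add_task(
    TaskOperator(
        task_id="row_load",
        function="etl.row_load",
        args=["{{data}}"],
        dependencies=["check_size"],
    )
)

print(workflow.to_yaml())
```

Each fluent call records the previous task as a dependency of the new one, which is why `check_size` automatically depends on `fetch`.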
### YAML Configuration

You can also define workflows directly in YAML:

```yaml
name: simple_etl
version: 1.0.0
description: Simple ETL workflow with retry and timeout
variables:
  database_url: postgresql://localhost/mydb
  chunk_size: 1000
start_task: extract
tasks:
  extract:
    task_id: extract
    operator_type: task
    function: etl.extract_data
    result_key: raw_data
    dependencies: []
    metadata: {}

  transform:
    task_id: transform
    operator_type: task
    function: etl.transform_data
    args: ["{{raw_data}}"]
    result_key: transformed_data
    dependencies: ["extract"]
    retry_policy:
      max_retries: 3
      delay: PT10S
      backoff_factor: 2.0
    metadata: {}

  load:
    task_id: load
    operator_type: task
    function: etl.load_data
    args: ["{{transformed_data}}"]
    dependencies: ["transform"]
    timeout_policy:
      timeout: PT30M
      kill_on_timeout: true
    metadata: {}

  wait_next:
    task_id: wait_next
    operator_type: wait
    wait_for: "P1D"
    dependencies: ["load"]
    metadata: {}

  cleanup:
    task_id: cleanup
    operator_type: task
    function: etl.cleanup
    dependencies: ["wait_next"]
    metadata: {}
```

To load this YAML:

```python
from highway_dsl.workflow_dsl import Workflow

yaml_content = """
# ... (yaml content from above)
"""

workflow = Workflow.from_yaml(yaml_content)
print(workflow.name)
```
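The `PT10S`, `PT30M`, and `P1D` strings above are ISO-8601 durations. For typed fields such as `RetryPolicy.delay`, pydantic v2 parses these into `datetime.timedelta` values on load; a minimal sketch of that behavior (task and function names are placeholders):

```python
from highway_dsl.workflow_dsl import Workflow

yaml_snippet = """
name: demo
start_task: extract
tasks:
  extract:
    task_id: extract
    operator_type: task
    function: etl.extract_data
    retry_policy:
      max_retries: 3
      delay: PT10S
      backoff_factor: 2.0
"""

wf = Workflow.from_yaml(yaml_snippet)
print(repr(wf.tasks["extract"].retry_policy.delay))  # datetime.timedelta(seconds=10)
```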
## Development

### Running Tests

To run the unit tests, navigate to the project root and execute:

```bash
pytest
```

### Type Checking

To perform static type checking with MyPy:

```bash
mypy .
```

## Project Structure

```
.highway/
├── workflow_dsl.py           # Core DSL definitions (Pydantic models)
├── example_usage.py          # Examples of how to use the DSL
├── tests/
│   ├── __init__.py
│   ├── conftest.py           # Pytest configuration
│   └── test_workflow_dsl.py  # Unit and integration tests
├── pyproject.toml            # Project metadata and dependencies
├── README.md                 # This file
└── SUMMARY.md                # Summary of changes and future instructions
```
highway_dsl-0.0.1/README.md
@@ -0,0 +1,180 @@

(Content identical to the README embedded in PKG-INFO above, minus the metadata header.)
highway_dsl-0.0.1/highway_dsl/__init__.py: file without changes (empty file).
highway_dsl-0.0.1/highway_dsl/workflow_dsl.py
@@ -0,0 +1,252 @@

# workflow_dsl.py
from typing import Any, Dict, List, Optional, Union, Callable, Type
from enum import Enum
from datetime import datetime, timedelta
import yaml
import json
from abc import ABC, abstractmethod
from pydantic import BaseModel, Field, model_validator, ConfigDict


class OperatorType(Enum):
    TASK = "task"
    CONDITION = "condition"
    WAIT = "wait"
    PARALLEL = "parallel"
    FOREACH = "foreach"
    SWITCH = "switch"
    TRY_CATCH = "try_catch"


class RetryPolicy(BaseModel):
    max_retries: int = Field(3, description="Maximum number of retries")
    delay: timedelta = Field(timedelta(seconds=5), description="Delay between retries")
    backoff_factor: float = Field(2.0, description="Factor by which to increase delay")


class TimeoutPolicy(BaseModel):
    timeout: timedelta = Field(..., description="Timeout duration")
    kill_on_timeout: bool = Field(True, description="Whether to kill the task on timeout")


class BaseOperator(BaseModel, ABC):
    task_id: str
    operator_type: OperatorType
    dependencies: List[str] = Field(default_factory=list)
    retry_policy: Optional[RetryPolicy] = None
    timeout_policy: Optional[TimeoutPolicy] = None
    metadata: Dict[str, Any] = Field(default_factory=dict)

    model_config = ConfigDict(use_enum_values=True, arbitrary_types_allowed=True)


class TaskOperator(BaseOperator):
    function: str
    args: List[Any] = Field(default_factory=list)
    kwargs: Dict[str, Any] = Field(default_factory=dict)
    result_key: Optional[str] = None
    operator_type: OperatorType = Field(OperatorType.TASK, frozen=True)


class ConditionOperator(BaseOperator):
    condition: str
    if_true: str
    if_false: str
    operator_type: OperatorType = Field(OperatorType.CONDITION, frozen=True)


class WaitOperator(BaseOperator):
    wait_for: Union[timedelta, datetime, str]
    operator_type: OperatorType = Field(OperatorType.WAIT, frozen=True)

    @model_validator(mode='before')
    @classmethod
    def parse_wait_for(cls, data: Any) -> Any:
        if isinstance(data, dict) and "wait_for" in data:
            wait_for = data["wait_for"]
            if isinstance(wait_for, str):
                if wait_for.startswith("duration:"):
                    data["wait_for"] = timedelta(seconds=float(wait_for.split(":")[1]))
                elif wait_for.startswith("datetime:"):
                    data["wait_for"] = datetime.fromisoformat(wait_for.split(":", 1)[1])
        return data

    def model_dump(self, **kwargs) -> Dict[str, Any]:
        data = super().model_dump(**kwargs)
        wait_for = data["wait_for"]
        if isinstance(wait_for, timedelta):
            data["wait_for"] = f"duration:{wait_for.total_seconds()}"
        elif isinstance(wait_for, datetime):
            data["wait_for"] = f"datetime:{wait_for.isoformat()}"
        return data


class ParallelOperator(BaseOperator):
    branches: Dict[str, List[str]] = Field(default_factory=dict)
    operator_type: OperatorType = Field(OperatorType.PARALLEL, frozen=True)


class ForEachOperator(BaseOperator):
    items: str
    task_chain: List[str] = Field(default_factory=list)
    operator_type: OperatorType = Field(OperatorType.FOREACH, frozen=True)


class Workflow(BaseModel):
    name: str
    version: str = "1.0.0"
    description: str = ""
    tasks: Dict[str, Union[
        TaskOperator,
        ConditionOperator,
        WaitOperator,
        ParallelOperator,
        ForEachOperator,
    ]] = Field(default_factory=dict)
    variables: Dict[str, Any] = Field(default_factory=dict)
    start_task: Optional[str] = None

    @model_validator(mode='before')
    @classmethod
    def validate_tasks(cls, data: Any) -> Any:
        if isinstance(data, dict) and "tasks" in data:
            validated_tasks = {}
            operator_classes: Dict[str, Type[BaseOperator]] = {
                OperatorType.TASK.value: TaskOperator,
                OperatorType.CONDITION.value: ConditionOperator,
                OperatorType.WAIT.value: WaitOperator,
                OperatorType.PARALLEL.value: ParallelOperator,
                OperatorType.FOREACH.value: ForEachOperator,
            }
            for task_id, task_data in data["tasks"].items():
                operator_type = task_data.get("operator_type")
                if operator_type and operator_type in operator_classes:
                    operator_class = operator_classes[operator_type]
                    validated_tasks[task_id] = operator_class.model_validate(task_data)
                else:
                    raise ValueError(f"Unknown operator type: {operator_type}")
            data["tasks"] = validated_tasks
        return data

    def add_task(self, task: Union[
        TaskOperator,
        ConditionOperator,
        WaitOperator,
        ParallelOperator,
        ForEachOperator,
    ]) -> "Workflow":
        self.tasks[task.task_id] = task
        return self

    def set_variables(self, variables: Dict[str, Any]) -> "Workflow":
        self.variables.update(variables)
        return self

    def set_start_task(self, task_id: str) -> "Workflow":
        self.start_task = task_id
        return self

    def to_yaml(self) -> str:
        data = self.model_dump(mode='json', by_alias=True, exclude_none=True)
        return yaml.dump(data, default_flow_style=False)

    def to_json(self) -> str:
        return self.model_dump_json(indent=2)

    @classmethod
    def from_yaml(cls, yaml_str: str) -> "Workflow":
        data = yaml.safe_load(yaml_str)
        return cls.model_validate(data)

    @classmethod
    def from_json(cls, json_str: str) -> "Workflow":
        return cls.model_validate_json(json_str)


class WorkflowBuilder:
    def __init__(self, name: str, existing_workflow: Optional[Workflow] = None):
        if existing_workflow:
            self.workflow = existing_workflow
        else:
            self.workflow = Workflow(name=name)
        self._current_task: Optional[str] = None

    def task(self, task_id: str, function: str, **kwargs) -> "WorkflowBuilder":
        task = TaskOperator(task_id=task_id, function=function, **kwargs)
        if self._current_task:
            task.dependencies.append(self._current_task)
        self.workflow.add_task(task)
        self._current_task = task_id
        return self

    def condition(
        self, task_id: str, condition: str, if_true: str, if_false: str, **kwargs
    ) -> "WorkflowBuilder":
        task = ConditionOperator(
            task_id=task_id, condition=condition, if_true=if_true, if_false=if_false, **kwargs
        )
        if self._current_task:
            task.dependencies.append(self._current_task)
        self.workflow.add_task(task)
        self._current_task = task_id
        return self

    def wait(
        self, task_id: str, wait_for: Union[timedelta, datetime, str], **kwargs
    ) -> "WorkflowBuilder":
        task = WaitOperator(task_id=task_id, wait_for=wait_for, **kwargs)
        if self._current_task:
            task.dependencies.append(self._current_task)
        self.workflow.add_task(task)
        self._current_task = task_id
        return self

    def parallel(
        self, task_id: str, branches: Dict[str, List[str]], **kwargs
    ) -> "WorkflowBuilder":
        task = ParallelOperator(task_id=task_id, branches=branches, **kwargs)
        if self._current_task:
            task.dependencies.append(self._current_task)
        self.workflow.add_task(task)
        self._current_task = task_id
        return self

    def foreach(
        self, task_id: str, items: str, task_chain: List[str], **kwargs
    ) -> "WorkflowBuilder":
        task = ForEachOperator(task_id=task_id, items=items, task_chain=task_chain, **kwargs)
        if self._current_task:
            task.dependencies.append(self._current_task)
        self.workflow.add_task(task)
        self._current_task = task_id
        return self

    def retry(
        self,
        max_retries: int = 3,
        delay: timedelta = timedelta(seconds=5),
        backoff_factor: float = 2.0,
    ) -> "WorkflowBuilder":
        if self._current_task and isinstance(
            self.workflow.tasks[self._current_task], TaskOperator
        ):
            self.workflow.tasks[self._current_task].retry_policy = RetryPolicy(
                max_retries=max_retries, delay=delay, backoff_factor=backoff_factor
            )
        return self

    def timeout(
        self, timeout: timedelta, kill_on_timeout: bool = True
    ) -> "WorkflowBuilder":
        if self._current_task and isinstance(
            self.workflow.tasks[self._current_task], TaskOperator
        ):
            self.workflow.tasks[self._current_task].timeout_policy = TimeoutPolicy(
                timeout=timeout, kill_on_timeout=kill_on_timeout
            )
        return self

    def build(self) -> Workflow:
        if not self.workflow.start_task and self.workflow.tasks:
            self.workflow.start_task = next(iter(self.workflow.tasks.keys()))
        return self.workflow
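`WaitOperator` is the one model with custom serialization: `model_dump()` tags a `timedelta` as `"duration:<seconds>"` and a `datetime` as `"datetime:<isoformat>"`, and the `mode='before'` validator reverses the tagging on load. A quick sketch of the round trip, consistent with the package's own tests below:

```python
from datetime import timedelta
from highway_dsl.workflow_dsl import WaitOperator

op = WaitOperator(task_id="pause", wait_for=timedelta(hours=1))
dump = op.model_dump()
print(dump["wait_for"])  # duration:3600.0

# parse_wait_for turns the tagged string back into a timedelta on validation.
restored = WaitOperator.model_validate(dump)
assert restored.wait_for == timedelta(hours=1)
```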
highway_dsl-0.0.1/highway_dsl.egg-info/PKG-INFO
@@ -0,0 +1,203 @@

(Identical to the top-level PKG-INFO above; setuptools copies the package metadata into the egg-info directory at build time.)
highway_dsl-0.0.1/highway_dsl.egg-info/SOURCES.txt
@@ -0,0 +1,11 @@

LICENSE
README.md
pyproject.toml
highway_dsl/__init__.py
highway_dsl/workflow_dsl.py
highway_dsl.egg-info/PKG-INFO
highway_dsl.egg-info/SOURCES.txt
highway_dsl.egg-info/dependency_links.txt
highway_dsl.egg-info/requires.txt
highway_dsl.egg-info/top_level.txt
tests/test_workflow_dsl.py
highway_dsl-0.0.1/highway_dsl.egg-info/dependency_links.txt
@@ -0,0 +1 @@

(a single blank line)
highway_dsl-0.0.1/highway_dsl.egg-info/top_level.txt
@@ -0,0 +1 @@

highway_dsl
highway_dsl-0.0.1/pyproject.toml
@@ -0,0 +1,37 @@

[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "highway_dsl"
version = "0.0.1"
authors = [
    { name = "Farseed Ashouri", email = "farseed.ashouri@gmail.com" },
]
description = "A domain specific language (DSL) for defining and managing data processing pipelines."
readme = "README.md"
requires-python = ">=3.9"
license = { text = "MIT" }
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
]

# add dependencies here
dependencies = [
    "pydantic>=2.12.3",
    "pyyaml>=6.0"
]
# dev dependencies
[project.optional-dependencies]
dev = [
    "pytest>=7.0.0",
    "mypy>=1.0.0",
    "types-PyYAML>=6.0.0",
    "pytest-cov>=2.12.1",
]

[project.urls]
Homepage = "https://github.com/rodmena-limited/highway_dsl"
Issues = "https://github.com/rodmena-limited/highway_dsl/issues"
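With the `[build-system]` table above, this sdist can be rebuilt with any PEP 517 frontend. A minimal sketch, assuming the separate `build` package is installed:

```bash
python -m pip install build
python -m build   # writes dist/highway_dsl-0.0.1.tar.gz and a wheel
```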
highway_dsl-0.0.1/tests/test_workflow_dsl.py
@@ -0,0 +1,352 @@

import pytest
import json
from datetime import timedelta, datetime
from highway_dsl.workflow_dsl import (
    Workflow,
    WorkflowBuilder,
    TaskOperator,
    ConditionOperator,
    ParallelOperator,
    WaitOperator,
    ForEachOperator,
    RetryPolicy,
    TimeoutPolicy,
    OperatorType,
)


def sort_dict_recursively(d):
    if not isinstance(d, dict):
        return d
    return {k: sort_dict_recursively(v) for k, v in sorted(d.items())}


def test_workflow_creation():
    workflow = Workflow(name="test_workflow", version="1.0.0", description="A test workflow")
    assert workflow.name == "test_workflow"
    assert workflow.version == "1.0.0"
    assert workflow.description == "A test workflow"
    assert workflow.tasks == {}
    assert workflow.variables == {}
    assert workflow.start_task is None


def test_add_task_to_workflow():
    workflow = Workflow(name="test_workflow")
    task = TaskOperator(task_id="task1", function="func1")
    workflow.add_task(task)
    assert "task1" in workflow.tasks
    assert workflow.tasks["task1"] == task


def test_set_variables():
    workflow = Workflow(name="test_workflow")
    workflow.set_variables({"key1": "value1"})
    assert workflow.variables == {"key1": "value1"}
    workflow.set_variables({"key2": "value2"})
    assert workflow.variables == {"key1": "value1", "key2": "value2"}


def test_set_start_task():
    workflow = Workflow(name="test_workflow")
    workflow.set_start_task("task1")
    assert workflow.start_task == "task1"


def test_retry_policy_model():
    policy = RetryPolicy(max_retries=5, delay=timedelta(seconds=10), backoff_factor=2.5)
    assert policy.max_retries == 5
    assert policy.delay == timedelta(seconds=10)
    assert policy.backoff_factor == 2.5


def test_timeout_policy_model():
    policy = TimeoutPolicy(timeout=timedelta(minutes=5), kill_on_timeout=False)
    assert policy.timeout == timedelta(minutes=5)
    assert policy.kill_on_timeout is False


def test_task_operator_model():
    task = TaskOperator(
        task_id="task1",
        function="func1",
        args=["arg1"],
        kwargs={"kwarg1": "value1"},
        result_key="res1",
        dependencies=["dep1"],
        retry_policy=RetryPolicy(max_retries=1),
        timeout_policy=TimeoutPolicy(timeout=timedelta(seconds=30)),
        metadata={"meta1": "data1"},
    )
    assert task.task_id == "task1"
    assert task.operator_type == OperatorType.TASK
    assert task.function == "func1"
    assert task.args == ["arg1"]
    assert task.kwargs == {"kwarg1": "value1"}
    assert task.result_key == "res1"
    assert task.dependencies == ["dep1"]
    assert task.retry_policy.max_retries == 1
    assert task.timeout_policy.timeout == timedelta(seconds=30)
    assert task.metadata == {"meta1": "data1"}


def test_condition_operator_model():
    condition = ConditionOperator(
        task_id="cond1",
        condition="x > 5",
        if_true="task_true",
        if_false="task_false",
        dependencies=["prev_task"],
    )
    assert condition.task_id == "cond1"
    assert condition.operator_type == OperatorType.CONDITION
    assert condition.condition == "x > 5"
    assert condition.if_true == "task_true"
    assert condition.if_false == "task_false"
    assert condition.dependencies == ["prev_task"]


def test_wait_operator_model():
    wait_duration = WaitOperator(task_id="wait1", wait_for=timedelta(hours=1))
    assert wait_duration.wait_for == timedelta(hours=1)
    assert wait_duration.operator_type == OperatorType.WAIT

    now = datetime.now().replace(microsecond=0)
    wait_datetime = WaitOperator(task_id="wait2", wait_for=now)
    assert wait_datetime.wait_for == now

    wait_string = WaitOperator(task_id="wait3", wait_for="event_name")
    assert wait_string.wait_for == "event_name"


def test_parallel_operator_model():
    parallel = ParallelOperator(
        task_id="parallel1",
        branches={"branch_a": ["task_a1", "task_a2"], "branch_b": ["task_b1"]},
    )
    assert parallel.task_id == "parallel1"
    assert parallel.operator_type == OperatorType.PARALLEL
    assert parallel.branches == {"branch_a": ["task_a1", "task_a2"], "branch_b": ["task_b1"]}


def test_foreach_operator_model():
    foreach = ForEachOperator(
        task_id="foreach1", items="data_list", task_chain=["process_item"]
    )
    assert foreach.task_id == "foreach1"
    assert foreach.operator_type == OperatorType.FOREACH
    assert foreach.items == "data_list"
    assert foreach.task_chain == ["process_item"]


def test_wait_operator_serialization():
    # Test with timedelta
    wait_duration = WaitOperator(task_id="wait1", wait_for=timedelta(hours=1))
    dump = wait_duration.model_dump()
    assert dump["wait_for"] == "duration:3600.0"

    # Test with datetime
    now = datetime.now().replace(microsecond=0)
    wait_datetime = WaitOperator(task_id="wait2", wait_for=now)
    dump = wait_datetime.model_dump()
    assert dump["wait_for"] == f"datetime:{now.isoformat()}"

    # Test with string (no conversion)
    wait_string = WaitOperator(task_id="wait3", wait_for="event_name")
    dump = wait_string.model_dump()
    assert dump["wait_for"] == "event_name"

    # Test parsing of different data types
    assert WaitOperator.model_validate({"task_id": "t", "wait_for": "duration:60"}).wait_for == timedelta(seconds=60)
    now_iso = now.isoformat()
    assert WaitOperator.model_validate({"task_id": "t", "wait_for": f"datetime:{now_iso}"}).wait_for == now
    assert WaitOperator.model_validate({"task_id": "t", "wait_for": "event"}).wait_for == "event"


def test_workflow_builder_simple_chain():
    workflow = (
        WorkflowBuilder("simple_chain")
        .task("start", "func_start", result_key="start_res")
        .task("middle", "func_middle", args=["{{start_res}}"])
        .build()
    )
    assert workflow.name == "simple_chain"
    assert "start" in workflow.tasks
    assert "middle" in workflow.tasks
    assert workflow.tasks["middle"].dependencies == ["start"]
    assert workflow.start_task == "start"


def test_workflow_builder_with_retry_and_timeout():
    workflow = (
        WorkflowBuilder("retry_timeout_workflow")
        .task("step1", "func1")
        .retry(max_retries=5, delay=timedelta(seconds=15))
        .timeout(timeout=timedelta(minutes=1))
        .build()
    )
    assert workflow.tasks["step1"].retry_policy.max_retries == 5
    assert workflow.tasks["step1"].retry_policy.delay == timedelta(seconds=15)
    assert workflow.tasks["step1"].timeout_policy.timeout == timedelta(minutes=1)


def test_workflow_builder_condition():
    workflow = (
        WorkflowBuilder("conditional_workflow")
        .task("initial", "init_func")
        .condition("check", "val > 10", "high", "low")
        .build()
    )
    assert "check" in workflow.tasks
    assert workflow.tasks["check"].dependencies == ["initial"]
    assert workflow.tasks["check"].if_true == "high"


def test_workflow_builder_parallel():
    workflow = (
        WorkflowBuilder("parallel_workflow")
        .task("init", "init_func")
        .parallel("parallel_step", {"b1": ["t1"], "b2": ["t2"]})
        .build()
    )
    assert "parallel_step" in workflow.tasks
    assert workflow.tasks["parallel_step"].dependencies == ["init"]


def test_workflow_builder_foreach():
    workflow = (
        WorkflowBuilder("foreach_workflow")
        .task("fetch_items", "fetch_func")
        .foreach("loop_items", "items_list", ["process_item"])
        .build()
    )
    assert "loop_items" in workflow.tasks
    assert workflow.tasks["loop_items"].dependencies == ["fetch_items"]


def test_workflow_yaml_round_trip():
    original_workflow = (
        WorkflowBuilder("yaml_test")
        .task("start", "func_start", result_key="start_res")
        .retry(max_retries=2, delay=timedelta(seconds=5))
        .wait("wait_step", timedelta(minutes=1))
        .task("end", "func_end", args=["{{start_res}}"])
        .build()
    )
    original_workflow.set_variables({"env": "dev"})

    yaml_output = original_workflow.to_yaml()
    loaded_workflow = Workflow.from_yaml(yaml_output)
    assert sort_dict_recursively(json.loads(original_workflow.model_dump_json())) == sort_dict_recursively(json.loads(loaded_workflow.model_dump_json()))


def test_workflow_json_round_trip():
    original_workflow = (
        WorkflowBuilder("json_test")
        .task("stepA", "funcA")
        .timeout(timeout=timedelta(seconds=60), kill_on_timeout=False)
        .condition("check_val", "val == 'ok'", "success", "fail")
        .build()
    )
    original_workflow.set_variables({"user": "test"})

    json_output = original_workflow.to_json()
    loaded_workflow = Workflow.from_json(json_output)

    assert sort_dict_recursively(json.loads(original_workflow.model_dump_json())) == sort_dict_recursively(json.loads(loaded_workflow.model_dump_json()))


def test_complex_workflow_creation_and_serialization():
    # This test re-uses the logic from example_usage.py's create_complex_workflow
    # to ensure it works with the new Pydantic models and can be serialized/deserialized.
    workflow = (
        WorkflowBuilder("data_processing_pipeline")
        .task("start", "workflows.tasks.initialize", result_key="init_data")
        .task(
            "validate",
            "workflows.tasks.validate_data",
            args=["{{init_data}}"],
            result_key="validated_data",
        )
        .condition(
            "check_quality",
            condition="{{validated_data.quality_score}} > 0.8",
            if_true="high_quality_processing",
            if_false="standard_processing",
        )
        .build()
    )

    workflow.add_task(
        TaskOperator(
            task_id="high_quality_processing",
            function="workflows.tasks.advanced_processing",
            args=["{{validated_data}}"],
            dependencies=["check_quality"],
            retry_policy=RetryPolicy(max_retries=5, delay=timedelta(seconds=10)),
        )
    )

    workflow.add_task(
        TaskOperator(
            task_id="standard_processing",
            function="workflows.tasks.basic_processing",
            args=["{{validated_data}}"],
            dependencies=["check_quality"],
        )
    )

    workflow.add_task(
        ParallelOperator(
            task_id="parallel_processing",
            branches={
                "branch_a": ["transform_a", "enrich_a"],
                "branch_b": ["transform_b", "enrich_b"],
            },
            dependencies=["high_quality_processing", "standard_processing"],
        )
    )

    for branch in ["a", "b"]:
        workflow.add_task(
            TaskOperator(
                task_id=f"transform_{branch}",
                function=f"workflows.tasks.transform_{branch}",
                dependencies=["parallel_processing"],
                result_key=f"transformed_{branch}",
            )
        )

        workflow.add_task(
            TaskOperator(
                task_id=f"enrich_{branch}",
                function="workflows.tasks.enrich_data",
                args=[f"{{{{transformed_{branch}}}}}"],
                dependencies=[f"transform_{branch}"],
                result_key=f"enriched_{branch}",
            )
        )

    builder = WorkflowBuilder(workflow.name, existing_workflow=workflow)
    builder._current_task = "enrich_b"  # Manually set current task for builder continuation

    workflow = (
        builder.task(
            "aggregate",
            "workflows.tasks.aggregate_results",
            dependencies=[
                "enrich_a",
                "enrich_b",
            ],
            result_key="final_result",
        )
        .wait("wait_notification", timedelta(hours=1))
        .task("notify", "workflows.tasks.send_notification", args=["{{final_result}}"])
        .build()
    )

    workflow.set_variables(
        {
            "environment": "production",
            "batch_size": 1000,
            "notify_email": "team@company.com",
        }
    )

    # Test serialization and deserialization
    yaml_output = workflow.to_yaml()
    loaded_workflow_from_yaml = Workflow.from_yaml(yaml_output)
    assert sort_dict_recursively(json.loads(workflow.model_dump_json())) == sort_dict_recursively(json.loads(loaded_workflow_from_yaml.model_dump_json()))

    json_output = workflow.to_json()
    loaded_workflow_from_json = Workflow.from_json(json_output)
    assert sort_dict_recursively(json.loads(workflow.model_dump_json())) == sort_dict_recursively(json.loads(loaded_workflow_from_json.model_dump_json()))


def test_unknown_operator_type_raises_error():
    yaml_content = """
name: test
tasks:
  task1:
    operator_type: unknown_operator
"""
    with pytest.raises(ValueError, match="Unknown operator type: unknown_operator"):
        Workflow.from_yaml(yaml_content)
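The suite covers model construction, builder chaining, and YAML/JSON round trips. Since `pytest-cov` is declared as a dev dependency, a coverage run is presumably intended to look something like this (the flags are standard pytest-cov options, not taken from the package's own docs):

```bash
pytest --cov=highway_dsl --cov-report=term-missing
```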