highway_dsl-0.0.1-py3-none-any.whl

This diff shows the content of a publicly available package version as released to a supported registry. It is provided for informational purposes only and reflects the package version as it appears in its public registry.

Potentially problematic release: this version of highway-dsl might be problematic.

highway_dsl/__init__.py: file without changes (empty file)
highway_dsl/workflow_dsl.py
@@ -0,0 +1,252 @@
+ # workflow_dsl.py
+ from typing import Any, Dict, List, Optional, Union, Callable, Type
+ from enum import Enum
+ from datetime import datetime, timedelta
+ import yaml
+ import json
+ from abc import ABC, abstractmethod
+ from pydantic import BaseModel, Field, model_validator, ConfigDict
+
+
+ class OperatorType(Enum):
+     TASK = "task"
+     CONDITION = "condition"
+     WAIT = "wait"
+     PARALLEL = "parallel"
+     FOREACH = "foreach"
+     SWITCH = "switch"
+     TRY_CATCH = "try_catch"
+
+
+ class RetryPolicy(BaseModel):
+     max_retries: int = Field(3, description="Maximum number of retries")
+     delay: timedelta = Field(timedelta(seconds=5), description="Delay between retries")
+     backoff_factor: float = Field(2.0, description="Factor by which to increase delay")
+
+
+ class TimeoutPolicy(BaseModel):
+     timeout: timedelta = Field(..., description="Timeout duration")
+     kill_on_timeout: bool = Field(True, description="Whether to kill the task on timeout")
+
+
+ class BaseOperator(BaseModel, ABC):
+     task_id: str
+     operator_type: OperatorType
+     dependencies: List[str] = Field(default_factory=list)
+     retry_policy: Optional[RetryPolicy] = None
+     timeout_policy: Optional[TimeoutPolicy] = None
+     metadata: Dict[str, Any] = Field(default_factory=dict)
+
+     model_config = ConfigDict(use_enum_values=True, arbitrary_types_allowed=True)
+
+
+ class TaskOperator(BaseOperator):
+     function: str
+     args: List[Any] = Field(default_factory=list)
+     kwargs: Dict[str, Any] = Field(default_factory=dict)
+     result_key: Optional[str] = None
+     operator_type: OperatorType = Field(OperatorType.TASK, frozen=True)
+
+
+ class ConditionOperator(BaseOperator):
+     condition: str
+     if_true: str
+     if_false: str
+     operator_type: OperatorType = Field(OperatorType.CONDITION, frozen=True)
+
+
+ class WaitOperator(BaseOperator):
+     wait_for: Union[timedelta, datetime, str]
+     operator_type: OperatorType = Field(OperatorType.WAIT, frozen=True)
+
+     @model_validator(mode='before')
+     @classmethod
+     def parse_wait_for(cls, data: Any) -> Any:
+         if isinstance(data, dict) and "wait_for" in data:
+             wait_for = data["wait_for"]
+             if isinstance(wait_for, str):
+                 if wait_for.startswith("duration:"):
+                     data["wait_for"] = timedelta(seconds=float(wait_for.split(":")[1]))
+                 elif wait_for.startswith("datetime:"):
+                     data["wait_for"] = datetime.fromisoformat(wait_for.split(":", 1)[1])
+         return data
+
+     def model_dump(self, **kwargs) -> Dict[str, Any]:
+         data = super().model_dump(**kwargs)
+         wait_for = data["wait_for"]
+         if isinstance(wait_for, timedelta):
+             data["wait_for"] = f"duration:{wait_for.total_seconds()}"
+         elif isinstance(wait_for, datetime):
+             data["wait_for"] = f"datetime:{wait_for.isoformat()}"
+         return data
+
+
+ class ParallelOperator(BaseOperator):
+     branches: Dict[str, List[str]] = Field(default_factory=dict)
+     operator_type: OperatorType = Field(OperatorType.PARALLEL, frozen=True)
+
+
+ class ForEachOperator(BaseOperator):
+     items: str
+     task_chain: List[str] = Field(default_factory=list)
+     operator_type: OperatorType = Field(OperatorType.FOREACH, frozen=True)
+
+
+ class Workflow(BaseModel):
+     name: str
+     version: str = "1.0.0"
+     description: str = ""
+     tasks: Dict[str, Union[
+         TaskOperator,
+         ConditionOperator,
+         WaitOperator,
+         ParallelOperator,
+         ForEachOperator,
+     ]] = Field(default_factory=dict)
+     variables: Dict[str, Any] = Field(default_factory=dict)
+     start_task: Optional[str] = None
+
+     @model_validator(mode='before')
+     @classmethod
+     def validate_tasks(cls, data: Any) -> Any:
+         if isinstance(data, dict) and "tasks" in data:
+             validated_tasks = {}
+             operator_classes: Dict[str, Type[BaseOperator]] = {
+                 OperatorType.TASK.value: TaskOperator,
+                 OperatorType.CONDITION.value: ConditionOperator,
+                 OperatorType.WAIT.value: WaitOperator,
+                 OperatorType.PARALLEL.value: ParallelOperator,
+                 OperatorType.FOREACH.value: ForEachOperator,
+             }
+             for task_id, task_data in data["tasks"].items():
+                 operator_type = task_data.get("operator_type")
+                 if operator_type and operator_type in operator_classes:
+                     operator_class = operator_classes[operator_type]
+                     validated_tasks[task_id] = operator_class.model_validate(task_data)
+                 else:
+                     raise ValueError(f"Unknown operator type: {operator_type}")
+             data["tasks"] = validated_tasks
+         return data
+
+     def add_task(self, task: Union[
+         TaskOperator,
+         ConditionOperator,
+         WaitOperator,
+         ParallelOperator,
+         ForEachOperator,
+     ]) -> "Workflow":
+         self.tasks[task.task_id] = task
+         return self
+
+     def set_variables(self, variables: Dict[str, Any]) -> "Workflow":
+         self.variables.update(variables)
+         return self
+
+     def set_start_task(self, task_id: str) -> "Workflow":
+         self.start_task = task_id
+         return self
+
+     def to_yaml(self) -> str:
+         data = self.model_dump(mode='json', by_alias=True, exclude_none=True)
+         return yaml.dump(data, default_flow_style=False)
+
+     def to_json(self) -> str:
+         return self.model_dump_json(indent=2)
+
+     @classmethod
+     def from_yaml(cls, yaml_str: str) -> "Workflow":
+         data = yaml.safe_load(yaml_str)
+         return cls.model_validate(data)
+
+     @classmethod
+     def from_json(cls, json_str: str) -> "Workflow":
+         return cls.model_validate_json(json_str)
+
+
+ class WorkflowBuilder:
+     def __init__(self, name: str, existing_workflow: Optional[Workflow] = None):
+         if existing_workflow:
+             self.workflow = existing_workflow
+         else:
+             self.workflow = Workflow(name=name)
+         self._current_task: Optional[str] = None
+
+     def task(self, task_id: str, function: str, **kwargs) -> "WorkflowBuilder":
+         task = TaskOperator(task_id=task_id, function=function, **kwargs)
+         if self._current_task:
+             task.dependencies.append(self._current_task)
+         self.workflow.add_task(task)
+         self._current_task = task_id
+         return self
+
+     def condition(
+         self, task_id: str, condition: str, if_true: str, if_false: str, **kwargs
+     ) -> "WorkflowBuilder":
+         task = ConditionOperator(
+             task_id=task_id, condition=condition, if_true=if_true, if_false=if_false, **kwargs
+         )
+         if self._current_task:
+             task.dependencies.append(self._current_task)
+         self.workflow.add_task(task)
+         self._current_task = task_id
+         return self
+
+     def wait(
+         self, task_id: str, wait_for: Union[timedelta, datetime, str], **kwargs
+     ) -> "WorkflowBuilder":
+         task = WaitOperator(task_id=task_id, wait_for=wait_for, **kwargs)
+         if self._current_task:
+             task.dependencies.append(self._current_task)
+         self.workflow.add_task(task)
+         self._current_task = task_id
+         return self
+
+     def parallel(
+         self, task_id: str, branches: Dict[str, List[str]], **kwargs
+     ) -> "WorkflowBuilder":
+         task = ParallelOperator(task_id=task_id, branches=branches, **kwargs)
+         if self._current_task:
+             task.dependencies.append(self._current_task)
+         self.workflow.add_task(task)
+         self._current_task = task_id
+         return self
+
+     def foreach(
+         self, task_id: str, items: str, task_chain: List[str], **kwargs
+     ) -> "WorkflowBuilder":
+         task = ForEachOperator(task_id=task_id, items=items, task_chain=task_chain, **kwargs)
+         if self._current_task:
+             task.dependencies.append(self._current_task)
+         self.workflow.add_task(task)
+         self._current_task = task_id
+         return self
+
+     def retry(
+         self,
+         max_retries: int = 3,
+         delay: timedelta = timedelta(seconds=5),
+         backoff_factor: float = 2.0,
+     ) -> "WorkflowBuilder":
+         if self._current_task and isinstance(
+             self.workflow.tasks[self._current_task], TaskOperator
+         ):
+             self.workflow.tasks[self._current_task].retry_policy = RetryPolicy(
+                 max_retries=max_retries, delay=delay, backoff_factor=backoff_factor
+             )
+         return self
+
+     def timeout(
+         self, timeout: timedelta, kill_on_timeout: bool = True
+     ) -> "WorkflowBuilder":
+         if self._current_task and isinstance(
+             self.workflow.tasks[self._current_task], TaskOperator
+         ):
+             self.workflow.tasks[self._current_task].timeout_policy = TimeoutPolicy(
+                 timeout=timeout, kill_on_timeout=kill_on_timeout
+             )
+         return self
+
+     def build(self) -> Workflow:
+         if not self.workflow.start_task and self.workflow.tasks:
+             self.workflow.start_task = next(iter(self.workflow.tasks.keys()))
+         return self.workflow
highway_dsl-0.0.1.dist-info/METADATA
@@ -0,0 +1,203 @@
+ Metadata-Version: 2.4
+ Name: highway_dsl
+ Version: 0.0.1
+ Summary: A domain specific language (DSL) for defining and managing data processing pipelines.
+ Author-email: Farseed Ashouri <farseed.ashouri@gmail.com>
+ License: MIT
+ Project-URL: Homepage, https://github.com/rodmena-limited/highway_dsl
+ Project-URL: Issues, https://github.com/rodmena-limited/highway_dsl/issues
+ Classifier: Programming Language :: Python :: 3
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: pydantic>=2.12.3
+ Requires-Dist: pyyaml>=6.0
+ Provides-Extra: dev
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
+ Requires-Dist: types-PyYAML>=6.0.0; extra == "dev"
+ Requires-Dist: pytest-cov>=2.12.1; extra == "dev"
+ Dynamic: license-file
+
+ # Highway DSL
+
+ Highway DSL is a Python-based Domain Specific Language (DSL) for defining and managing complex workflows. It allows users to declaratively specify tasks, dependencies, and execution parameters, supporting various control flow mechanisms like conditions, parallel execution, and retries.
+
+ ## Features
+
+ * **Declarative Workflow Definition:** Define workflows using a clear and concise Python API or through YAML/JSON configurations.
+ * **Pydantic Models:** Leverages Pydantic for robust data validation and serialization/deserialization of workflow definitions.
+ * **Rich Task Types:** Supports various operators including:
+     * `TaskOperator`: Executes a Python function.
+     * `ConditionOperator`: Enables conditional branching based on expressions.
+     * `WaitOperator`: Pauses workflow execution for a specified duration or until a specific datetime.
+     * `ParallelOperator`: Executes multiple branches of tasks concurrently.
+     * `ForEachOperator`: Iterates over a collection, executing a chain of tasks for each item.
+ * **Retry and Timeout Policies:** Define retry strategies and timeout limits for individual tasks.
+ * **Serialization/Deserialization:** Seamless conversion of workflow definitions between Python objects, YAML, and JSON formats.
+ * **Workflow Builder:** A fluent API for constructing workflows programmatically.
+
+ ## Installation
+
+ To install Highway DSL, you can use pip:
+
+ ```bash
+ pip install highway-dsl
+ ```
+
+ If you want to install it for development, including testing dependencies:
+
+ ```bash
+ pip install "highway-dsl[dev]"
+ ```
+
+ ## Usage
+
+ ### Defining a Simple Workflow
+
+ ```python
+ from datetime import timedelta
+ from workflow_dsl import WorkflowBuilder
+
+ def demonstrate_basic_workflow():
+     """Show a simple complete workflow using just the builder"""
+
+     workflow = (
+         WorkflowBuilder("simple_etl")
+         .task("extract", "etl.extract_data", result_key="raw_data")
+         .task(
+             "transform",
+             "etl.transform_data",
+             args=["{{raw_data}}"],
+             result_key="transformed_data",
+         )
+         .retry(max_retries=3, delay=timedelta(seconds=10))
+         .task("load", "etl.load_data", args=["{{transformed_data}}"])
+         .timeout(timeout=timedelta(minutes=30))
+         .wait("wait_next", timedelta(hours=24))
+         .task("cleanup", "etl.cleanup")
+         .build()
+     )
+
+     workflow.set_variables(
+         {"database_url": "postgresql://localhost/mydb", "chunk_size": 1000}
+     )
+
+     return workflow
+
+ if __name__ == "__main__":
+     basic_workflow = demonstrate_basic_workflow()
+     print(basic_workflow.to_yaml())
+ ```
+
+ ### Defining a Complex Workflow
+
+ Refer to `example_usage.py` for a more complex example demonstrating conditional logic, parallel execution, and iteration.
+
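For a quick sense of how those pieces combine, the sketch below chains conditional branching, per-item iteration, and parallel fan-out with the `WorkflowBuilder` API defined in `workflow_dsl.py` above. It is illustrative only and not taken from `example_usage.py`: the task ids and function paths such as `orders.fetch_new` are hypothetical, the condition expression syntax is whatever the executing engine understands, and the import path assumes the installed `highway_dsl` package layout.

```python
from datetime import timedelta

# Installed layout: highway_dsl/__init__.py is empty, so import the module directly.
from highway_dsl.workflow_dsl import WorkflowBuilder

workflow = (
    WorkflowBuilder("order_pipeline")
    .task("fetch_orders", "orders.fetch_new", result_key="orders")
    .retry(max_retries=5, delay=timedelta(seconds=30))
    # ConditionOperator: if_true / if_false name the task ids to jump to.
    .condition(
        "any_orders",
        condition="{{orders}}",  # expression syntax is up to the executing engine
        if_true="process_orders",
        if_false="report",
    )
    # ForEachOperator: run the listed task chain once per item of {{orders}}.
    .foreach("process_orders", items="{{orders}}", task_chain=["validate_order", "charge_order"])
    # ParallelOperator: each branch is a named list of task ids that can run concurrently.
    .parallel(
        "post_processing",
        branches={"reporting": ["report"], "notification": ["email_customers"]},
    )
    .build()
)

# Task ids referenced above (validate_order, charge_order, report, email_customers)
# would be registered on the same workflow as well, e.g. via workflow.add_task(...).
print(workflow.to_yaml())
```

Note that the builder also records a linear dependency from each call to the previous task, so `any_orders` depends on `fetch_orders`, `process_orders` on `any_orders`, and `post_processing` on `process_orders`.
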
+ ### YAML Configuration
+
+ You can also define workflows directly in YAML:
+
+ ```yaml
+ name: simple_etl
+ version: 1.0.0
+ description: Simple ETL workflow with retry and timeout
+ variables:
+   database_url: postgresql://localhost/mydb
+   chunk_size: 1000
+ start_task: extract
+ tasks:
+   extract:
+     task_id: extract
+     operator_type: task
+     function: etl.extract_data
+     result_key: raw_data
+     dependencies: []
+     metadata: {}
+
+   transform:
+     task_id: transform
+     operator_type: task
+     function: etl.transform_data
+     args: ["{{raw_data}}"]
+     result_key: transformed_data
+     dependencies: ["extract"]
+     retry_policy:
+       max_retries: 3
+       delay: PT10S
+       backoff_factor: 2.0
+     metadata: {}
+
+   load:
+     task_id: load
+     operator_type: task
+     function: etl.load_data
+     args: ["{{transformed_data}}"]
+     dependencies: ["transform"]
+     timeout_policy:
+       timeout: PT30M
+       kill_on_timeout: true
+     metadata: {}
+
+   wait_next:
+     task_id: wait_next
+     operator_type: wait
+     wait_for: "P1D"
+     dependencies: ["load"]
+     metadata: {}
+
+   cleanup:
+     task_id: cleanup
+     operator_type: task
+     function: etl.cleanup
+     dependencies: ["wait_next"]
+     metadata: {}
+ ```
+
+ To load this YAML:
+
+ ```python
+ from workflow_dsl import Workflow
+
+ yaml_content = """
+ # ... (yaml content from above)
+ """
+
+ workflow = Workflow.from_yaml(yaml_content)
+ print(workflow.name)
+ ```
+
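The same objects round-trip through JSON as well, via `to_json()` and `from_json()`, which the source above implements as thin wrappers over pydantic's `model_dump_json` and `model_validate_json`. A minimal sketch, again assuming the installed `highway_dsl` layout and a hypothetical `demo.say_hello` function:

```python
from highway_dsl.workflow_dsl import Workflow, WorkflowBuilder

wf = WorkflowBuilder("json_demo").task("hello", "demo.say_hello").build()

json_str = wf.to_json()                  # indented JSON via model_dump_json
restored = Workflow.from_json(json_str)  # re-validated into typed operator models

assert restored.name == "json_demo"
assert restored.start_task == "hello"
assert restored.tasks["hello"].function == "demo.say_hello"
```
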
+ ## Development
+
+ ### Running Tests
+
+ To run the unit tests, navigate to the project root and execute:
+
+ ```bash
+ pytest
+ ```
+
+ ### Type Checking
+
+ To perform static type checking with MyPy:
+
+ ```bash
+ mypy .
+ ```
+
+ ## Project Structure
+
+ ```
+ .highway/
+ ├── workflow_dsl.py           # Core DSL definitions (Pydantic models)
+ ├── example_usage.py          # Examples of how to use the DSL
+ ├── tests/
+ │   ├── __init__.py
+ │   ├── conftest.py           # Pytest configuration
+ │   └── test_workflow_dsl.py  # Unit and integration tests
+ ├── pyproject.toml            # Project metadata and dependencies
+ ├── README.md                 # This file
+ └── SUMMARY.md                # Summary of changes and future instructions
+ ```
highway_dsl-0.0.1.dist-info/RECORD
@@ -0,0 +1,7 @@
+ highway_dsl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ highway_dsl/workflow_dsl.py,sha256=PDUCYFBt0SHSZxXv6HKZXlVOmjUcYWiX4i6Kwhsn4h8,9026
+ highway_dsl-0.0.1.dist-info/licenses/LICENSE,sha256=qdFq1H66BvKg67mf4-WGpFwtG2u_dNknxuJDQ1_ubaY,1072
+ highway_dsl-0.0.1.dist-info/METADATA,sha256=3DgCxoxYJki8WYd4gfDB9IWIq1iAK-liAmWQ7fWxFvU,5838
+ highway_dsl-0.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ highway_dsl-0.0.1.dist-info/top_level.txt,sha256=_5uX-bbBsQ2rsi1XMr7WRyKbr6ack5GqVBcy-QjF1C8,12
+ highway_dsl-0.0.1.dist-info/RECORD,,
highway_dsl-0.0.1.dist-info/WHEEL
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (80.9.0)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
highway_dsl-0.0.1.dist-info/licenses/LICENSE
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 Farseed Ashouri
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
highway_dsl-0.0.1.dist-info/top_level.txt
@@ -0,0 +1 @@
+ highway_dsl