highway-dsl 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of highway-dsl might be problematic. Click here for more details.
- highway_dsl/__init__.py +28 -0
- highway_dsl/workflow_dsl.py +132 -26
- highway_dsl-0.0.3.dist-info/METADATA +160 -0
- highway_dsl-0.0.3.dist-info/RECORD +7 -0
- highway_dsl-0.0.1.dist-info/METADATA +0 -203
- highway_dsl-0.0.1.dist-info/RECORD +0 -7
- {highway_dsl-0.0.1.dist-info → highway_dsl-0.0.3.dist-info}/WHEEL +0 -0
- {highway_dsl-0.0.1.dist-info → highway_dsl-0.0.3.dist-info}/licenses/LICENSE +0 -0
- {highway_dsl-0.0.1.dist-info → highway_dsl-0.0.3.dist-info}/top_level.txt +0 -0
highway_dsl/__init__.py
CHANGED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from .workflow_dsl import (
|
|
2
|
+
Workflow,
|
|
3
|
+
WorkflowBuilder,
|
|
4
|
+
TaskOperator,
|
|
5
|
+
ConditionOperator,
|
|
6
|
+
ParallelOperator,
|
|
7
|
+
WaitOperator,
|
|
8
|
+
ForEachOperator,
|
|
9
|
+
WhileOperator,
|
|
10
|
+
RetryPolicy,
|
|
11
|
+
TimeoutPolicy,
|
|
12
|
+
OperatorType,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"Workflow",
|
|
17
|
+
"WorkflowBuilder",
|
|
18
|
+
"BaseOperator",
|
|
19
|
+
"TaskOperator",
|
|
20
|
+
"ConditionOperator",
|
|
21
|
+
"ParallelOperator",
|
|
22
|
+
"WaitOperator",
|
|
23
|
+
"ForEachOperator",
|
|
24
|
+
"WhileOperator",
|
|
25
|
+
"RetryPolicy",
|
|
26
|
+
"TimeoutPolicy",
|
|
27
|
+
"OperatorType",
|
|
28
|
+
]
|
highway_dsl/workflow_dsl.py
CHANGED
|
@@ -16,6 +16,7 @@ class OperatorType(Enum):
|
|
|
16
16
|
FOREACH = "foreach"
|
|
17
17
|
SWITCH = "switch"
|
|
18
18
|
TRY_CATCH = "try_catch"
|
|
19
|
+
WHILE = "while"
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
class RetryPolicy(BaseModel):
|
|
@@ -26,7 +27,9 @@ class RetryPolicy(BaseModel):
|
|
|
26
27
|
|
|
27
28
|
class TimeoutPolicy(BaseModel):
|
|
28
29
|
timeout: timedelta = Field(..., description="Timeout duration")
|
|
29
|
-
kill_on_timeout: bool = Field(
|
|
30
|
+
kill_on_timeout: bool = Field(
|
|
31
|
+
True, description="Whether to kill the task on timeout"
|
|
32
|
+
)
|
|
30
33
|
|
|
31
34
|
|
|
32
35
|
class BaseOperator(BaseModel, ABC):
|
|
@@ -50,8 +53,8 @@ class TaskOperator(BaseOperator):
|
|
|
50
53
|
|
|
51
54
|
class ConditionOperator(BaseOperator):
|
|
52
55
|
condition: str
|
|
53
|
-
if_true: str
|
|
54
|
-
if_false: str
|
|
56
|
+
if_true: Optional[str]
|
|
57
|
+
if_false: Optional[str]
|
|
55
58
|
operator_type: OperatorType = Field(OperatorType.CONDITION, frozen=True)
|
|
56
59
|
|
|
57
60
|
|
|
@@ -59,7 +62,7 @@ class WaitOperator(BaseOperator):
|
|
|
59
62
|
wait_for: Union[timedelta, datetime, str]
|
|
60
63
|
operator_type: OperatorType = Field(OperatorType.WAIT, frozen=True)
|
|
61
64
|
|
|
62
|
-
@model_validator(mode=
|
|
65
|
+
@model_validator(mode="before")
|
|
63
66
|
@classmethod
|
|
64
67
|
def parse_wait_for(cls, data: Any) -> Any:
|
|
65
68
|
if isinstance(data, dict) and "wait_for" in data:
|
|
@@ -92,21 +95,40 @@ class ForEachOperator(BaseOperator):
|
|
|
92
95
|
operator_type: OperatorType = Field(OperatorType.FOREACH, frozen=True)
|
|
93
96
|
|
|
94
97
|
|
|
98
|
+
class WhileOperator(BaseOperator):
|
|
99
|
+
condition: str
|
|
100
|
+
loop_body: List[
|
|
101
|
+
Union[
|
|
102
|
+
TaskOperator,
|
|
103
|
+
ConditionOperator,
|
|
104
|
+
WaitOperator,
|
|
105
|
+
ParallelOperator,
|
|
106
|
+
ForEachOperator,
|
|
107
|
+
"WhileOperator",
|
|
108
|
+
]
|
|
109
|
+
] = Field(default_factory=list)
|
|
110
|
+
operator_type: OperatorType = Field(OperatorType.WHILE, frozen=True)
|
|
111
|
+
|
|
112
|
+
|
|
95
113
|
class Workflow(BaseModel):
|
|
96
114
|
name: str
|
|
97
115
|
version: str = "1.0.0"
|
|
98
116
|
description: str = ""
|
|
99
|
-
tasks: Dict[
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
117
|
+
tasks: Dict[
|
|
118
|
+
str,
|
|
119
|
+
Union[
|
|
120
|
+
TaskOperator,
|
|
121
|
+
ConditionOperator,
|
|
122
|
+
WaitOperator,
|
|
123
|
+
ParallelOperator,
|
|
124
|
+
ForEachOperator,
|
|
125
|
+
WhileOperator,
|
|
126
|
+
],
|
|
127
|
+
] = Field(default_factory=dict)
|
|
106
128
|
variables: Dict[str, Any] = Field(default_factory=dict)
|
|
107
129
|
start_task: Optional[str] = None
|
|
108
130
|
|
|
109
|
-
@model_validator(mode=
|
|
131
|
+
@model_validator(mode="before")
|
|
110
132
|
@classmethod
|
|
111
133
|
def validate_tasks(cls, data: Any) -> Any:
|
|
112
134
|
if isinstance(data, dict) and "tasks" in data:
|
|
@@ -117,6 +139,7 @@ class Workflow(BaseModel):
|
|
|
117
139
|
OperatorType.WAIT.value: WaitOperator,
|
|
118
140
|
OperatorType.PARALLEL.value: ParallelOperator,
|
|
119
141
|
OperatorType.FOREACH.value: ForEachOperator,
|
|
142
|
+
OperatorType.WHILE.value: WhileOperator,
|
|
120
143
|
}
|
|
121
144
|
for task_id, task_data in data["tasks"].items():
|
|
122
145
|
operator_type = task_data.get("operator_type")
|
|
@@ -128,13 +151,17 @@ class Workflow(BaseModel):
|
|
|
128
151
|
data["tasks"] = validated_tasks
|
|
129
152
|
return data
|
|
130
153
|
|
|
131
|
-
def add_task(
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
154
|
+
def add_task(
|
|
155
|
+
self,
|
|
156
|
+
task: Union[
|
|
157
|
+
TaskOperator,
|
|
158
|
+
ConditionOperator,
|
|
159
|
+
WaitOperator,
|
|
160
|
+
ParallelOperator,
|
|
161
|
+
ForEachOperator,
|
|
162
|
+
WhileOperator,
|
|
163
|
+
],
|
|
164
|
+
) -> "Workflow":
|
|
138
165
|
self.tasks[task.task_id] = task
|
|
139
166
|
return self
|
|
140
167
|
|
|
@@ -147,7 +174,7 @@ class Workflow(BaseModel):
|
|
|
147
174
|
return self
|
|
148
175
|
|
|
149
176
|
def to_yaml(self) -> str:
|
|
150
|
-
data = self.model_dump(mode=
|
|
177
|
+
data = self.model_dump(mode="json", by_alias=True, exclude_none=True)
|
|
151
178
|
return yaml.dump(data, default_flow_style=False)
|
|
152
179
|
|
|
153
180
|
def to_json(self) -> str:
|
|
@@ -164,12 +191,18 @@ class Workflow(BaseModel):
|
|
|
164
191
|
|
|
165
192
|
|
|
166
193
|
class WorkflowBuilder:
|
|
167
|
-
def __init__(
|
|
194
|
+
def __init__(
|
|
195
|
+
self,
|
|
196
|
+
name: str,
|
|
197
|
+
existing_workflow: Optional[Workflow] = None,
|
|
198
|
+
parent: Optional["WorkflowBuilder"] = None,
|
|
199
|
+
):
|
|
168
200
|
if existing_workflow:
|
|
169
201
|
self.workflow = existing_workflow
|
|
170
202
|
else:
|
|
171
203
|
self.workflow = Workflow(name=name)
|
|
172
204
|
self._current_task: Optional[str] = None
|
|
205
|
+
self.parent = parent
|
|
173
206
|
|
|
174
207
|
def task(self, task_id: str, function: str, **kwargs) -> "WorkflowBuilder":
|
|
175
208
|
task = TaskOperator(task_id=task_id, function=function, **kwargs)
|
|
@@ -180,14 +213,37 @@ class WorkflowBuilder:
|
|
|
180
213
|
return self
|
|
181
214
|
|
|
182
215
|
def condition(
|
|
183
|
-
self,
|
|
216
|
+
self,
|
|
217
|
+
task_id: str,
|
|
218
|
+
condition: str,
|
|
219
|
+
if_true: Callable[["WorkflowBuilder"], "WorkflowBuilder"],
|
|
220
|
+
if_false: Callable[["WorkflowBuilder"], "WorkflowBuilder"],
|
|
221
|
+
**kwargs,
|
|
184
222
|
) -> "WorkflowBuilder":
|
|
223
|
+
true_builder = if_true(WorkflowBuilder(f"{{task_id}}_true", parent=self))
|
|
224
|
+
false_builder = if_false(WorkflowBuilder(f"{{task_id}}_false", parent=self))
|
|
225
|
+
|
|
226
|
+
true_tasks = list(true_builder.workflow.tasks.keys())
|
|
227
|
+
false_tasks = list(false_builder.workflow.tasks.keys())
|
|
228
|
+
|
|
185
229
|
task = ConditionOperator(
|
|
186
|
-
task_id=task_id,
|
|
230
|
+
task_id=task_id,
|
|
231
|
+
condition=condition,
|
|
232
|
+
if_true=true_tasks[0] if true_tasks else None,
|
|
233
|
+
if_false=false_tasks[0] if false_tasks else None,
|
|
234
|
+
**kwargs,
|
|
187
235
|
)
|
|
236
|
+
|
|
188
237
|
if self._current_task:
|
|
189
238
|
task.dependencies.append(self._current_task)
|
|
239
|
+
|
|
190
240
|
self.workflow.add_task(task)
|
|
241
|
+
|
|
242
|
+
for task_obj in true_builder.workflow.tasks.values():
|
|
243
|
+
self.workflow.add_task(task_obj)
|
|
244
|
+
for task_obj in false_builder.workflow.tasks.values():
|
|
245
|
+
self.workflow.add_task(task_obj)
|
|
246
|
+
|
|
191
247
|
self._current_task = task_id
|
|
192
248
|
return self
|
|
193
249
|
|
|
@@ -202,22 +258,72 @@ class WorkflowBuilder:
|
|
|
202
258
|
return self
|
|
203
259
|
|
|
204
260
|
def parallel(
|
|
205
|
-
self,
|
|
261
|
+
self,
|
|
262
|
+
task_id: str,
|
|
263
|
+
branches: Dict[str, Callable[["WorkflowBuilder"], "WorkflowBuilder"]],
|
|
264
|
+
**kwargs,
|
|
206
265
|
) -> "WorkflowBuilder":
|
|
207
|
-
|
|
266
|
+
branch_builders = {
|
|
267
|
+
name: branch_func(WorkflowBuilder(f"{{task_id}}_{{name}}", parent=self))
|
|
268
|
+
for name, branch_func in branches.items()
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
branch_tasks = {
|
|
272
|
+
name: list(builder.workflow.tasks.keys())
|
|
273
|
+
for name, builder in branch_builders.items()
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
task = ParallelOperator(task_id=task_id, branches=branch_tasks, **kwargs)
|
|
277
|
+
|
|
208
278
|
if self._current_task:
|
|
209
279
|
task.dependencies.append(self._current_task)
|
|
280
|
+
|
|
210
281
|
self.workflow.add_task(task)
|
|
282
|
+
|
|
283
|
+
for builder in branch_builders.values():
|
|
284
|
+
for task_obj in builder.workflow.tasks.values():
|
|
285
|
+
self.workflow.add_task(task_obj)
|
|
286
|
+
|
|
211
287
|
self._current_task = task_id
|
|
212
288
|
return self
|
|
213
289
|
|
|
214
290
|
def foreach(
|
|
215
291
|
self, task_id: str, items: str, task_chain: List[str], **kwargs
|
|
216
292
|
) -> "WorkflowBuilder":
|
|
217
|
-
task = ForEachOperator(
|
|
293
|
+
task = ForEachOperator(
|
|
294
|
+
task_id=task_id, items=items, task_chain=task_chain, **kwargs
|
|
295
|
+
)
|
|
296
|
+
if self._current_task:
|
|
297
|
+
task.dependencies.append(self._current_task)
|
|
298
|
+
self.workflow.add_task(task)
|
|
299
|
+
self._current_task = task_id
|
|
300
|
+
return self
|
|
301
|
+
|
|
302
|
+
def while_loop(
|
|
303
|
+
self,
|
|
304
|
+
task_id: str,
|
|
305
|
+
condition: str,
|
|
306
|
+
loop_body: Callable[["WorkflowBuilder"], "WorkflowBuilder"],
|
|
307
|
+
**kwargs,
|
|
308
|
+
) -> "WorkflowBuilder":
|
|
309
|
+
loop_builder = loop_body(WorkflowBuilder(f"{{task_id}}_loop", parent=self))
|
|
310
|
+
loop_tasks = list(loop_builder.workflow.tasks.values())
|
|
311
|
+
|
|
312
|
+
task = WhileOperator(
|
|
313
|
+
task_id=task_id,
|
|
314
|
+
condition=condition,
|
|
315
|
+
loop_body=loop_tasks,
|
|
316
|
+
**kwargs,
|
|
317
|
+
)
|
|
318
|
+
|
|
218
319
|
if self._current_task:
|
|
219
320
|
task.dependencies.append(self._current_task)
|
|
321
|
+
|
|
220
322
|
self.workflow.add_task(task)
|
|
323
|
+
|
|
324
|
+
for task_obj in loop_tasks:
|
|
325
|
+
self.workflow.add_task(task_obj)
|
|
326
|
+
|
|
221
327
|
self._current_task = task_id
|
|
222
328
|
return self
|
|
223
329
|
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: highway_dsl
|
|
3
|
+
Version: 0.0.3
|
|
4
|
+
Summary: A domain specific language (DSL) for defining and managing data processing pipelines.
|
|
5
|
+
Author-email: Farseed Ashouri <farseed.ashouri@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/rodmena-limited/highway_dsl
|
|
8
|
+
Project-URL: Issues, https://github.com/rodmena-limited/highway_dsl/issues
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Requires-Python: >=3.9
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
License-File: LICENSE
|
|
15
|
+
Requires-Dist: pydantic>=2.12.3
|
|
16
|
+
Requires-Dist: pyyaml>=6.0
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
19
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
20
|
+
Requires-Dist: types-PyYAML>=6.0.0; extra == "dev"
|
|
21
|
+
Requires-Dist: pytest-cov>=2.12.1; extra == "dev"
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
|
|
24
|
+
# Highway DSL
|
|
25
|
+
|
|
26
|
+
[![PyPI version](https://badge.fury.io/py/highway-dsl.svg)](https://badge.fury.io/py/highway-dsl)
|
|
27
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
28
|
+
|
|
29
|
+
**Highway DSL** is a Python-based domain-specific language for defining complex workflows in a clear, concise, and fluent manner. It is part of the larger **Highway** project, an advanced workflow engine capable of running complex DAG-based workflows.
|
|
30
|
+
|
|
31
|
+
## Features
|
|
32
|
+
|
|
33
|
+
* **Fluent API:** A powerful and intuitive `WorkflowBuilder` for defining workflows programmatically.
|
|
34
|
+
* **Pydantic-based:** All models are built on Pydantic, providing robust data validation, serialization, and documentation.
|
|
35
|
+
* **Rich Operators:** A comprehensive set of operators for handling various workflow scenarios:
|
|
36
|
+
* `Task`
|
|
37
|
+
* `Condition` (if/else)
|
|
38
|
+
* `Parallel`
|
|
39
|
+
* `ForEach`
|
|
40
|
+
* `Wait`
|
|
41
|
+
* `While`
|
|
42
|
+
* **YAML/JSON Interoperability:** Workflows can be defined in Python and exported to YAML or JSON, and workflows written in YAML or JSON can be loaded back into Python objects.
|
|
43
|
+
* **Extensible:** The DSL is designed to be extensible with custom operators and policies.
|
|
44
|
+
|
|
45
|
+
## Installation
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install highway-dsl
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Quick Start
|
|
52
|
+
|
|
53
|
+
Here's a simple example of how to define a workflow using the `WorkflowBuilder`:
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
from datetime import timedelta
|
|
57
|
+
from highway_dsl import WorkflowBuilder
|
|
58
|
+
|
|
59
|
+
workflow = (
|
|
60
|
+
WorkflowBuilder("simple_etl")
|
|
61
|
+
.task("extract", "etl.extract_data", result_key="raw_data")
|
|
62
|
+
.task(
|
|
63
|
+
"transform",
|
|
64
|
+
"etl.transform_data",
|
|
65
|
+
args=["{{raw_data}}"],
|
|
66
|
+
result_key="transformed_data",
|
|
67
|
+
)
|
|
68
|
+
.retry(max_retries=3, delay=timedelta(seconds=10))
|
|
69
|
+
.task("load", "etl.load_data", args=["{{transformed_data}}"])
|
|
70
|
+
.timeout(timeout=timedelta(minutes=30))
|
|
71
|
+
.wait("wait_next", timedelta(hours=24))
|
|
72
|
+
.task("cleanup", "etl.cleanup")
|
|
73
|
+
.build()
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
print(workflow.to_yaml())
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Advanced Usage
|
|
80
|
+
|
|
81
|
+
### Conditional Logic
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
from highway_dsl import WorkflowBuilder, RetryPolicy
|
|
85
|
+
from datetime import timedelta
|
|
86
|
+
|
|
87
|
+
builder = WorkflowBuilder("data_processing_pipeline")
|
|
88
|
+
|
|
89
|
+
builder.task("start", "workflows.tasks.initialize", result_key="init_data")
|
|
90
|
+
builder.task(
|
|
91
|
+
"validate",
|
|
92
|
+
"workflows.tasks.validate_data",
|
|
93
|
+
args=["{{init_data}}"],
|
|
94
|
+
result_key="validated_data",
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
builder.condition(
|
|
98
|
+
"check_quality",
|
|
99
|
+
condition="{{validated_data.quality_score}} > 0.8",
|
|
100
|
+
if_true=lambda b: b.task(
|
|
101
|
+
"high_quality_processing",
|
|
102
|
+
"workflows.tasks.advanced_processing",
|
|
103
|
+
args=["{{validated_data}}"],
|
|
104
|
+
retry_policy=RetryPolicy(max_retries=5, delay=timedelta(seconds=10), backoff_factor=2.0),
|
|
105
|
+
),
|
|
106
|
+
if_false=lambda b: b.task(
|
|
107
|
+
"standard_processing",
|
|
108
|
+
"workflows.tasks.basic_processing",
|
|
109
|
+
args=["{{validated_data}}"],
|
|
110
|
+
),
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
workflow = builder.build()
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### While Loops
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
from highway_dsl import WorkflowBuilder
|
|
120
|
+
|
|
121
|
+
builder = WorkflowBuilder("qa_rework_workflow")
|
|
122
|
+
|
|
123
|
+
builder.task("start_qa", "workflows.tasks.start_qa", result_key="qa_results")
|
|
124
|
+
|
|
125
|
+
builder.while_loop(
|
|
126
|
+
"qa_rework_loop",
|
|
127
|
+
condition="{{qa_results.status}} == 'failed'",
|
|
128
|
+
loop_body=lambda b: b.task("perform_rework", "workflows.tasks.perform_rework").task(
|
|
129
|
+
"re_run_qa", "workflows.tasks.run_qa", result_key="qa_results"
|
|
130
|
+
),
|
|
131
|
+
)
|
|
132
|
+
|
|
133
|
+
builder.task("finalize_product", "workflows.tasks.finalize_product", dependencies=["qa_rework_loop"])
|
|
134
|
+
|
|
135
|
+
workflow = builder.build()
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## Development
|
|
139
|
+
|
|
140
|
+
To set up the development environment:
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
git clone https://github.com/rodmena-limited/highway_dsl.git
|
|
144
|
+
cd highway_dsl
|
|
145
|
+
python -m venv .venv
|
|
146
|
+
source .venv/bin/activate
|
|
147
|
+
pip install -e ".[dev]"
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### Running Tests
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
pytest
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### Type Checking
|
|
157
|
+
|
|
158
|
+
```bash
|
|
159
|
+
mypy .
|
|
160
|
+
```
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
highway_dsl/__init__.py,sha256=mr1oMylxliFwu2VO2qpyM3sVQwYIoPL2P6JE-6ZuF7M,507
|
|
2
|
+
highway_dsl/workflow_dsl.py,sha256=yMTmFr5bbjxfVTleCvSsDZ__n9C7qH39RdzajkUEmiI,11882
|
|
3
|
+
highway_dsl-0.0.3.dist-info/licenses/LICENSE,sha256=qdFq1H66BvKg67mf4-WGpFwtG2u_dNknxuJDQ1_ubaY,1072
|
|
4
|
+
highway_dsl-0.0.3.dist-info/METADATA,sha256=--TFErjeBDZ1mAyNHk30CQavLuKWAaoxgHK7xFpT-Ok,4612
|
|
5
|
+
highway_dsl-0.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
6
|
+
highway_dsl-0.0.3.dist-info/top_level.txt,sha256=_5uX-bbBsQ2rsi1XMr7WRyKbr6ack5GqVBcy-QjF1C8,12
|
|
7
|
+
highway_dsl-0.0.3.dist-info/RECORD,,
|
|
@@ -1,203 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: highway_dsl
|
|
3
|
-
Version: 0.0.1
|
|
4
|
-
Summary: A domain specific language (DSL) for defining and managing data processing pipelines.
|
|
5
|
-
Author-email: Farseed Ashouri <farseed.ashouri@gmail.com>
|
|
6
|
-
License: MIT
|
|
7
|
-
Project-URL: Homepage, https://github.com/rodmena-limited/highway_dsl
|
|
8
|
-
Project-URL: Issues, https://github.com/rodmena-limited/highway_dsl/issues
|
|
9
|
-
Classifier: Programming Language :: Python :: 3
|
|
10
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
-
Classifier: Operating System :: OS Independent
|
|
12
|
-
Requires-Python: >=3.9
|
|
13
|
-
Description-Content-Type: text/markdown
|
|
14
|
-
License-File: LICENSE
|
|
15
|
-
Requires-Dist: pydantic>=2.12.3
|
|
16
|
-
Requires-Dist: pyyaml>=6.0
|
|
17
|
-
Provides-Extra: dev
|
|
18
|
-
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
19
|
-
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
20
|
-
Requires-Dist: types-PyYAML>=6.0.0; extra == "dev"
|
|
21
|
-
Requires-Dist: pytest-cov>=2.12.1; extra == "dev"
|
|
22
|
-
Dynamic: license-file
|
|
23
|
-
|
|
24
|
-
# Highway DSL
|
|
25
|
-
|
|
26
|
-
Highway DSL is a Python-based Domain Specific Language (DSL) for defining and managing complex workflows. It allows users to declaratively specify tasks, dependencies, and execution parameters, supporting various control flow mechanisms like conditions, parallel execution, and retries.
|
|
27
|
-
|
|
28
|
-
## Features
|
|
29
|
-
|
|
30
|
-
* **Declarative Workflow Definition:** Define workflows using a clear and concise Python API or through YAML/JSON configurations.
|
|
31
|
-
* **Pydantic Models:** Leverages Pydantic for robust data validation and serialization/deserialization of workflow definitions.
|
|
32
|
-
* **Rich Task Types:** Supports various operators including:
|
|
33
|
-
* `TaskOperator`: Executes a Python function.
|
|
34
|
-
* `ConditionOperator`: Enables conditional branching based on expressions.
|
|
35
|
-
* `WaitOperator`: Pauses workflow execution for a specified duration or until a specific datetime.
|
|
36
|
-
* `ParallelOperator`: Executes multiple branches of tasks concurrently.
|
|
37
|
-
* `ForEachOperator`: Iterates over a collection, executing a chain of tasks for each item.
|
|
38
|
-
* **Retry and Timeout Policies:** Define retry strategies and timeout limits for individual tasks.
|
|
39
|
-
* **Serialization/Deserialization:** Seamless conversion of workflow definitions between Python objects, YAML, and JSON formats.
|
|
40
|
-
* **Workflow Builder:** A fluent API for constructing workflows programmatically.
|
|
41
|
-
|
|
42
|
-
## Installation
|
|
43
|
-
|
|
44
|
-
To install Highway DSL, you can use pip:
|
|
45
|
-
|
|
46
|
-
```bash
|
|
47
|
-
pip install highway-dsl
|
|
48
|
-
```
|
|
49
|
-
|
|
50
|
-
If you want to install it for development, including testing dependencies:
|
|
51
|
-
|
|
52
|
-
```bash
|
|
53
|
-
pip install "highway-dsl[dev]"
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
## Usage
|
|
57
|
-
|
|
58
|
-
### Defining a Simple Workflow
|
|
59
|
-
|
|
60
|
-
```python
|
|
61
|
-
from datetime import timedelta
|
|
62
|
-
from workflow_dsl import WorkflowBuilder
|
|
63
|
-
|
|
64
|
-
def demonstrate_basic_workflow():
|
|
65
|
-
"""Show a simple complete workflow using just the builder"""
|
|
66
|
-
|
|
67
|
-
workflow = (
|
|
68
|
-
WorkflowBuilder("simple_etl")
|
|
69
|
-
.task("extract", "etl.extract_data", result_key="raw_data")
|
|
70
|
-
.task(
|
|
71
|
-
"transform",
|
|
72
|
-
"etl.transform_data",
|
|
73
|
-
args=["{{raw_data}}"],
|
|
74
|
-
result_key="transformed_data",
|
|
75
|
-
)
|
|
76
|
-
.retry(max_retries=3, delay=timedelta(seconds=10))
|
|
77
|
-
.task("load", "etl.load_data", args=["{{transformed_data}}"])
|
|
78
|
-
.timeout(timeout=timedelta(minutes=30))
|
|
79
|
-
.wait("wait_next", timedelta(hours=24))
|
|
80
|
-
.task("cleanup", "etl.cleanup")
|
|
81
|
-
.build()
|
|
82
|
-
)
|
|
83
|
-
|
|
84
|
-
workflow.set_variables(
|
|
85
|
-
{"database_url": "postgresql://localhost/mydb", "chunk_size": 1000}
|
|
86
|
-
)
|
|
87
|
-
|
|
88
|
-
return workflow
|
|
89
|
-
|
|
90
|
-
if __name__ == "__main__":
|
|
91
|
-
basic_workflow = demonstrate_basic_workflow()
|
|
92
|
-
print(basic_workflow.to_yaml())
|
|
93
|
-
```
|
|
94
|
-
|
|
95
|
-
### Defining a Complex Workflow
|
|
96
|
-
|
|
97
|
-
Refer to `example_usage.py` for a more complex example demonstrating conditional logic, parallel execution, and iteration.
|
|
98
|
-
|
|
99
|
-
### YAML Configuration
|
|
100
|
-
|
|
101
|
-
You can also define workflows directly in YAML:
|
|
102
|
-
|
|
103
|
-
```yaml
|
|
104
|
-
name: simple_etl
|
|
105
|
-
version: 1.0.0
|
|
106
|
-
description: Simple ETL workflow with retry and timeout
|
|
107
|
-
variables:
|
|
108
|
-
database_url: postgresql://localhost/mydb
|
|
109
|
-
chunk_size: 1000
|
|
110
|
-
start_task: extract
|
|
111
|
-
tasks:
|
|
112
|
-
extract:
|
|
113
|
-
task_id: extract
|
|
114
|
-
operator_type: task
|
|
115
|
-
function: etl.extract_data
|
|
116
|
-
result_key: raw_data
|
|
117
|
-
dependencies: []
|
|
118
|
-
metadata: {}
|
|
119
|
-
|
|
120
|
-
transform:
|
|
121
|
-
task_id: transform
|
|
122
|
-
operator_type: task
|
|
123
|
-
function: etl.transform_data
|
|
124
|
-
args: ["{{raw_data}}"]
|
|
125
|
-
result_key: transformed_data
|
|
126
|
-
dependencies: ["extract"]
|
|
127
|
-
retry_policy:
|
|
128
|
-
max_retries: 3
|
|
129
|
-
delay: PT10S
|
|
130
|
-
backoff_factor: 2.0
|
|
131
|
-
metadata: {}
|
|
132
|
-
|
|
133
|
-
load:
|
|
134
|
-
task_id: load
|
|
135
|
-
operator_type: task
|
|
136
|
-
function: etl.load_data
|
|
137
|
-
args: ["{{transformed_data}}"]
|
|
138
|
-
dependencies: ["transform"]
|
|
139
|
-
timeout_policy:
|
|
140
|
-
timeout: PT30M
|
|
141
|
-
kill_on_timeout: true
|
|
142
|
-
metadata: {}
|
|
143
|
-
|
|
144
|
-
wait_next:
|
|
145
|
-
task_id: wait_next
|
|
146
|
-
operator_type: wait
|
|
147
|
-
wait_for: "P1D"
|
|
148
|
-
dependencies: ["load"]
|
|
149
|
-
metadata: {}
|
|
150
|
-
|
|
151
|
-
cleanup:
|
|
152
|
-
task_id: cleanup
|
|
153
|
-
operator_type: task
|
|
154
|
-
function: etl.cleanup
|
|
155
|
-
dependencies: ["wait_next"]
|
|
156
|
-
metadata: {}
|
|
157
|
-
```
|
|
158
|
-
|
|
159
|
-
To load this YAML:
|
|
160
|
-
|
|
161
|
-
```python
|
|
162
|
-
from workflow_dsl import Workflow
|
|
163
|
-
|
|
164
|
-
yaml_content = """
|
|
165
|
-
# ... (yaml content from above)
|
|
166
|
-
"""
|
|
167
|
-
|
|
168
|
-
workflow = Workflow.from_yaml(yaml_content)
|
|
169
|
-
print(workflow.name)
|
|
170
|
-
```
|
|
171
|
-
|
|
172
|
-
## Development
|
|
173
|
-
|
|
174
|
-
### Running Tests
|
|
175
|
-
|
|
176
|
-
To run the unit tests, navigate to the project root and execute:
|
|
177
|
-
|
|
178
|
-
```bash
|
|
179
|
-
pytest
|
|
180
|
-
```
|
|
181
|
-
|
|
182
|
-
### Type Checking
|
|
183
|
-
|
|
184
|
-
To perform static type checking with MyPy:
|
|
185
|
-
|
|
186
|
-
```bash
|
|
187
|
-
mypy .
|
|
188
|
-
```
|
|
189
|
-
|
|
190
|
-
## Project Structure
|
|
191
|
-
|
|
192
|
-
```
|
|
193
|
-
.highway/
|
|
194
|
-
├── workflow_dsl.py # Core DSL definitions (Pydantic models)
|
|
195
|
-
├── example_usage.py # Examples of how to use the DSL
|
|
196
|
-
├── tests/
|
|
197
|
-
│ ├── __init__.py
|
|
198
|
-
│ ├── conftest.py # Pytest configuration
|
|
199
|
-
│ └── test_workflow_dsl.py # Unit and integration tests
|
|
200
|
-
├── pyproject.toml # Project metadata and dependencies
|
|
201
|
-
├── README.md # This file
|
|
202
|
-
└── SUMMARY.md # Summary of changes and future instructions
|
|
203
|
-
```
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
highway_dsl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
highway_dsl/workflow_dsl.py,sha256=PDUCYFBt0SHSZxXv6HKZXlVOmjUcYWiX4i6Kwhsn4h8,9026
|
|
3
|
-
highway_dsl-0.0.1.dist-info/licenses/LICENSE,sha256=qdFq1H66BvKg67mf4-WGpFwtG2u_dNknxuJDQ1_ubaY,1072
|
|
4
|
-
highway_dsl-0.0.1.dist-info/METADATA,sha256=3DgCxoxYJki8WYd4gfDB9IWIq1iAK-liAmWQ7fWxFvU,5838
|
|
5
|
-
highway_dsl-0.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
6
|
-
highway_dsl-0.0.1.dist-info/top_level.txt,sha256=_5uX-bbBsQ2rsi1XMr7WRyKbr6ack5GqVBcy-QjF1C8,12
|
|
7
|
-
highway_dsl-0.0.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|