highway-dsl 0.0.2__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of highway-dsl might be problematic. Click here for more details.
- highway_dsl/__init__.py +3 -0
- highway_dsl/workflow_dsl.py +194 -28
- highway_dsl-1.0.2.dist-info/METADATA +228 -0
- highway_dsl-1.0.2.dist-info/RECORD +7 -0
- highway_dsl-0.0.2.dist-info/METADATA +0 -227
- highway_dsl-0.0.2.dist-info/RECORD +0 -7
- {highway_dsl-0.0.2.dist-info → highway_dsl-1.0.2.dist-info}/WHEEL +0 -0
- {highway_dsl-0.0.2.dist-info → highway_dsl-1.0.2.dist-info}/licenses/LICENSE +0 -0
- {highway_dsl-0.0.2.dist-info → highway_dsl-1.0.2.dist-info}/top_level.txt +0 -0
highway_dsl/__init__.py
CHANGED
|
@@ -6,6 +6,7 @@ from .workflow_dsl import (
|
|
|
6
6
|
ParallelOperator,
|
|
7
7
|
WaitOperator,
|
|
8
8
|
ForEachOperator,
|
|
9
|
+
WhileOperator,
|
|
9
10
|
RetryPolicy,
|
|
10
11
|
TimeoutPolicy,
|
|
11
12
|
OperatorType,
|
|
@@ -14,11 +15,13 @@ from .workflow_dsl import (
|
|
|
14
15
|
__all__ = [
|
|
15
16
|
"Workflow",
|
|
16
17
|
"WorkflowBuilder",
|
|
18
|
+
"BaseOperator",
|
|
17
19
|
"TaskOperator",
|
|
18
20
|
"ConditionOperator",
|
|
19
21
|
"ParallelOperator",
|
|
20
22
|
"WaitOperator",
|
|
21
23
|
"ForEachOperator",
|
|
24
|
+
"WhileOperator",
|
|
22
25
|
"RetryPolicy",
|
|
23
26
|
"TimeoutPolicy",
|
|
24
27
|
"OperatorType",
|
highway_dsl/workflow_dsl.py
CHANGED
|
@@ -16,6 +16,7 @@ class OperatorType(Enum):
|
|
|
16
16
|
FOREACH = "foreach"
|
|
17
17
|
SWITCH = "switch"
|
|
18
18
|
TRY_CATCH = "try_catch"
|
|
19
|
+
WHILE = "while"
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
class RetryPolicy(BaseModel):
|
|
@@ -38,6 +39,9 @@ class BaseOperator(BaseModel, ABC):
|
|
|
38
39
|
retry_policy: Optional[RetryPolicy] = None
|
|
39
40
|
timeout_policy: Optional[TimeoutPolicy] = None
|
|
40
41
|
metadata: Dict[str, Any] = Field(default_factory=dict)
|
|
42
|
+
is_internal_loop_task: bool = Field(
|
|
43
|
+
default=False, exclude=True
|
|
44
|
+
) # Mark if task is internal to a loop
|
|
41
45
|
|
|
42
46
|
model_config = ConfigDict(use_enum_values=True, arbitrary_types_allowed=True)
|
|
43
47
|
|
|
@@ -52,8 +56,8 @@ class TaskOperator(BaseOperator):
|
|
|
52
56
|
|
|
53
57
|
class ConditionOperator(BaseOperator):
|
|
54
58
|
condition: str
|
|
55
|
-
if_true: str
|
|
56
|
-
if_false: str
|
|
59
|
+
if_true: Optional[str]
|
|
60
|
+
if_false: Optional[str]
|
|
57
61
|
operator_type: OperatorType = Field(OperatorType.CONDITION, frozen=True)
|
|
58
62
|
|
|
59
63
|
|
|
@@ -90,10 +94,34 @@ class ParallelOperator(BaseOperator):
|
|
|
90
94
|
|
|
91
95
|
class ForEachOperator(BaseOperator):
|
|
92
96
|
items: str
|
|
93
|
-
|
|
97
|
+
loop_body: List[
|
|
98
|
+
Union[
|
|
99
|
+
TaskOperator,
|
|
100
|
+
ConditionOperator,
|
|
101
|
+
WaitOperator,
|
|
102
|
+
ParallelOperator,
|
|
103
|
+
"ForEachOperator",
|
|
104
|
+
"WhileOperator",
|
|
105
|
+
]
|
|
106
|
+
] = Field(default_factory=list)
|
|
94
107
|
operator_type: OperatorType = Field(OperatorType.FOREACH, frozen=True)
|
|
95
108
|
|
|
96
109
|
|
|
110
|
+
class WhileOperator(BaseOperator):
|
|
111
|
+
condition: str
|
|
112
|
+
loop_body: List[
|
|
113
|
+
Union[
|
|
114
|
+
TaskOperator,
|
|
115
|
+
ConditionOperator,
|
|
116
|
+
WaitOperator,
|
|
117
|
+
ParallelOperator,
|
|
118
|
+
ForEachOperator,
|
|
119
|
+
"WhileOperator",
|
|
120
|
+
]
|
|
121
|
+
] = Field(default_factory=list)
|
|
122
|
+
operator_type: OperatorType = Field(OperatorType.WHILE, frozen=True)
|
|
123
|
+
|
|
124
|
+
|
|
97
125
|
class Workflow(BaseModel):
|
|
98
126
|
name: str
|
|
99
127
|
version: str = "1.0.0"
|
|
@@ -106,6 +134,7 @@ class Workflow(BaseModel):
|
|
|
106
134
|
WaitOperator,
|
|
107
135
|
ParallelOperator,
|
|
108
136
|
ForEachOperator,
|
|
137
|
+
WhileOperator,
|
|
109
138
|
],
|
|
110
139
|
] = Field(default_factory=dict)
|
|
111
140
|
variables: Dict[str, Any] = Field(default_factory=dict)
|
|
@@ -122,6 +151,7 @@ class Workflow(BaseModel):
|
|
|
122
151
|
OperatorType.WAIT.value: WaitOperator,
|
|
123
152
|
OperatorType.PARALLEL.value: ParallelOperator,
|
|
124
153
|
OperatorType.FOREACH.value: ForEachOperator,
|
|
154
|
+
OperatorType.WHILE.value: WhileOperator,
|
|
125
155
|
}
|
|
126
156
|
for task_id, task_data in data["tasks"].items():
|
|
127
157
|
operator_type = task_data.get("operator_type")
|
|
@@ -141,6 +171,7 @@ class Workflow(BaseModel):
|
|
|
141
171
|
WaitOperator,
|
|
142
172
|
ParallelOperator,
|
|
143
173
|
ForEachOperator,
|
|
174
|
+
WhileOperator,
|
|
144
175
|
],
|
|
145
176
|
) -> "Workflow":
|
|
146
177
|
self.tasks[task.task_id] = task
|
|
@@ -172,34 +203,80 @@ class Workflow(BaseModel):
|
|
|
172
203
|
|
|
173
204
|
|
|
174
205
|
class WorkflowBuilder:
|
|
175
|
-
def __init__(
|
|
206
|
+
def __init__(
|
|
207
|
+
self,
|
|
208
|
+
name: str,
|
|
209
|
+
existing_workflow: Optional[Workflow] = None,
|
|
210
|
+
parent: Optional["WorkflowBuilder"] = None,
|
|
211
|
+
):
|
|
176
212
|
if existing_workflow:
|
|
177
213
|
self.workflow = existing_workflow
|
|
178
214
|
else:
|
|
179
215
|
self.workflow = Workflow(name=name)
|
|
180
216
|
self._current_task: Optional[str] = None
|
|
217
|
+
self.parent = parent
|
|
218
|
+
|
|
219
|
+
def _add_task(
|
|
220
|
+
self,
|
|
221
|
+
task: Union[
|
|
222
|
+
TaskOperator,
|
|
223
|
+
ConditionOperator,
|
|
224
|
+
WaitOperator,
|
|
225
|
+
ParallelOperator,
|
|
226
|
+
ForEachOperator,
|
|
227
|
+
WhileOperator,
|
|
228
|
+
],
|
|
229
|
+
**kwargs,
|
|
230
|
+
) -> None:
|
|
231
|
+
dependencies = kwargs.get("dependencies", [])
|
|
232
|
+
if self._current_task and not dependencies:
|
|
233
|
+
dependencies.append(self._current_task)
|
|
234
|
+
|
|
235
|
+
task.dependencies = sorted(list(set(dependencies)))
|
|
236
|
+
|
|
237
|
+
self.workflow.add_task(task)
|
|
238
|
+
self._current_task = task.task_id
|
|
181
239
|
|
|
182
240
|
def task(self, task_id: str, function: str, **kwargs) -> "WorkflowBuilder":
|
|
183
241
|
task = TaskOperator(task_id=task_id, function=function, **kwargs)
|
|
184
|
-
|
|
185
|
-
task.dependencies.append(self._current_task)
|
|
186
|
-
self.workflow.add_task(task)
|
|
187
|
-
self._current_task = task_id
|
|
242
|
+
self._add_task(task, **kwargs)
|
|
188
243
|
return self
|
|
189
244
|
|
|
190
245
|
def condition(
|
|
191
|
-
self,
|
|
246
|
+
self,
|
|
247
|
+
task_id: str,
|
|
248
|
+
condition: str,
|
|
249
|
+
if_true: Callable[["WorkflowBuilder"], "WorkflowBuilder"],
|
|
250
|
+
if_false: Callable[["WorkflowBuilder"], "WorkflowBuilder"],
|
|
251
|
+
**kwargs,
|
|
192
252
|
) -> "WorkflowBuilder":
|
|
253
|
+
true_builder = if_true(WorkflowBuilder(f"{task_id}_true", parent=self))
|
|
254
|
+
false_builder = if_false(WorkflowBuilder(f"{task_id}_false", parent=self))
|
|
255
|
+
|
|
256
|
+
true_tasks = list(true_builder.workflow.tasks.keys())
|
|
257
|
+
false_tasks = list(false_builder.workflow.tasks.keys())
|
|
258
|
+
|
|
193
259
|
task = ConditionOperator(
|
|
194
260
|
task_id=task_id,
|
|
195
261
|
condition=condition,
|
|
196
|
-
if_true=
|
|
197
|
-
if_false=
|
|
262
|
+
if_true=true_tasks[0] if true_tasks else None,
|
|
263
|
+
if_false=false_tasks[0] if false_tasks else None,
|
|
198
264
|
**kwargs,
|
|
199
265
|
)
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
266
|
+
|
|
267
|
+
self._add_task(task, **kwargs)
|
|
268
|
+
|
|
269
|
+
for task_obj in true_builder.workflow.tasks.values():
|
|
270
|
+
# Only add the condition task as dependency, preserve original dependencies
|
|
271
|
+
if task_id not in task_obj.dependencies:
|
|
272
|
+
task_obj.dependencies.append(task_id)
|
|
273
|
+
self.workflow.add_task(task_obj)
|
|
274
|
+
for task_obj in false_builder.workflow.tasks.values():
|
|
275
|
+
# Only add the condition task as dependency, preserve original dependencies
|
|
276
|
+
if task_id not in task_obj.dependencies:
|
|
277
|
+
task_obj.dependencies.append(task_id)
|
|
278
|
+
self.workflow.add_task(task_obj)
|
|
279
|
+
|
|
203
280
|
self._current_task = task_id
|
|
204
281
|
return self
|
|
205
282
|
|
|
@@ -207,31 +284,120 @@ class WorkflowBuilder:
|
|
|
207
284
|
self, task_id: str, wait_for: Union[timedelta, datetime, str], **kwargs
|
|
208
285
|
) -> "WorkflowBuilder":
|
|
209
286
|
task = WaitOperator(task_id=task_id, wait_for=wait_for, **kwargs)
|
|
210
|
-
|
|
211
|
-
task.dependencies.append(self._current_task)
|
|
212
|
-
self.workflow.add_task(task)
|
|
213
|
-
self._current_task = task_id
|
|
287
|
+
self._add_task(task, **kwargs)
|
|
214
288
|
return self
|
|
215
289
|
|
|
216
290
|
def parallel(
|
|
217
|
-
self,
|
|
291
|
+
self,
|
|
292
|
+
task_id: str,
|
|
293
|
+
branches: Dict[str, Callable[["WorkflowBuilder"], "WorkflowBuilder"]],
|
|
294
|
+
**kwargs,
|
|
218
295
|
) -> "WorkflowBuilder":
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
296
|
+
branch_builders = {}
|
|
297
|
+
for name, branch_func in branches.items():
|
|
298
|
+
branch_builder = branch_func(
|
|
299
|
+
WorkflowBuilder(f"{task_id}_{name}", parent=self)
|
|
300
|
+
)
|
|
301
|
+
branch_builders[name] = branch_builder
|
|
302
|
+
|
|
303
|
+
branch_tasks = {
|
|
304
|
+
name: list(builder.workflow.tasks.keys())
|
|
305
|
+
for name, builder in branch_builders.items()
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
task = ParallelOperator(task_id=task_id, branches=branch_tasks, **kwargs)
|
|
309
|
+
|
|
310
|
+
self._add_task(task, **kwargs)
|
|
311
|
+
|
|
312
|
+
for builder in branch_builders.values():
|
|
313
|
+
for task_obj in builder.workflow.tasks.values():
|
|
314
|
+
# Only add the parallel task as dependency to non-internal tasks,
|
|
315
|
+
# preserve original dependencies
|
|
316
|
+
if (
|
|
317
|
+
not getattr(task_obj, "is_internal_loop_task", False)
|
|
318
|
+
and task_id not in task_obj.dependencies
|
|
319
|
+
):
|
|
320
|
+
task_obj.dependencies.append(task_id)
|
|
321
|
+
self.workflow.add_task(task_obj)
|
|
322
|
+
|
|
223
323
|
self._current_task = task_id
|
|
224
324
|
return self
|
|
225
325
|
|
|
226
326
|
def foreach(
|
|
227
|
-
self,
|
|
327
|
+
self,
|
|
328
|
+
task_id: str,
|
|
329
|
+
items: str,
|
|
330
|
+
loop_body: Callable[["WorkflowBuilder"], "WorkflowBuilder"],
|
|
331
|
+
**kwargs,
|
|
228
332
|
) -> "WorkflowBuilder":
|
|
333
|
+
# Create a temporary builder for the loop body.
|
|
334
|
+
temp_builder = WorkflowBuilder(f"{task_id}_loop", parent=self)
|
|
335
|
+
loop_builder = loop_body(temp_builder)
|
|
336
|
+
loop_tasks = list(loop_builder.workflow.tasks.values())
|
|
337
|
+
|
|
338
|
+
# Mark all loop body tasks as internal to prevent parallel dependency injection
|
|
339
|
+
for task_obj in loop_tasks:
|
|
340
|
+
task_obj.is_internal_loop_task = True
|
|
341
|
+
|
|
342
|
+
# Create the foreach operator
|
|
229
343
|
task = ForEachOperator(
|
|
230
|
-
task_id=task_id,
|
|
344
|
+
task_id=task_id,
|
|
345
|
+
items=items,
|
|
346
|
+
loop_body=loop_tasks,
|
|
347
|
+
**kwargs,
|
|
231
348
|
)
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
self.
|
|
349
|
+
|
|
350
|
+
# Add the foreach task to workflow to establish initial dependencies
|
|
351
|
+
self._add_task(task, **kwargs)
|
|
352
|
+
|
|
353
|
+
# Add the foreach task as dependency to the FIRST task in the loop body
|
|
354
|
+
# and preserve the original dependency chain within the loop
|
|
355
|
+
if loop_tasks:
|
|
356
|
+
first_task = loop_tasks[0]
|
|
357
|
+
if task_id not in first_task.dependencies:
|
|
358
|
+
first_task.dependencies.append(task_id)
|
|
359
|
+
|
|
360
|
+
# Add all loop tasks to workflow
|
|
361
|
+
for task_obj in loop_tasks:
|
|
362
|
+
self.workflow.add_task(task_obj)
|
|
363
|
+
|
|
364
|
+
self._current_task = task_id
|
|
365
|
+
return self
|
|
366
|
+
|
|
367
|
+
def while_loop(
|
|
368
|
+
self,
|
|
369
|
+
task_id: str,
|
|
370
|
+
condition: str,
|
|
371
|
+
loop_body: Callable[["WorkflowBuilder"], "WorkflowBuilder"],
|
|
372
|
+
**kwargs,
|
|
373
|
+
) -> "WorkflowBuilder":
|
|
374
|
+
loop_builder = loop_body(WorkflowBuilder(f"{task_id}_loop", parent=self))
|
|
375
|
+
loop_tasks = list(loop_builder.workflow.tasks.values())
|
|
376
|
+
|
|
377
|
+
# Mark all loop body tasks as internal to prevent parallel dependency injection
|
|
378
|
+
for task_obj in loop_tasks:
|
|
379
|
+
task_obj.is_internal_loop_task = True
|
|
380
|
+
|
|
381
|
+
task = WhileOperator(
|
|
382
|
+
task_id=task_id,
|
|
383
|
+
condition=condition,
|
|
384
|
+
loop_body=loop_tasks,
|
|
385
|
+
**kwargs,
|
|
386
|
+
)
|
|
387
|
+
|
|
388
|
+
self._add_task(task, **kwargs)
|
|
389
|
+
|
|
390
|
+
# Fix: Only add the while task as dependency to the FIRST task in the loop body
|
|
391
|
+
# and preserve the original dependency chain within the loop
|
|
392
|
+
if loop_tasks:
|
|
393
|
+
first_task = loop_tasks[0]
|
|
394
|
+
if task_id not in first_task.dependencies:
|
|
395
|
+
first_task.dependencies.append(task_id)
|
|
396
|
+
|
|
397
|
+
# Add all loop tasks to workflow without modifying their dependencies further
|
|
398
|
+
for task_obj in loop_tasks:
|
|
399
|
+
self.workflow.add_task(task_obj)
|
|
400
|
+
|
|
235
401
|
self._current_task = task_id
|
|
236
402
|
return self
|
|
237
403
|
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: highway_dsl
|
|
3
|
+
Version: 1.0.2
|
|
4
|
+
Summary: A stable domain specific language (DSL) for defining and managing data processing pipelines and workflow engines.
|
|
5
|
+
Author-email: Farseed Ashouri <farseed.ashouri@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/rodmena-limited/highway_dsl
|
|
8
|
+
Project-URL: Issues, https://github.com/rodmena-limited/highway_dsl/issues
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Requires-Python: >=3.9
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
License-File: LICENSE
|
|
15
|
+
Requires-Dist: pydantic>=2.12.3
|
|
16
|
+
Requires-Dist: pyyaml>=6.0
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
19
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
20
|
+
Requires-Dist: types-PyYAML>=6.0.0; extra == "dev"
|
|
21
|
+
Requires-Dist: pytest-cov>=2.12.1; extra == "dev"
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
|
|
24
|
+
# Highway DSL
|
|
25
|
+
|
|
26
|
+
[PyPI version](https://badge.fury.io/py/highway-dsl)
|
|
27
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
28
|
+
[highway-dsl on PyPI](https://pypi.org/project/highway-dsl/)
|
|
29
|
+
|
|
30
|
+
**Highway DSL** is a stable, Python-based domain-specific language for defining complex workflows in a clear, concise, and fluent manner. It is part of the larger **Highway** project, an advanced workflow engine capable of running complex DAG-based workflows.
|
|
31
|
+
|
|
32
|
+
## Version 1.0.2 - Stable Release
|
|
33
|
+
|
|
34
|
+
This is a stable release with important bug fixes and enhancements, including a critical fix for the ForEach operator dependency management issue.
|
|
35
|
+
|
|
36
|
+
## Features
|
|
37
|
+
|
|
38
|
+
* **Fluent API:** A powerful and intuitive `WorkflowBuilder` for defining workflows programmatically.
|
|
39
|
+
* **Pydantic-based:** All models are built on Pydantic, providing robust data validation, serialization, and documentation.
|
|
40
|
+
* **Rich Operators:** A comprehensive set of operators for handling various workflow scenarios:
|
|
41
|
+
* `Task` - Basic workflow steps
|
|
42
|
+
* `Condition` (if/else) - Conditional branching
|
|
43
|
+
* `Parallel` - Execute multiple branches simultaneously
|
|
44
|
+
* `ForEach` - Iterate over collections with proper dependency management
|
|
45
|
+
* `Wait` - Pause execution for scheduled tasks
|
|
46
|
+
* `While` - Execute loops based on conditions
|
|
47
|
+
* **Fixed ForEach Bug:** Loop body tasks are now properly encapsulated, so they no longer pick up unwanted "grandparent" dependencies from an enclosing parallel operator.
|
|
48
|
+
* **YAML/JSON Interoperability:** Workflows can be defined in Python and exported to YAML or JSON, and vice-versa.
|
|
49
|
+
* **Retry and Timeout Policies:** Built-in error handling and execution time management.
|
|
50
|
+
* **Extensible:** The DSL is designed to be extensible with custom operators and policies.
|
|
51
|
+
|
|
52
|
+
## Installation
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
pip install highway-dsl
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Quick Start
|
|
59
|
+
|
|
60
|
+
Here's a simple example of how to define a workflow using the `WorkflowBuilder`:
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
from datetime import timedelta
|
|
64
|
+
from highway_dsl import WorkflowBuilder
|
|
65
|
+
|
|
66
|
+
workflow = (
|
|
67
|
+
WorkflowBuilder("simple_etl")
|
|
68
|
+
.task("extract", "etl.extract_data", result_key="raw_data")
|
|
69
|
+
.task(
|
|
70
|
+
"transform",
|
|
71
|
+
"etl.transform_data",
|
|
72
|
+
args=["{{raw_data}}"],
|
|
73
|
+
result_key="transformed_data",
|
|
74
|
+
)
|
|
75
|
+
.retry(max_retries=3, delay=timedelta(seconds=10))
|
|
76
|
+
.task("load", "etl.load_data", args=["{{transformed_data}}"])
|
|
77
|
+
.timeout(timeout=timedelta(minutes=30))
|
|
78
|
+
.wait("wait_next", timedelta(hours=24))
|
|
79
|
+
.task("cleanup", "etl.cleanup")
|
|
80
|
+
.build()
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
print(workflow.to_yaml())
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Advanced Usage
|
|
87
|
+
|
|
88
|
+
### Conditional Logic
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
from highway_dsl import WorkflowBuilder, RetryPolicy
|
|
92
|
+
from datetime import timedelta
|
|
93
|
+
|
|
94
|
+
builder = WorkflowBuilder("data_processing_pipeline")
|
|
95
|
+
|
|
96
|
+
builder.task("start", "workflows.tasks.initialize", result_key="init_data")
|
|
97
|
+
builder.task(
|
|
98
|
+
"validate",
|
|
99
|
+
"workflows.tasks.validate_data",
|
|
100
|
+
args=["{{init_data}}"],
|
|
101
|
+
result_key="validated_data",
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
builder.condition(
|
|
105
|
+
"check_quality",
|
|
106
|
+
condition="{{validated_data.quality_score}} > 0.8",
|
|
107
|
+
if_true=lambda b: b.task(
|
|
108
|
+
"high_quality_processing",
|
|
109
|
+
"workflows.tasks.advanced_processing",
|
|
110
|
+
args=["{{validated_data}}"],
|
|
111
|
+
retry_policy=RetryPolicy(max_retries=5, delay=timedelta(seconds=10), backoff_factor=2.0),
|
|
112
|
+
),
|
|
113
|
+
if_false=lambda b: b.task(
|
|
114
|
+
"standard_processing",
|
|
115
|
+
"workflows.tasks.basic_processing",
|
|
116
|
+
args=["{{validated_data}}"],
|
|
117
|
+
),
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
workflow = builder.build()
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### While Loops
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
from highway_dsl import WorkflowBuilder
|
|
127
|
+
|
|
128
|
+
builder = WorkflowBuilder("qa_rework_workflow")
|
|
129
|
+
|
|
130
|
+
builder.task("start_qa", "workflows.tasks.start_qa", result_key="qa_results")
|
|
131
|
+
|
|
132
|
+
builder.while_loop(
|
|
133
|
+
"qa_rework_loop",
|
|
134
|
+
condition="{{qa_results.status}} == 'failed'",
|
|
135
|
+
loop_body=lambda b: b.task("perform_rework", "workflows.tasks.perform_rework").task(
|
|
136
|
+
"re_run_qa", "workflows.tasks.run_qa", result_key="qa_results"
|
|
137
|
+
),
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
builder.task("finalize_product", "workflows.tasks.finalize_product", dependencies=["qa_rework_loop"])
|
|
141
|
+
|
|
142
|
+
workflow = builder.build()
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### For-Each Loops with Proper Dependency Management
|
|
146
|
+
|
|
147
|
+
This release fixes a bug where tasks inside a foreach loop incorrectly inherited dependencies from an enclosing parallel operator:
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
# This loop now properly encapsulates its internal tasks
|
|
151
|
+
builder.foreach(
|
|
152
|
+
"process_items",
|
|
153
|
+
items="{{data.items}}",
|
|
154
|
+
loop_body=lambda fb: fb.task("process_item", "processor.handle_item", args=["{{item.id}}"])
|
|
155
|
+
# Loop body tasks only have proper dependencies, not unwanted "grandparent" dependencies
|
|
156
|
+
)
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### Retry Policies
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
from highway_dsl import RetryPolicy
|
|
163
|
+
from datetime import timedelta
|
|
164
|
+
|
|
165
|
+
builder.task(
|
|
166
|
+
"reliable_task",
|
|
167
|
+
"service.operation",
|
|
168
|
+
retry_policy=RetryPolicy(
|
|
169
|
+
max_retries=5,
|
|
170
|
+
delay=timedelta(seconds=10),
|
|
171
|
+
backoff_factor=2.0
|
|
172
|
+
)
|
|
173
|
+
)
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### Timeout Policies
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
from highway_dsl import TimeoutPolicy
|
|
180
|
+
from datetime import timedelta
|
|
181
|
+
|
|
182
|
+
builder.task(
|
|
183
|
+
"timed_task",
|
|
184
|
+
"service.operation",
|
|
185
|
+
timeout_policy=TimeoutPolicy(
|
|
186
|
+
timeout=timedelta(hours=1),
|
|
187
|
+
kill_on_timeout=True
|
|
188
|
+
)
|
|
189
|
+
)
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
## What's New in Version 1.0.2
|
|
193
|
+
|
|
194
|
+
### Bug Fixes
|
|
195
|
+
* **Fixed ForEach Operator Bug**: Resolved an issue where tasks inside a foreach loop incorrectly received "grandparent" dependencies from an enclosing parallel operator. Loop body tasks are now properly encapsulated and depend only on their parent loop operator and on the dependency chain inside the loop.
|
|
196
|
+
|
|
197
|
+
### Enhancements
|
|
198
|
+
* **Improved Loop Dependency Management**: While loops and ForEach loops now properly encapsulate their internal dependencies and are no longer affected by an enclosing parallel operator.
|
|
199
|
+
* **Better Error Handling**: Enhanced error handling throughout the DSL.
|
|
200
|
+
* **Comprehensive Test Suite**: Added functional tests for all example workflows to ensure consistency.
|
|
201
|
+
|
|
202
|
+
## Development
|
|
203
|
+
|
|
204
|
+
To set up the development environment:
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
git clone https://github.com/rodmena-limited/highway_dsl.git
|
|
208
|
+
cd highway_dsl
|
|
209
|
+
python -m venv .venv
|
|
210
|
+
source .venv/bin/activate
|
|
211
|
+
pip install -e .[dev]
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
### Running Tests
|
|
215
|
+
|
|
216
|
+
```bash
|
|
217
|
+
pytest
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
### Type Checking
|
|
221
|
+
|
|
222
|
+
```bash
|
|
223
|
+
mypy .
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
## License
|
|
227
|
+
|
|
228
|
+
MIT License
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
highway_dsl/__init__.py,sha256=mr1oMylxliFwu2VO2qpyM3sVQwYIoPL2P6JE-6ZuF7M,507
|
|
2
|
+
highway_dsl/workflow_dsl.py,sha256=bhCKDPrMaIkEI4HduKoeqd2VlZsK8wjr8RURifPufGU,14700
|
|
3
|
+
highway_dsl-1.0.2.dist-info/licenses/LICENSE,sha256=qdFq1H66BvKg67mf4-WGpFwtG2u_dNknxuJDQ1_ubaY,1072
|
|
4
|
+
highway_dsl-1.0.2.dist-info/METADATA,sha256=uCUL4xLYOkZ10TzFzxn6jZ3rjtcL_h7f5fLX5RD41Kk,7187
|
|
5
|
+
highway_dsl-1.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
6
|
+
highway_dsl-1.0.2.dist-info/top_level.txt,sha256=_5uX-bbBsQ2rsi1XMr7WRyKbr6ack5GqVBcy-QjF1C8,12
|
|
7
|
+
highway_dsl-1.0.2.dist-info/RECORD,,
|
|
@@ -1,227 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: highway_dsl
|
|
3
|
-
Version: 0.0.2
|
|
4
|
-
Summary: A domain specific language (DSL) for defining and managing data processing pipelines.
|
|
5
|
-
Author-email: Farseed Ashouri <farseed.ashouri@gmail.com>
|
|
6
|
-
License: MIT
|
|
7
|
-
Project-URL: Homepage, https://github.com/rodmena-limited/highway_dsl
|
|
8
|
-
Project-URL: Issues, https://github.com/rodmena-limited/highway_dsl/issues
|
|
9
|
-
Classifier: Programming Language :: Python :: 3
|
|
10
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
-
Classifier: Operating System :: OS Independent
|
|
12
|
-
Requires-Python: >=3.9
|
|
13
|
-
Description-Content-Type: text/markdown
|
|
14
|
-
License-File: LICENSE
|
|
15
|
-
Requires-Dist: pydantic>=2.12.3
|
|
16
|
-
Requires-Dist: pyyaml>=6.0
|
|
17
|
-
Provides-Extra: dev
|
|
18
|
-
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
19
|
-
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
20
|
-
Requires-Dist: types-PyYAML>=6.0.0; extra == "dev"
|
|
21
|
-
Requires-Dist: pytest-cov>=2.12.1; extra == "dev"
|
|
22
|
-
Dynamic: license-file
|
|
23
|
-
|
|
24
|
-
# Highway DSL
|
|
25
|
-
|
|
26
|
-
Highway DSL is a Python-based Domain Specific Language (DSL) for defining and managing complex workflows. It allows users to declaratively specify tasks, dependencies, and execution parameters, supporting various control flow mechanisms like conditions, parallel execution, and retries.
|
|
27
|
-
|
|
28
|
-
## Features
|
|
29
|
-
|
|
30
|
-
* **Declarative Workflow Definition:** Define workflows using a clear and concise Python API or through YAML/JSON configurations.
|
|
31
|
-
* **Pydantic Models:** Leverages Pydantic for robust data validation and serialization/deserialization of workflow definitions.
|
|
32
|
-
* **Rich Task Types:** Supports various operators including:
|
|
33
|
-
* `TaskOperator`: Executes a Python function.
|
|
34
|
-
* `ConditionOperator`: Enables conditional branching based on expressions.
|
|
35
|
-
* `WaitOperator`: Pauses workflow execution for a specified duration or until a specific datetime.
|
|
36
|
-
* `ParallelOperator`: Executes multiple branches of tasks concurrently.
|
|
37
|
-
* `ForEachOperator`: Iterates over a collection, executing a chain of tasks for each item.
|
|
38
|
-
* **Retry and Timeout Policies:** Define retry strategies and timeout limits for individual tasks.
|
|
39
|
-
* **Serialization/Deserialization:** Seamless conversion of workflow definitions between Python objects, YAML, and JSON formats.
|
|
40
|
-
* **Workflow Builder:** A fluent API for constructing workflows programmatically.
|
|
41
|
-
|
|
42
|
-
### Feature Overview
|
|
43
|
-
|
|
44
|
-
```mermaid
|
|
45
|
-
graph TD
|
|
46
|
-
A[Workflow] --> B{TaskOperator};
|
|
47
|
-
A --> C{ConditionOperator};
|
|
48
|
-
A --> D{WaitOperator};
|
|
49
|
-
A --> E{ParallelOperator};
|
|
50
|
-
A --> F{ForEachOperator};
|
|
51
|
-
|
|
52
|
-
B --> G[Executes Python Function];
|
|
53
|
-
C --> H{If/Else Branching};
|
|
54
|
-
D --> I[Pauses Execution];
|
|
55
|
-
E --> J[Concurrent Branches];
|
|
56
|
-
F --> K[Iterates Over Items];
|
|
57
|
-
|
|
58
|
-
subgraph Policies
|
|
59
|
-
B --> L[RetryPolicy];
|
|
60
|
-
B --> M[TimeoutPolicy];
|
|
61
|
-
end
|
|
62
|
-
```
|
|
63
|
-
|
|
64
|
-
## Installation
|
|
65
|
-
|
|
66
|
-
To install Highway DSL, you can use pip:
|
|
67
|
-
|
|
68
|
-
```bash
|
|
69
|
-
pip install highway-dsl
|
|
70
|
-
```
|
|
71
|
-
|
|
72
|
-
If you want to install it for development, including testing dependencies:
|
|
73
|
-
|
|
74
|
-
```bash
|
|
75
|
-
pip install "highway-dsl[dev]"
|
|
76
|
-
```
|
|
77
|
-
|
|
78
|
-
## Usage
|
|
79
|
-
|
|
80
|
-
### Defining a Simple Workflow
|
|
81
|
-
|
|
82
|
-
```python
|
|
83
|
-
from datetime import timedelta
|
|
84
|
-
from highway_dsl import WorkflowBuilder
|
|
85
|
-
|
|
86
|
-
def demonstrate_basic_workflow():
|
|
87
|
-
"""Show a simple complete workflow using just the builder"""
|
|
88
|
-
|
|
89
|
-
workflow = (
|
|
90
|
-
WorkflowBuilder("simple_etl")
|
|
91
|
-
.task("extract", "etl.extract_data", result_key="raw_data")
|
|
92
|
-
.task(
|
|
93
|
-
"transform",
|
|
94
|
-
"etl.transform_data",
|
|
95
|
-
args=["{{raw_data}}"],
|
|
96
|
-
result_key="transformed_data",
|
|
97
|
-
)
|
|
98
|
-
.retry(max_retries=3, delay=timedelta(seconds=10))
|
|
99
|
-
.task("load", "etl.load_data", args=["{{transformed_data}}"])
|
|
100
|
-
.timeout(timeout=timedelta(minutes=30))
|
|
101
|
-
.wait("wait_next", timedelta(hours=24))
|
|
102
|
-
.task("cleanup", "etl.cleanup")
|
|
103
|
-
.build()
|
|
104
|
-
)
|
|
105
|
-
|
|
106
|
-
workflow.set_variables(
|
|
107
|
-
{"database_url": "postgresql://localhost/mydb", "chunk_size": 1000}
|
|
108
|
-
)
|
|
109
|
-
|
|
110
|
-
return workflow
|
|
111
|
-
|
|
112
|
-
if __name__ == "__main__":
|
|
113
|
-
basic_workflow = demonstrate_basic_workflow()
|
|
114
|
-
print(basic_workflow.to_yaml())
|
|
115
|
-
```
|
|
116
|
-
|
|
117
|
-
### Defining a Complex Workflow
|
|
118
|
-
|
|
119
|
-
Refer to `example_usage.py` for a more complex example demonstrating conditional logic, parallel execution, and iteration.
|
|
120
|
-
|
|
121
|
-
### YAML Configuration
|
|
122
|
-
|
|
123
|
-
You can also define workflows directly in YAML:
|
|
124
|
-
|
|
125
|
-
```yaml
|
|
126
|
-
name: simple_etl
|
|
127
|
-
version: 1.0.0
|
|
128
|
-
description: Simple ETL workflow with retry and timeout
|
|
129
|
-
variables:
|
|
130
|
-
database_url: postgresql://localhost/mydb
|
|
131
|
-
chunk_size: 1000
|
|
132
|
-
start_task: extract
|
|
133
|
-
tasks:
|
|
134
|
-
extract:
|
|
135
|
-
task_id: extract
|
|
136
|
-
operator_type: task
|
|
137
|
-
function: etl.extract_data
|
|
138
|
-
result_key: raw_data
|
|
139
|
-
dependencies: []
|
|
140
|
-
metadata: {}
|
|
141
|
-
|
|
142
|
-
transform:
|
|
143
|
-
task_id: transform
|
|
144
|
-
operator_type: task
|
|
145
|
-
function: etl.transform_data
|
|
146
|
-
args: ["{{raw_data}}"]
|
|
147
|
-
result_key: transformed_data
|
|
148
|
-
dependencies: ["extract"]
|
|
149
|
-
retry_policy:
|
|
150
|
-
max_retries: 3
|
|
151
|
-
delay: PT10S
|
|
152
|
-
backoff_factor: 2.0
|
|
153
|
-
metadata: {}
|
|
154
|
-
|
|
155
|
-
load:
|
|
156
|
-
task_id: load
|
|
157
|
-
operator_type: task
|
|
158
|
-
function: etl.load_data
|
|
159
|
-
args: ["{{transformed_data}}"]
|
|
160
|
-
dependencies: ["transform"]
|
|
161
|
-
timeout_policy:
|
|
162
|
-
timeout: PT30M
|
|
163
|
-
kill_on_timeout: true
|
|
164
|
-
metadata: {}
|
|
165
|
-
|
|
166
|
-
wait_next:
|
|
167
|
-
task_id: wait_next
|
|
168
|
-
operator_type: wait
|
|
169
|
-
wait_for: "P1D"
|
|
170
|
-
dependencies: ["load"]
|
|
171
|
-
metadata: {}
|
|
172
|
-
|
|
173
|
-
cleanup:
|
|
174
|
-
task_id: cleanup
|
|
175
|
-
operator_type: task
|
|
176
|
-
function: etl.cleanup
|
|
177
|
-
dependencies: ["wait_next"]
|
|
178
|
-
metadata: {}
|
|
179
|
-
```
|
|
180
|
-
|
|
181
|
-
To load this YAML:
|
|
182
|
-
|
|
183
|
-
```python
|
|
184
|
-
from highway_dsl import Workflow
|
|
185
|
-
|
|
186
|
-
yaml_content = """
|
|
187
|
-
# ... (yaml content from above)
|
|
188
|
-
"""
|
|
189
|
-
|
|
190
|
-
workflow = Workflow.from_yaml(yaml_content)
|
|
191
|
-
print(workflow.name)
|
|
192
|
-
```
|
|
193
|
-
|
|
194
|
-
## Development
|
|
195
|
-
|
|
196
|
-
### Running Tests
|
|
197
|
-
|
|
198
|
-
To run the unit tests, navigate to the project root and execute:
|
|
199
|
-
|
|
200
|
-
```bash
|
|
201
|
-
pytest
|
|
202
|
-
```
|
|
203
|
-
|
|
204
|
-
### Type Checking
|
|
205
|
-
|
|
206
|
-
To perform static type checking with MyPy:
|
|
207
|
-
|
|
208
|
-
```bash
|
|
209
|
-
mypy .
|
|
210
|
-
```
|
|
211
|
-
|
|
212
|
-
## Project Structure
|
|
213
|
-
|
|
214
|
-
```
|
|
215
|
-
.highway/
|
|
216
|
-
├── highway_dsl/
|
|
217
|
-
│ ├── __init__.py # Exposes the public API
|
|
218
|
-
│ └── workflow_dsl.py # Core DSL definitions (Pydantic models)
|
|
219
|
-
├── example_usage.py # Examples of how to use the DSL
|
|
220
|
-
├── tests/
|
|
221
|
-
│ ├── __init__.py
|
|
222
|
-
│ ├── conftest.py # Pytest configuration
|
|
223
|
-
│ └── test_workflow_dsl.py # Unit and integration tests
|
|
224
|
-
├── pyproject.toml # Project metadata and dependencies
|
|
225
|
-
├── README.md # This file
|
|
226
|
-
└── SUMMARY.md # Summary of changes and future instructions
|
|
227
|
-
```
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
highway_dsl/__init__.py,sha256=8qmPd9ZZNgwPGZuWwPYvMOljg73BJIT2SSM7iIRycmw,447
|
|
2
|
-
highway_dsl/workflow_dsl.py,sha256=2QWDhbXLPulq_kTZk_Yjs6L3BNwws_H6EDV0S1CjOXs,9205
|
|
3
|
-
highway_dsl-0.0.2.dist-info/licenses/LICENSE,sha256=qdFq1H66BvKg67mf4-WGpFwtG2u_dNknxuJDQ1_ubaY,1072
|
|
4
|
-
highway_dsl-0.0.2.dist-info/METADATA,sha256=uLLXSVlLWM8H6F5wR1huiAtgXfkIVdmLV-XsYwZkW6s,6390
|
|
5
|
-
highway_dsl-0.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
6
|
-
highway_dsl-0.0.2.dist-info/top_level.txt,sha256=_5uX-bbBsQ2rsi1XMr7WRyKbr6ack5GqVBcy-QjF1C8,12
|
|
7
|
-
highway_dsl-0.0.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|