highway-dsl 0.0.2__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of highway-dsl might be problematic. Click here for more details.

highway_dsl/__init__.py CHANGED
@@ -6,6 +6,7 @@ from .workflow_dsl import (
6
6
  ParallelOperator,
7
7
  WaitOperator,
8
8
  ForEachOperator,
9
+ WhileOperator,
9
10
  RetryPolicy,
10
11
  TimeoutPolicy,
11
12
  OperatorType,
@@ -14,11 +15,13 @@ from .workflow_dsl import (
14
15
  __all__ = [
15
16
  "Workflow",
16
17
  "WorkflowBuilder",
18
+ "BaseOperator",
17
19
  "TaskOperator",
18
20
  "ConditionOperator",
19
21
  "ParallelOperator",
20
22
  "WaitOperator",
21
23
  "ForEachOperator",
24
+ "WhileOperator",
22
25
  "RetryPolicy",
23
26
  "TimeoutPolicy",
24
27
  "OperatorType",
@@ -16,6 +16,7 @@ class OperatorType(Enum):
16
16
  FOREACH = "foreach"
17
17
  SWITCH = "switch"
18
18
  TRY_CATCH = "try_catch"
19
+ WHILE = "while"
19
20
 
20
21
 
21
22
  class RetryPolicy(BaseModel):
@@ -38,6 +39,9 @@ class BaseOperator(BaseModel, ABC):
38
39
  retry_policy: Optional[RetryPolicy] = None
39
40
  timeout_policy: Optional[TimeoutPolicy] = None
40
41
  metadata: Dict[str, Any] = Field(default_factory=dict)
42
+ is_internal_loop_task: bool = Field(
43
+ default=False, exclude=True
44
+ ) # Mark if task is internal to a loop
41
45
 
42
46
  model_config = ConfigDict(use_enum_values=True, arbitrary_types_allowed=True)
43
47
 
@@ -52,8 +56,8 @@ class TaskOperator(BaseOperator):
52
56
 
53
57
  class ConditionOperator(BaseOperator):
54
58
  condition: str
55
- if_true: str
56
- if_false: str
59
+ if_true: Optional[str]
60
+ if_false: Optional[str]
57
61
  operator_type: OperatorType = Field(OperatorType.CONDITION, frozen=True)
58
62
 
59
63
 
@@ -90,10 +94,34 @@ class ParallelOperator(BaseOperator):
90
94
 
91
95
  class ForEachOperator(BaseOperator):
92
96
  items: str
93
- task_chain: List[str] = Field(default_factory=list)
97
+ loop_body: List[
98
+ Union[
99
+ TaskOperator,
100
+ ConditionOperator,
101
+ WaitOperator,
102
+ ParallelOperator,
103
+ "ForEachOperator",
104
+ "WhileOperator",
105
+ ]
106
+ ] = Field(default_factory=list)
94
107
  operator_type: OperatorType = Field(OperatorType.FOREACH, frozen=True)
95
108
 
96
109
 
110
+ class WhileOperator(BaseOperator):
111
+ condition: str
112
+ loop_body: List[
113
+ Union[
114
+ TaskOperator,
115
+ ConditionOperator,
116
+ WaitOperator,
117
+ ParallelOperator,
118
+ ForEachOperator,
119
+ "WhileOperator",
120
+ ]
121
+ ] = Field(default_factory=list)
122
+ operator_type: OperatorType = Field(OperatorType.WHILE, frozen=True)
123
+
124
+
97
125
  class Workflow(BaseModel):
98
126
  name: str
99
127
  version: str = "1.0.0"
@@ -106,6 +134,7 @@ class Workflow(BaseModel):
106
134
  WaitOperator,
107
135
  ParallelOperator,
108
136
  ForEachOperator,
137
+ WhileOperator,
109
138
  ],
110
139
  ] = Field(default_factory=dict)
111
140
  variables: Dict[str, Any] = Field(default_factory=dict)
@@ -122,6 +151,7 @@ class Workflow(BaseModel):
122
151
  OperatorType.WAIT.value: WaitOperator,
123
152
  OperatorType.PARALLEL.value: ParallelOperator,
124
153
  OperatorType.FOREACH.value: ForEachOperator,
154
+ OperatorType.WHILE.value: WhileOperator,
125
155
  }
126
156
  for task_id, task_data in data["tasks"].items():
127
157
  operator_type = task_data.get("operator_type")
@@ -141,6 +171,7 @@ class Workflow(BaseModel):
141
171
  WaitOperator,
142
172
  ParallelOperator,
143
173
  ForEachOperator,
174
+ WhileOperator,
144
175
  ],
145
176
  ) -> "Workflow":
146
177
  self.tasks[task.task_id] = task
@@ -172,34 +203,80 @@ class Workflow(BaseModel):
172
203
 
173
204
 
174
205
  class WorkflowBuilder:
175
- def __init__(self, name: str, existing_workflow: Optional[Workflow] = None):
206
+ def __init__(
207
+ self,
208
+ name: str,
209
+ existing_workflow: Optional[Workflow] = None,
210
+ parent: Optional["WorkflowBuilder"] = None,
211
+ ):
176
212
  if existing_workflow:
177
213
  self.workflow = existing_workflow
178
214
  else:
179
215
  self.workflow = Workflow(name=name)
180
216
  self._current_task: Optional[str] = None
217
+ self.parent = parent
218
+
219
+ def _add_task(
220
+ self,
221
+ task: Union[
222
+ TaskOperator,
223
+ ConditionOperator,
224
+ WaitOperator,
225
+ ParallelOperator,
226
+ ForEachOperator,
227
+ WhileOperator,
228
+ ],
229
+ **kwargs,
230
+ ) -> None:
231
+ dependencies = kwargs.get("dependencies", [])
232
+ if self._current_task and not dependencies:
233
+ dependencies.append(self._current_task)
234
+
235
+ task.dependencies = sorted(list(set(dependencies)))
236
+
237
+ self.workflow.add_task(task)
238
+ self._current_task = task.task_id
181
239
 
182
240
  def task(self, task_id: str, function: str, **kwargs) -> "WorkflowBuilder":
183
241
  task = TaskOperator(task_id=task_id, function=function, **kwargs)
184
- if self._current_task:
185
- task.dependencies.append(self._current_task)
186
- self.workflow.add_task(task)
187
- self._current_task = task_id
242
+ self._add_task(task, **kwargs)
188
243
  return self
189
244
 
190
245
  def condition(
191
- self, task_id: str, condition: str, if_true: str, if_false: str, **kwargs
246
+ self,
247
+ task_id: str,
248
+ condition: str,
249
+ if_true: Callable[["WorkflowBuilder"], "WorkflowBuilder"],
250
+ if_false: Callable[["WorkflowBuilder"], "WorkflowBuilder"],
251
+ **kwargs,
192
252
  ) -> "WorkflowBuilder":
253
+ true_builder = if_true(WorkflowBuilder(f"{task_id}_true", parent=self))
254
+ false_builder = if_false(WorkflowBuilder(f"{task_id}_false", parent=self))
255
+
256
+ true_tasks = list(true_builder.workflow.tasks.keys())
257
+ false_tasks = list(false_builder.workflow.tasks.keys())
258
+
193
259
  task = ConditionOperator(
194
260
  task_id=task_id,
195
261
  condition=condition,
196
- if_true=if_true,
197
- if_false=if_false,
262
+ if_true=true_tasks[0] if true_tasks else None,
263
+ if_false=false_tasks[0] if false_tasks else None,
198
264
  **kwargs,
199
265
  )
200
- if self._current_task:
201
- task.dependencies.append(self._current_task)
202
- self.workflow.add_task(task)
266
+
267
+ self._add_task(task, **kwargs)
268
+
269
+ for task_obj in true_builder.workflow.tasks.values():
270
+ # Only add the condition task as dependency, preserve original dependencies
271
+ if task_id not in task_obj.dependencies:
272
+ task_obj.dependencies.append(task_id)
273
+ self.workflow.add_task(task_obj)
274
+ for task_obj in false_builder.workflow.tasks.values():
275
+ # Only add the condition task as dependency, preserve original dependencies
276
+ if task_id not in task_obj.dependencies:
277
+ task_obj.dependencies.append(task_id)
278
+ self.workflow.add_task(task_obj)
279
+
203
280
  self._current_task = task_id
204
281
  return self
205
282
 
@@ -207,31 +284,120 @@ class WorkflowBuilder:
207
284
  self, task_id: str, wait_for: Union[timedelta, datetime, str], **kwargs
208
285
  ) -> "WorkflowBuilder":
209
286
  task = WaitOperator(task_id=task_id, wait_for=wait_for, **kwargs)
210
- if self._current_task:
211
- task.dependencies.append(self._current_task)
212
- self.workflow.add_task(task)
213
- self._current_task = task_id
287
+ self._add_task(task, **kwargs)
214
288
  return self
215
289
 
216
290
  def parallel(
217
- self, task_id: str, branches: Dict[str, List[str]], **kwargs
291
+ self,
292
+ task_id: str,
293
+ branches: Dict[str, Callable[["WorkflowBuilder"], "WorkflowBuilder"]],
294
+ **kwargs,
218
295
  ) -> "WorkflowBuilder":
219
- task = ParallelOperator(task_id=task_id, branches=branches, **kwargs)
220
- if self._current_task:
221
- task.dependencies.append(self._current_task)
222
- self.workflow.add_task(task)
296
+ branch_builders = {}
297
+ for name, branch_func in branches.items():
298
+ branch_builder = branch_func(
299
+ WorkflowBuilder(f"{task_id}_{name}", parent=self)
300
+ )
301
+ branch_builders[name] = branch_builder
302
+
303
+ branch_tasks = {
304
+ name: list(builder.workflow.tasks.keys())
305
+ for name, builder in branch_builders.items()
306
+ }
307
+
308
+ task = ParallelOperator(task_id=task_id, branches=branch_tasks, **kwargs)
309
+
310
+ self._add_task(task, **kwargs)
311
+
312
+ for builder in branch_builders.values():
313
+ for task_obj in builder.workflow.tasks.values():
314
+ # Only add the parallel task as dependency to non-internal tasks,
315
+ # preserve original dependencies
316
+ if (
317
+ not getattr(task_obj, "is_internal_loop_task", False)
318
+ and task_id not in task_obj.dependencies
319
+ ):
320
+ task_obj.dependencies.append(task_id)
321
+ self.workflow.add_task(task_obj)
322
+
223
323
  self._current_task = task_id
224
324
  return self
225
325
 
226
326
  def foreach(
227
- self, task_id: str, items: str, task_chain: List[str], **kwargs
327
+ self,
328
+ task_id: str,
329
+ items: str,
330
+ loop_body: Callable[["WorkflowBuilder"], "WorkflowBuilder"],
331
+ **kwargs,
228
332
  ) -> "WorkflowBuilder":
333
+ # Create a temporary builder for the loop body.
334
+ temp_builder = WorkflowBuilder(f"{task_id}_loop", parent=self)
335
+ loop_builder = loop_body(temp_builder)
336
+ loop_tasks = list(loop_builder.workflow.tasks.values())
337
+
338
+ # Mark all loop body tasks as internal to prevent parallel dependency injection
339
+ for task_obj in loop_tasks:
340
+ task_obj.is_internal_loop_task = True
341
+
342
+ # Create the foreach operator
229
343
  task = ForEachOperator(
230
- task_id=task_id, items=items, task_chain=task_chain, **kwargs
344
+ task_id=task_id,
345
+ items=items,
346
+ loop_body=loop_tasks,
347
+ **kwargs,
231
348
  )
232
- if self._current_task:
233
- task.dependencies.append(self._current_task)
234
- self.workflow.add_task(task)
349
+
350
+ # Add the foreach task to workflow to establish initial dependencies
351
+ self._add_task(task, **kwargs)
352
+
353
+ # Add the foreach task as dependency to the FIRST task in the loop body
354
+ # and preserve the original dependency chain within the loop
355
+ if loop_tasks:
356
+ first_task = loop_tasks[0]
357
+ if task_id not in first_task.dependencies:
358
+ first_task.dependencies.append(task_id)
359
+
360
+ # Add all loop tasks to workflow
361
+ for task_obj in loop_tasks:
362
+ self.workflow.add_task(task_obj)
363
+
364
+ self._current_task = task_id
365
+ return self
366
+
367
+ def while_loop(
368
+ self,
369
+ task_id: str,
370
+ condition: str,
371
+ loop_body: Callable[["WorkflowBuilder"], "WorkflowBuilder"],
372
+ **kwargs,
373
+ ) -> "WorkflowBuilder":
374
+ loop_builder = loop_body(WorkflowBuilder(f"{task_id}_loop", parent=self))
375
+ loop_tasks = list(loop_builder.workflow.tasks.values())
376
+
377
+ # Mark all loop body tasks as internal to prevent parallel dependency injection
378
+ for task_obj in loop_tasks:
379
+ task_obj.is_internal_loop_task = True
380
+
381
+ task = WhileOperator(
382
+ task_id=task_id,
383
+ condition=condition,
384
+ loop_body=loop_tasks,
385
+ **kwargs,
386
+ )
387
+
388
+ self._add_task(task, **kwargs)
389
+
390
+ # Fix: Only add the while task as dependency to the FIRST task in the loop body
391
+ # and preserve the original dependency chain within the loop
392
+ if loop_tasks:
393
+ first_task = loop_tasks[0]
394
+ if task_id not in first_task.dependencies:
395
+ first_task.dependencies.append(task_id)
396
+
397
+ # Add all loop tasks to workflow without modifying their dependencies further
398
+ for task_obj in loop_tasks:
399
+ self.workflow.add_task(task_obj)
400
+
235
401
  self._current_task = task_id
236
402
  return self
237
403
 
@@ -0,0 +1,228 @@
1
+ Metadata-Version: 2.4
2
+ Name: highway_dsl
3
+ Version: 1.0.2
4
+ Summary: A stable domain specific language (DSL) for defining and managing data processing pipelines and workflow engines.
5
+ Author-email: Farseed Ashouri <farseed.ashouri@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/rodmena-limited/highway_dsl
8
+ Project-URL: Issues, https://github.com/rodmena-limited/highway_dsl/issues
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Python: >=3.9
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Requires-Dist: pydantic>=2.12.3
16
+ Requires-Dist: pyyaml>=6.0
17
+ Provides-Extra: dev
18
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
19
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
20
+ Requires-Dist: types-PyYAML>=6.0.0; extra == "dev"
21
+ Requires-Dist: pytest-cov>=2.12.1; extra == "dev"
22
+ Dynamic: license-file
23
+
24
+ # Highway DSL
25
+
26
+ [![PyPI version](https://badge.fury.io/py/highway-dsl.svg)](https://badge.fury.io/py/highway-dsl)
27
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
28
+ [![Stable](https://img.shields.io/badge/Status-Stable-brightgreen)](https://pypi.org/project/highway-dsl/)
29
+
30
+ **Highway DSL** is a stable, Python-based domain-specific language for defining complex workflows in a clear, concise, and fluent manner. It is part of the larger **Highway** project, an advanced workflow engine capable of running complex DAG-based workflows.
31
+
32
+ ## Version 1.0.2 - Stable Release
33
+
34
+ This is a stable release with important bug fixes and enhancements, including a critical fix for the ForEach operator dependency management issue.
35
+
36
+ ## Features
37
+
38
+ * **Fluent API:** A powerful and intuitive `WorkflowBuilder` for defining workflows programmatically.
39
+ * **Pydantic-based:** All models are built on Pydantic, providing robust data validation, serialization, and documentation.
40
+ * **Rich Operators:** A comprehensive set of operators for handling various workflow scenarios:
41
+ * `Task` - Basic workflow steps
42
+ * `Condition` (if/else) - Conditional branching
43
+ * `Parallel` - Execute multiple branches simultaneously
44
+ * `ForEach` - Iterate over collections with proper dependency management
45
+ * `Wait` - Pause execution for scheduled tasks
46
+ * `While` - Execute loops based on conditions
47
+ * **Fixed ForEach Bug:** Proper encapsulation of loop body tasks to prevent unwanted "grandparent" dependencies from containing parallel operators.
48
+ * **YAML/JSON Interoperability:** Workflows can be defined in Python and exported to YAML or JSON, and vice-versa.
49
+ * **Retry and Timeout Policies:** Built-in error handling and execution time management.
50
+ * **Extensible:** The DSL is designed to be extensible with custom operators and policies.
51
+
52
+ ## Installation
53
+
54
+ ```bash
55
+ pip install highway-dsl
56
+ ```
57
+
58
+ ## Quick Start
59
+
60
+ Here's a simple example of how to define a workflow using the `WorkflowBuilder`:
61
+
62
+ ```python
63
+ from datetime import timedelta
64
+ from highway_dsl import WorkflowBuilder
65
+
66
+ workflow = (
67
+ WorkflowBuilder("simple_etl")
68
+ .task("extract", "etl.extract_data", result_key="raw_data")
69
+ .task(
70
+ "transform",
71
+ "etl.transform_data",
72
+ args=["{{raw_data}}"],
73
+ result_key="transformed_data",
74
+ )
75
+ .retry(max_retries=3, delay=timedelta(seconds=10))
76
+ .task("load", "etl.load_data", args=["{{transformed_data}}"])
77
+ .timeout(timeout=timedelta(minutes=30))
78
+ .wait("wait_next", timedelta(hours=24))
79
+ .task("cleanup", "etl.cleanup")
80
+ .build()
81
+ )
82
+
83
+ print(workflow.to_yaml())
84
+ ```
85
+
86
+ ## Advanced Usage
87
+
88
+ ### Conditional Logic
89
+
90
+ ```python
91
+ from highway_dsl import WorkflowBuilder, RetryPolicy
92
+ from datetime import timedelta
93
+
94
+ builder = WorkflowBuilder("data_processing_pipeline")
95
+
96
+ builder.task("start", "workflows.tasks.initialize", result_key="init_data")
97
+ builder.task(
98
+ "validate",
99
+ "workflows.tasks.validate_data",
100
+ args=["{{init_data}}"],
101
+ result_key="validated_data",
102
+ )
103
+
104
+ builder.condition(
105
+ "check_quality",
106
+ condition="{{validated_data.quality_score}} > 0.8",
107
+ if_true=lambda b: b.task(
108
+ "high_quality_processing",
109
+ "workflows.tasks.advanced_processing",
110
+ args=["{{validated_data}}"],
111
+ retry_policy=RetryPolicy(max_retries=5, delay=timedelta(seconds=10), backoff_factor=2.0),
112
+ ),
113
+ if_false=lambda b: b.task(
114
+ "standard_processing",
115
+ "workflows.tasks.basic_processing",
116
+ args=["{{validated_data}}"],
117
+ ),
118
+ )
119
+
120
+ workflow = builder.build()
121
+ ```
122
+
123
+ ### While Loops
124
+
125
+ ```python
126
+ from highway_dsl import WorkflowBuilder
127
+
128
+ builder = WorkflowBuilder("qa_rework_workflow")
129
+
130
+ builder.task("start_qa", "workflows.tasks.start_qa", result_key="qa_results")
131
+
132
+ builder.while_loop(
133
+ "qa_rework_loop",
134
+ condition="{{qa_results.status}} == 'failed'",
135
+ loop_body=lambda b: b.task("perform_rework", "workflows.tasks.perform_rework").task(
136
+ "re_run_qa", "workflows.tasks.run_qa", result_key="qa_results"
137
+ ),
138
+ )
139
+
140
+ builder.task("finalize_product", "workflows.tasks.finalize_product", dependencies=["qa_rework_loop"])
141
+
142
+ workflow = builder.build()
143
+ ```
144
+
145
+ ### For-Each Loops with Proper Dependency Management
146
+
147
+ Fixed bug where foreach loops were incorrectly inheriting dependencies from containing parallel operators:
148
+
149
+ ```python
150
+ # This loop now properly encapsulates its internal tasks
151
+ builder.foreach(
152
+ "process_items",
153
+ items="{{data.items}}",
154
+ loop_body=lambda fb: fb.task("process_item", "processor.handle_item", args=["{{item.id}}"])
155
+ # Loop body tasks only have proper dependencies, not unwanted "grandparent" dependencies
156
+ )
157
+ ```
158
+
159
+ ### Retry Policies
160
+
161
+ ```python
162
+ from highway_dsl import RetryPolicy
163
+ from datetime import timedelta
164
+
165
+ builder.task(
166
+ "reliable_task",
167
+ "service.operation",
168
+ retry_policy=RetryPolicy(
169
+ max_retries=5,
170
+ delay=timedelta(seconds=10),
171
+ backoff_factor=2.0
172
+ )
173
+ )
174
+ ```
175
+
176
+ ### Timeout Policies
177
+
178
+ ```python
179
+ from highway_dsl import TimeoutPolicy
180
+ from datetime import timedelta
181
+
182
+ builder.task(
183
+ "timed_task",
184
+ "service.operation",
185
+ timeout_policy=TimeoutPolicy(
186
+ timeout=timedelta(hours=1),
187
+ kill_on_timeout=True
188
+ )
189
+ )
190
+ ```
191
+
192
+ ## What's New in Version 1.0.2
193
+
194
+ ### Bug Fixes
195
+ * **Fixed ForEach Operator Bug**: Resolved issue where foreach loops were incorrectly getting "grandparent" dependencies from containing parallel operators. Loop body tasks are now properly encapsulated and only depend on their parent loop operator and internal chain dependencies.
196
+
197
+ ### Enhancements
198
+ * **Improved Loop Dependency Management**: While loops and ForEach loops now properly encapsulate their internal dependencies without being affected by containing parallel operators.
199
+ * **Better Error Handling**: Enhanced error handling throughout the DSL.
200
+ * **Comprehensive Test Suite**: Added functional tests for all example workflows to ensure consistency.
201
+
202
+ ## Development
203
+
204
+ To set up the development environment:
205
+
206
+ ```bash
207
+ git clone https://github.com/rodmena-limited/highway_dsl.git
208
+ cd highway_dsl
209
+ python -m venv .venv
210
+ source .venv/bin/activate
211
+ pip install -e .[dev]
212
+ ```
213
+
214
+ ### Running Tests
215
+
216
+ ```bash
217
+ pytest
218
+ ```
219
+
220
+ ### Type Checking
221
+
222
+ ```bash
223
+ mypy .
224
+ ```
225
+
226
+ ## License
227
+
228
+ MIT License
@@ -0,0 +1,7 @@
1
+ highway_dsl/__init__.py,sha256=mr1oMylxliFwu2VO2qpyM3sVQwYIoPL2P6JE-6ZuF7M,507
2
+ highway_dsl/workflow_dsl.py,sha256=bhCKDPrMaIkEI4HduKoeqd2VlZsK8wjr8RURifPufGU,14700
3
+ highway_dsl-1.0.2.dist-info/licenses/LICENSE,sha256=qdFq1H66BvKg67mf4-WGpFwtG2u_dNknxuJDQ1_ubaY,1072
4
+ highway_dsl-1.0.2.dist-info/METADATA,sha256=uCUL4xLYOkZ10TzFzxn6jZ3rjtcL_h7f5fLX5RD41Kk,7187
5
+ highway_dsl-1.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
+ highway_dsl-1.0.2.dist-info/top_level.txt,sha256=_5uX-bbBsQ2rsi1XMr7WRyKbr6ack5GqVBcy-QjF1C8,12
7
+ highway_dsl-1.0.2.dist-info/RECORD,,
@@ -1,227 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: highway_dsl
3
- Version: 0.0.2
4
- Summary: A domain specific language (DSL) for defining and managing data processing pipelines.
5
- Author-email: Farseed Ashouri <farseed.ashouri@gmail.com>
6
- License: MIT
7
- Project-URL: Homepage, https://github.com/rodmena-limited/highway_dsl
8
- Project-URL: Issues, https://github.com/rodmena-limited/highway_dsl/issues
9
- Classifier: Programming Language :: Python :: 3
10
- Classifier: License :: OSI Approved :: MIT License
11
- Classifier: Operating System :: OS Independent
12
- Requires-Python: >=3.9
13
- Description-Content-Type: text/markdown
14
- License-File: LICENSE
15
- Requires-Dist: pydantic>=2.12.3
16
- Requires-Dist: pyyaml>=6.0
17
- Provides-Extra: dev
18
- Requires-Dist: pytest>=7.0.0; extra == "dev"
19
- Requires-Dist: mypy>=1.0.0; extra == "dev"
20
- Requires-Dist: types-PyYAML>=6.0.0; extra == "dev"
21
- Requires-Dist: pytest-cov>=2.12.1; extra == "dev"
22
- Dynamic: license-file
23
-
24
- # Highway DSL
25
-
26
- Highway DSL is a Python-based Domain Specific Language (DSL) for defining and managing complex workflows. It allows users to declaratively specify tasks, dependencies, and execution parameters, supporting various control flow mechanisms like conditions, parallel execution, and retries.
27
-
28
- ## Features
29
-
30
- * **Declarative Workflow Definition:** Define workflows using a clear and concise Python API or through YAML/JSON configurations.
31
- * **Pydantic Models:** Leverages Pydantic for robust data validation and serialization/deserialization of workflow definitions.
32
- * **Rich Task Types:** Supports various operators including:
33
- * `TaskOperator`: Executes a Python function.
34
- * `ConditionOperator`: Enables conditional branching based on expressions.
35
- * `WaitOperator`: Pauses workflow execution for a specified duration or until a specific datetime.
36
- * `ParallelOperator`: Executes multiple branches of tasks concurrently.
37
- * `ForEachOperator`: Iterates over a collection, executing a chain of tasks for each item.
38
- * **Retry and Timeout Policies:** Define retry strategies and timeout limits for individual tasks.
39
- * **Serialization/Deserialization:** Seamless conversion of workflow definitions between Python objects, YAML, and JSON formats.
40
- * **Workflow Builder:** A fluent API for constructing workflows programmatically.
41
-
42
- ### Feature Overview
43
-
44
- ```mermaid
45
- graph TD
46
- A[Workflow] --> B{TaskOperator};
47
- A --> C{ConditionOperator};
48
- A --> D{WaitOperator};
49
- A --> E{ParallelOperator};
50
- A --> F{ForEachOperator};
51
-
52
- B --> G[Executes Python Function];
53
- C --> H{If/Else Branching};
54
- D --> I[Pauses Execution];
55
- E --> J[Concurrent Branches];
56
- F --> K[Iterates Over Items];
57
-
58
- subgraph Policies
59
- B --> L[RetryPolicy];
60
- B --> M[TimeoutPolicy];
61
- end
62
- ```
63
-
64
- ## Installation
65
-
66
- To install Highway DSL, you can use pip:
67
-
68
- ```bash
69
- pip install highway-dsl
70
- ```
71
-
72
- If you want to install it for development, including testing dependencies:
73
-
74
- ```bash
75
- pip install "highway-dsl[dev]"
76
- ```
77
-
78
- ## Usage
79
-
80
- ### Defining a Simple Workflow
81
-
82
- ```python
83
- from datetime import timedelta
84
- from highway_dsl import WorkflowBuilder
85
-
86
- def demonstrate_basic_workflow():
87
- """Show a simple complete workflow using just the builder"""
88
-
89
- workflow = (
90
- WorkflowBuilder("simple_etl")
91
- .task("extract", "etl.extract_data", result_key="raw_data")
92
- .task(
93
- "transform",
94
- "etl.transform_data",
95
- args=["{{raw_data}}"],
96
- result_key="transformed_data",
97
- )
98
- .retry(max_retries=3, delay=timedelta(seconds=10))
99
- .task("load", "etl.load_data", args=["{{transformed_data}}"])
100
- .timeout(timeout=timedelta(minutes=30))
101
- .wait("wait_next", timedelta(hours=24))
102
- .task("cleanup", "etl.cleanup")
103
- .build()
104
- )
105
-
106
- workflow.set_variables(
107
- {"database_url": "postgresql://localhost/mydb", "chunk_size": 1000}
108
- )
109
-
110
- return workflow
111
-
112
- if __name__ == "__main__":
113
- basic_workflow = demonstrate_basic_workflow()
114
- print(basic_workflow.to_yaml())
115
- ```
116
-
117
- ### Defining a Complex Workflow
118
-
119
- Refer to `example_usage.py` for a more complex example demonstrating conditional logic, parallel execution, and iteration.
120
-
121
- ### YAML Configuration
122
-
123
- You can also define workflows directly in YAML:
124
-
125
- ```yaml
126
- name: simple_etl
127
- version: 1.0.0
128
- description: Simple ETL workflow with retry and timeout
129
- variables:
130
- database_url: postgresql://localhost/mydb
131
- chunk_size: 1000
132
- start_task: extract
133
- tasks:
134
- extract:
135
- task_id: extract
136
- operator_type: task
137
- function: etl.extract_data
138
- result_key: raw_data
139
- dependencies: []
140
- metadata: {}
141
-
142
- transform:
143
- task_id: transform
144
- operator_type: task
145
- function: etl.transform_data
146
- args: ["{{raw_data}}"]
147
- result_key: transformed_data
148
- dependencies: ["extract"]
149
- retry_policy:
150
- max_retries: 3
151
- delay: PT10S
152
- backoff_factor: 2.0
153
- metadata: {}
154
-
155
- load:
156
- task_id: load
157
- operator_type: task
158
- function: etl.load_data
159
- args: ["{{transformed_data}}"]
160
- dependencies: ["transform"]
161
- timeout_policy:
162
- timeout: PT30M
163
- kill_on_timeout: true
164
- metadata: {}
165
-
166
- wait_next:
167
- task_id: wait_next
168
- operator_type: wait
169
- wait_for: "P1D"
170
- dependencies: ["load"]
171
- metadata: {}
172
-
173
- cleanup:
174
- task_id: cleanup
175
- operator_type: task
176
- function: etl.cleanup
177
- dependencies: ["wait_next"]
178
- metadata: {}
179
- ```
180
-
181
- To load this YAML:
182
-
183
- ```python
184
- from highway_dsl import Workflow
185
-
186
- yaml_content = """
187
- # ... (yaml content from above)
188
- """
189
-
190
- workflow = Workflow.from_yaml(yaml_content)
191
- print(workflow.name)
192
- ```
193
-
194
- ## Development
195
-
196
- ### Running Tests
197
-
198
- To run the unit tests, navigate to the project root and execute:
199
-
200
- ```bash
201
- pytest
202
- ```
203
-
204
- ### Type Checking
205
-
206
- To perform static type checking with MyPy:
207
-
208
- ```bash
209
- mypy .
210
- ```
211
-
212
- ## Project Structure
213
-
214
- ```
215
- .highway/
216
- ├── highway_dsl/
217
- │ ├── __init__.py # Exposes the public API
218
- │ └── workflow_dsl.py # Core DSL definitions (Pydantic models)
219
- ├── example_usage.py # Examples of how to use the DSL
220
- ├── tests/
221
- │ ├── __init__.py
222
- │ ├── conftest.py # Pytest configuration
223
- │ └── test_workflow_dsl.py # Unit and integration tests
224
- ├── pyproject.toml # Project metadata and dependencies
225
- ├── README.md # This file
226
- └── SUMMARY.md # Summary of changes and future instructions
227
- ```
@@ -1,7 +0,0 @@
1
- highway_dsl/__init__.py,sha256=8qmPd9ZZNgwPGZuWwPYvMOljg73BJIT2SSM7iIRycmw,447
2
- highway_dsl/workflow_dsl.py,sha256=2QWDhbXLPulq_kTZk_Yjs6L3BNwws_H6EDV0S1CjOXs,9205
3
- highway_dsl-0.0.2.dist-info/licenses/LICENSE,sha256=qdFq1H66BvKg67mf4-WGpFwtG2u_dNknxuJDQ1_ubaY,1072
4
- highway_dsl-0.0.2.dist-info/METADATA,sha256=uLLXSVlLWM8H6F5wR1huiAtgXfkIVdmLV-XsYwZkW6s,6390
5
- highway_dsl-0.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
- highway_dsl-0.0.2.dist-info/top_level.txt,sha256=_5uX-bbBsQ2rsi1XMr7WRyKbr6ack5GqVBcy-QjF1C8,12
7
- highway_dsl-0.0.2.dist-info/RECORD,,