flowyml-1.1.0-py3-none-any.whl

This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (159)
  1. flowyml/__init__.py +207 -0
  2. flowyml/assets/__init__.py +22 -0
  3. flowyml/assets/artifact.py +40 -0
  4. flowyml/assets/base.py +209 -0
  5. flowyml/assets/dataset.py +100 -0
  6. flowyml/assets/featureset.py +301 -0
  7. flowyml/assets/metrics.py +104 -0
  8. flowyml/assets/model.py +82 -0
  9. flowyml/assets/registry.py +157 -0
  10. flowyml/assets/report.py +315 -0
  11. flowyml/cli/__init__.py +5 -0
  12. flowyml/cli/experiment.py +232 -0
  13. flowyml/cli/init.py +256 -0
  14. flowyml/cli/main.py +327 -0
  15. flowyml/cli/run.py +75 -0
  16. flowyml/cli/stack_cli.py +532 -0
  17. flowyml/cli/ui.py +33 -0
  18. flowyml/core/__init__.py +68 -0
  19. flowyml/core/advanced_cache.py +274 -0
  20. flowyml/core/approval.py +64 -0
  21. flowyml/core/cache.py +203 -0
  22. flowyml/core/checkpoint.py +148 -0
  23. flowyml/core/conditional.py +373 -0
  24. flowyml/core/context.py +155 -0
  25. flowyml/core/error_handling.py +419 -0
  26. flowyml/core/executor.py +354 -0
  27. flowyml/core/graph.py +185 -0
  28. flowyml/core/parallel.py +452 -0
  29. flowyml/core/pipeline.py +764 -0
  30. flowyml/core/project.py +253 -0
  31. flowyml/core/resources.py +424 -0
  32. flowyml/core/scheduler.py +630 -0
  33. flowyml/core/scheduler_config.py +32 -0
  34. flowyml/core/step.py +201 -0
  35. flowyml/core/step_grouping.py +292 -0
  36. flowyml/core/templates.py +226 -0
  37. flowyml/core/versioning.py +217 -0
  38. flowyml/integrations/__init__.py +1 -0
  39. flowyml/integrations/keras.py +134 -0
  40. flowyml/monitoring/__init__.py +1 -0
  41. flowyml/monitoring/alerts.py +57 -0
  42. flowyml/monitoring/data.py +102 -0
  43. flowyml/monitoring/llm.py +160 -0
  44. flowyml/monitoring/monitor.py +57 -0
  45. flowyml/monitoring/notifications.py +246 -0
  46. flowyml/registry/__init__.py +5 -0
  47. flowyml/registry/model_registry.py +491 -0
  48. flowyml/registry/pipeline_registry.py +55 -0
  49. flowyml/stacks/__init__.py +27 -0
  50. flowyml/stacks/base.py +77 -0
  51. flowyml/stacks/bridge.py +288 -0
  52. flowyml/stacks/components.py +155 -0
  53. flowyml/stacks/gcp.py +499 -0
  54. flowyml/stacks/local.py +112 -0
  55. flowyml/stacks/migration.py +97 -0
  56. flowyml/stacks/plugin_config.py +78 -0
  57. flowyml/stacks/plugins.py +401 -0
  58. flowyml/stacks/registry.py +226 -0
  59. flowyml/storage/__init__.py +26 -0
  60. flowyml/storage/artifacts.py +246 -0
  61. flowyml/storage/materializers/__init__.py +20 -0
  62. flowyml/storage/materializers/base.py +133 -0
  63. flowyml/storage/materializers/keras.py +185 -0
  64. flowyml/storage/materializers/numpy.py +94 -0
  65. flowyml/storage/materializers/pandas.py +142 -0
  66. flowyml/storage/materializers/pytorch.py +135 -0
  67. flowyml/storage/materializers/sklearn.py +110 -0
  68. flowyml/storage/materializers/tensorflow.py +152 -0
  69. flowyml/storage/metadata.py +931 -0
  70. flowyml/tracking/__init__.py +1 -0
  71. flowyml/tracking/experiment.py +211 -0
  72. flowyml/tracking/leaderboard.py +191 -0
  73. flowyml/tracking/runs.py +145 -0
  74. flowyml/ui/__init__.py +15 -0
  75. flowyml/ui/backend/Dockerfile +31 -0
  76. flowyml/ui/backend/__init__.py +0 -0
  77. flowyml/ui/backend/auth.py +163 -0
  78. flowyml/ui/backend/main.py +187 -0
  79. flowyml/ui/backend/routers/__init__.py +0 -0
  80. flowyml/ui/backend/routers/assets.py +45 -0
  81. flowyml/ui/backend/routers/execution.py +179 -0
  82. flowyml/ui/backend/routers/experiments.py +49 -0
  83. flowyml/ui/backend/routers/leaderboard.py +118 -0
  84. flowyml/ui/backend/routers/notifications.py +72 -0
  85. flowyml/ui/backend/routers/pipelines.py +110 -0
  86. flowyml/ui/backend/routers/plugins.py +192 -0
  87. flowyml/ui/backend/routers/projects.py +85 -0
  88. flowyml/ui/backend/routers/runs.py +66 -0
  89. flowyml/ui/backend/routers/schedules.py +222 -0
  90. flowyml/ui/backend/routers/traces.py +84 -0
  91. flowyml/ui/frontend/Dockerfile +20 -0
  92. flowyml/ui/frontend/README.md +315 -0
  93. flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +448 -0
  94. flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +1 -0
  95. flowyml/ui/frontend/dist/index.html +16 -0
  96. flowyml/ui/frontend/index.html +15 -0
  97. flowyml/ui/frontend/nginx.conf +26 -0
  98. flowyml/ui/frontend/package-lock.json +3545 -0
  99. flowyml/ui/frontend/package.json +33 -0
  100. flowyml/ui/frontend/postcss.config.js +6 -0
  101. flowyml/ui/frontend/src/App.jsx +21 -0
  102. flowyml/ui/frontend/src/app/assets/page.jsx +397 -0
  103. flowyml/ui/frontend/src/app/dashboard/page.jsx +295 -0
  104. flowyml/ui/frontend/src/app/experiments/[experimentId]/page.jsx +255 -0
  105. flowyml/ui/frontend/src/app/experiments/page.jsx +360 -0
  106. flowyml/ui/frontend/src/app/leaderboard/page.jsx +133 -0
  107. flowyml/ui/frontend/src/app/pipelines/page.jsx +454 -0
  108. flowyml/ui/frontend/src/app/plugins/page.jsx +48 -0
  109. flowyml/ui/frontend/src/app/projects/page.jsx +292 -0
  110. flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +682 -0
  111. flowyml/ui/frontend/src/app/runs/page.jsx +470 -0
  112. flowyml/ui/frontend/src/app/schedules/page.jsx +585 -0
  113. flowyml/ui/frontend/src/app/settings/page.jsx +314 -0
  114. flowyml/ui/frontend/src/app/tokens/page.jsx +456 -0
  115. flowyml/ui/frontend/src/app/traces/page.jsx +246 -0
  116. flowyml/ui/frontend/src/components/Layout.jsx +108 -0
  117. flowyml/ui/frontend/src/components/PipelineGraph.jsx +295 -0
  118. flowyml/ui/frontend/src/components/header/Header.jsx +72 -0
  119. flowyml/ui/frontend/src/components/plugins/AddPluginDialog.jsx +121 -0
  120. flowyml/ui/frontend/src/components/plugins/InstalledPlugins.jsx +124 -0
  121. flowyml/ui/frontend/src/components/plugins/PluginBrowser.jsx +167 -0
  122. flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +60 -0
  123. flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +145 -0
  124. flowyml/ui/frontend/src/components/ui/Badge.jsx +26 -0
  125. flowyml/ui/frontend/src/components/ui/Button.jsx +34 -0
  126. flowyml/ui/frontend/src/components/ui/Card.jsx +44 -0
  127. flowyml/ui/frontend/src/components/ui/CodeSnippet.jsx +38 -0
  128. flowyml/ui/frontend/src/components/ui/CollapsibleCard.jsx +53 -0
  129. flowyml/ui/frontend/src/components/ui/DataView.jsx +175 -0
  130. flowyml/ui/frontend/src/components/ui/EmptyState.jsx +49 -0
  131. flowyml/ui/frontend/src/components/ui/ExecutionStatus.jsx +122 -0
  132. flowyml/ui/frontend/src/components/ui/KeyValue.jsx +25 -0
  133. flowyml/ui/frontend/src/components/ui/ProjectSelector.jsx +134 -0
  134. flowyml/ui/frontend/src/contexts/ProjectContext.jsx +79 -0
  135. flowyml/ui/frontend/src/contexts/ThemeContext.jsx +54 -0
  136. flowyml/ui/frontend/src/index.css +11 -0
  137. flowyml/ui/frontend/src/layouts/MainLayout.jsx +23 -0
  138. flowyml/ui/frontend/src/main.jsx +10 -0
  139. flowyml/ui/frontend/src/router/index.jsx +39 -0
  140. flowyml/ui/frontend/src/services/pluginService.js +90 -0
  141. flowyml/ui/frontend/src/utils/api.js +47 -0
  142. flowyml/ui/frontend/src/utils/cn.js +6 -0
  143. flowyml/ui/frontend/tailwind.config.js +31 -0
  144. flowyml/ui/frontend/vite.config.js +21 -0
  145. flowyml/ui/utils.py +77 -0
  146. flowyml/utils/__init__.py +67 -0
  147. flowyml/utils/config.py +308 -0
  148. flowyml/utils/debug.py +240 -0
  149. flowyml/utils/environment.py +346 -0
  150. flowyml/utils/git.py +319 -0
  151. flowyml/utils/logging.py +61 -0
  152. flowyml/utils/performance.py +314 -0
  153. flowyml/utils/stack_config.py +296 -0
  154. flowyml/utils/validation.py +270 -0
  155. flowyml-1.1.0.dist-info/METADATA +372 -0
  156. flowyml-1.1.0.dist-info/RECORD +159 -0
  157. flowyml-1.1.0.dist-info/WHEEL +4 -0
  158. flowyml-1.1.0.dist-info/entry_points.txt +3 -0
  159. flowyml-1.1.0.dist-info/licenses/LICENSE +17 -0
flowyml/core/executor.py ADDED
@@ -0,0 +1,354 @@
+"""Executor Module - Execute pipeline steps with retry and error handling."""
+
+import time
+import traceback
+import contextlib
+from typing import Any
+from dataclasses import dataclass
+from datetime import datetime
+
+
+@dataclass
+class ExecutionResult:
+    """Result of step execution."""
+
+    step_name: str
+    success: bool
+    output: Any = None
+    error: str | None = None
+    duration_seconds: float = 0.0
+    cached: bool = False
+    skipped: bool = False
+    artifact_uri: str | None = None
+    retries: int = 0
+    timestamp: datetime | None = None
+
+    def __post_init__(self):
+        if self.timestamp is None:
+            self.timestamp = datetime.now()
+
+
+class Executor:
+    """Base executor for running pipeline steps."""
+
+    def execute_step(
+        self,
+        step,
+        inputs: dict[str, Any],
+        context_params: dict[str, Any],
+        cache_store: Any | None = None,
+    ) -> ExecutionResult:
+        """Execute a single step.
+
+        Args:
+            step: Step to execute
+            inputs: Input data for the step
+            context_params: Parameters from context
+            cache_store: Cache store for caching
+
+        Returns:
+            ExecutionResult with output or error
+        """
+        raise NotImplementedError
+
+    def execute_step_group(
+        self,
+        step_group,  # StepGroup
+        inputs: dict[str, Any],
+        context_params: dict[str, Any],
+        cache_store: Any | None = None,
+        artifact_store: Any | None = None,
+        run_id: str | None = None,
+        project_name: str = "default",
+    ) -> list[ExecutionResult]:
+        """Execute a group of steps together.
+
+        Args:
+            step_group: StepGroup to execute
+            inputs: Input data available to the group
+            context_params: Parameters from context
+            cache_store: Cache store for caching
+            artifact_store: Artifact store for materialization
+            run_id: Run identifier
+            project_name: Project name
+
+        Returns:
+            List of ExecutionResult (one per step)
+        """
+        raise NotImplementedError
+
+
+class LocalExecutor(Executor):
+    """Local executor - runs steps in the current process."""
+
+    def execute_step(
+        self,
+        step,
+        inputs: dict[str, Any],
+        context_params: dict[str, Any],
+        cache_store: Any | None = None,
+        artifact_store: Any | None = None,
+        run_id: str | None = None,
+        project_name: str = "default",
+    ) -> ExecutionResult:
+        """Execute step locally with retry, caching, and materialization."""
+        start_time = time.time()
+        retries = 0
+
+        # Check condition
+        if step.condition:
+            try:
+                # Inspect the condition's signature and pass only the
+                # inputs/context params it actually accepts.
+                import inspect
+
+                sig = inspect.signature(step.condition)
+                kwargs = {**inputs, **context_params}
+
+                # Filter kwargs to only what the condition accepts
+                cond_kwargs = {k: v for k, v in kwargs.items() if k in sig.parameters}
+
+                should_run = step.condition(**cond_kwargs)
+
+                if not should_run:
+                    duration = time.time() - start_time
+                    return ExecutionResult(
+                        step_name=step.name,
+                        success=True,
+                        output=None,  # Skipped steps produce None
+                        duration_seconds=duration,
+                        skipped=True,
+                    )
+            except Exception as e:
+                # If the condition check fails, treat it as a step failure
+                duration = time.time() - start_time
+                return ExecutionResult(
+                    step_name=step.name,
+                    success=False,
+                    error=f"Condition check failed: {str(e)}",
+                    duration_seconds=duration,
+                )
+
+        # Check cache
+        if cache_store and step.cache:
+            cache_key = step.get_cache_key(inputs)
+            cached_result = cache_store.get(cache_key)
+
+            if cached_result is not None:
+                duration = time.time() - start_time
+                return ExecutionResult(
+                    step_name=step.name,
+                    success=True,
+                    output=cached_result,
+                    duration_seconds=duration,
+                    cached=True,
+                )
+
+        # Execute with retry
+        max_retries = step.retry
+        last_error = None
+
+        for attempt in range(max_retries + 1):
+            try:
+                # Prepare arguments
+                kwargs = {**inputs, **context_params}
+
+                # Execute step
+                result = step.func(**kwargs)
+
+                # Materialize output if an artifact store is available
+                artifact_uri = None
+                if artifact_store and result is not None and run_id:
+                    with contextlib.suppress(Exception):
+                        artifact_uri = artifact_store.materialize(
+                            obj=result,
+                            name="output",  # Default name for single output
+                            run_id=run_id,
+                            step_name=step.name,
+                            project_name=project_name,
+                        )
+
+                # Cache result
+                if cache_store and step.cache:
+                    cache_key = step.get_cache_key(inputs)
+                    cache_store.set_value(
+                        cache_key,
+                        result,
+                        step.name,
+                        step.get_code_hash(),
+                    )
+
+                duration = time.time() - start_time
+                return ExecutionResult(
+                    step_name=step.name,
+                    success=True,
+                    output=result,
+                    duration_seconds=duration,
+                    retries=retries,
+                    artifact_uri=artifact_uri,
+                )
+
+            except Exception as e:
+                last_error = str(e)
+                retries += 1
+
+                if attempt < max_retries:
+                    # Wait before retrying (exponential backoff)
+                    wait_time = 2**attempt
+                    time.sleep(wait_time)
+                    continue
+
+                # All retries exhausted
+                duration = time.time() - start_time
+                return ExecutionResult(
+                    step_name=step.name,
+                    success=False,
+                    error=f"{last_error}\n{traceback.format_exc()}",
+                    duration_seconds=duration,
+                    retries=retries,
+                )
+
+        # Should never reach here
+        duration = time.time() - start_time
+        return ExecutionResult(
+            step_name=step.name,
+            success=False,
+            error=last_error,
+            duration_seconds=duration,
+            retries=retries,
+        )
+
+    def execute_step_group(
+        self,
+        step_group,  # StepGroup from step_grouping module
+        inputs: dict[str, Any],
+        context_params: dict[str, Any],
+        cache_store: Any | None = None,
+        artifact_store: Any | None = None,
+        run_id: str | None = None,
+        project_name: str = "default",
+    ) -> list[ExecutionResult]:
+        """Execute a group of steps together in the same environment.
+
+        For local execution, steps run sequentially but share the same process.
+
+        Args:
+            step_group: StepGroup containing steps to execute
+            inputs: Input data available to the group
+            context_params: Parameters from context
+            cache_store: Cache store for caching
+            artifact_store: Artifact store for materialization
+            run_id: Run identifier
+            project_name: Project name
+
+        Returns:
+            List of ExecutionResult (one per step in execution order)
+        """
+        results: list[ExecutionResult] = []
+        step_outputs = dict(inputs)  # Copy initial inputs
+
+        # Execute steps in their defined order
+        for step_name in step_group.execution_order:
+            # Find the step object
+            step = next(s for s in step_group.steps if s.name == step_name)
+
+            # Prepare inputs for this step
+            step_inputs = {}
+            for input_name in step.inputs:
+                if input_name in step_outputs:
+                    step_inputs[input_name] = step_outputs[input_name]
+
+            # Execute this step
+            result = self.execute_step(
+                step=step,
+                inputs=step_inputs,
+                context_params=context_params,
+                cache_store=cache_store,
+                artifact_store=artifact_store,
+                run_id=run_id,
+                project_name=project_name,
+            )
+
+            results.append(result)
+
+            # If the step failed, stop group execution
+            if not result.success:
+                # Mark remaining steps as skipped
+                current_index = step_group.execution_order.index(step_name)
+                remaining_steps = step_group.execution_order[current_index + 1:]
+
+                for remaining_name in remaining_steps:
+                    skip_result = ExecutionResult(
+                        step_name=remaining_name,
+                        success=True,  # Skipped steps do not count as failures
+                        error="Skipped due to earlier failure in group",
+                        skipped=True,
+                    )
+                    results.append(skip_result)
+                break
+
+            # Store outputs for subsequent steps in the group
+            if result.output is not None:
+                if len(step.outputs) == 1:
+                    step_outputs[step.outputs[0]] = result.output
+                elif isinstance(result.output, (list, tuple)) and len(result.output) == len(step.outputs):
+                    for name, val in zip(step.outputs, result.output, strict=False):
+                        step_outputs[name] = val
+                elif isinstance(result.output, dict):
+                    for name in step.outputs:
+                        if name in result.output:
+                            step_outputs[name] = result.output[name]
+                elif step.outputs:
+                    step_outputs[step.outputs[0]] = result.output
+
+        return results
+
+
+class DistributedExecutor(Executor):
+    """Distributed executor - runs steps on remote workers.
+
+    (Placeholder for a future implementation.)
+    """
+
+    def __init__(self, worker_pool_size: int = 4):
+        self.worker_pool_size = worker_pool_size
+
+    def execute_step(
+        self,
+        step,
+        inputs: dict[str, Any],
+        context_params: dict[str, Any],
+        cache_store: Any | None = None,
+    ) -> ExecutionResult:
+        """Execute step in a distributed manner."""
+        # Placeholder - would use Ray, Dask, or similar.
+        # For now, fall back to local execution.
+        local_executor = LocalExecutor()
+        return local_executor.execute_step(step, inputs, context_params, cache_store)
+
+    def execute_step_group(
+        self,
+        step_group,  # StepGroup
+        inputs: dict[str, Any],
+        context_params: dict[str, Any],
+        cache_store: Any | None = None,
+        artifact_store: Any | None = None,
+        run_id: str | None = None,
+        project_name: str = "default",
+    ) -> list[ExecutionResult]:
+        """Execute step group in a distributed manner."""
+        # Placeholder - a real implementation would ship the whole group
+        # to a remote worker. For now, fall back to local execution.
+        local_executor = LocalExecutor()
+        return local_executor.execute_step_group(
+            step_group,
+            inputs,
+            context_params,
+            cache_store,
+            artifact_store,
+            run_id,
+            project_name,
+        )
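
For reference, a minimal sketch of driving LocalExecutor directly. The real Step class lives in flowyml/core/step.py (not shown in this diff), so the _StubStep below is a hypothetical stand-in that only mimics the attributes execute_step reads (name, condition, cache, retry, func); it is not flowyml's Step API.

from flowyml.core.executor import LocalExecutor

class _StubStep:
    name = "double"
    condition = None  # no guard, so the step always runs
    cache = False     # bypasses the cache_store path entirely
    retry = 2         # up to 2 retries with exponential backoff

    @staticmethod
    def func(x):
        return x * 2

executor = LocalExecutor()
result = executor.execute_step(step=_StubStep(), inputs={"x": 21}, context_params={})
assert result.success and result.output == 42

With no cache_store or artifact_store supplied, the call exercises only the retry loop: a failing func would be retried twice, after 1s and 2s waits, before the error (with traceback) lands in ExecutionResult.error.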
flowyml/core/graph.py ADDED
@@ -0,0 +1,185 @@
+"""Graph Module - DAG construction and analysis for pipelines."""
+
+from collections import defaultdict, deque
+from dataclasses import dataclass
+from typing import Any
+
+
+@dataclass
+class Node:
+    """A node in the pipeline DAG."""
+
+    name: str
+    step: Any  # Step object
+    inputs: list[str]
+    outputs: list[str]
+
+    def __hash__(self):
+        return hash(self.name)
+
+    def __eq__(self, other):
+        return isinstance(other, Node) and self.name == other.name
+
+
+class DAG:
+    """Directed Acyclic Graph for pipeline execution planning."""
+
+    def __init__(self):
+        self.nodes: dict[str, Node] = {}
+        self.edges: dict[str, set[str]] = defaultdict(set)  # node -> dependencies
+        self.reverse_edges: dict[str, set[str]] = defaultdict(set)  # node -> dependents
+        self.asset_producers: dict[str, str] = {}  # asset -> producing node
+        self.asset_consumers: dict[str, set[str]] = defaultdict(set)  # asset -> consuming nodes
+
+    def add_node(self, node: Node) -> None:
+        """Add a node to the graph."""
+        self.nodes[node.name] = node
+
+        # Track asset production
+        for output in node.outputs:
+            self.asset_producers[output] = node.name
+
+        # Track asset consumption
+        for input_asset in node.inputs:
+            self.asset_consumers[input_asset].add(node.name)
+
+    def build_edges(self) -> None:
+        """Build edges based on asset dependencies."""
+        for node_name, node in self.nodes.items():
+            for input_asset in node.inputs:
+                if input_asset in self.asset_producers:
+                    producer = self.asset_producers[input_asset]
+                    self.edges[node_name].add(producer)
+                    self.reverse_edges[producer].add(node_name)
+
+    def topological_sort(self) -> list[Node]:
+        """Return nodes in topological order (dependencies before dependents).
+
+        Returns:
+            List of nodes in execution order
+
+        Raises:
+            ValueError: If graph contains cycles
+        """
+        in_degree = {name: len(deps) for name, deps in self.edges.items()}
+
+        # Add nodes with no dependencies
+        for name in self.nodes:
+            if name not in in_degree:
+                in_degree[name] = 0
+
+        # Find starting nodes (no dependencies)
+        queue = deque([name for name, degree in in_degree.items() if degree == 0])
+        result = []
+
+        while queue:
+            node_name = queue.popleft()
+            result.append(self.nodes[node_name])
+
+            # Reduce in-degree for dependent nodes
+            for dependent in self.reverse_edges[node_name]:
+                in_degree[dependent] -= 1
+                if in_degree[dependent] == 0:
+                    queue.append(dependent)
+
+        if len(result) != len(self.nodes):
+            raise ValueError("Pipeline contains cycles!")
+
+        return result
+
+    def get_dependencies(self, node_name: str) -> set[str]:
+        """Get direct dependencies of a node."""
+        return self.edges.get(node_name, set())
+
+    def get_dependents(self, node_name: str) -> set[str]:
+        """Get direct dependents of a node."""
+        return self.reverse_edges.get(node_name, set())
+
+    def get_all_dependencies(self, node_name: str) -> set[str]:
+        """Get all transitive dependencies of a node."""
+        visited = set()
+        queue = deque([node_name])
+
+        while queue:
+            current = queue.popleft()
+            if current in visited:
+                continue
+            visited.add(current)
+
+            for dep in self.edges.get(current, set()):
+                if dep not in visited:
+                    queue.append(dep)
+
+        visited.discard(node_name)
+        return visited
+
+    def get_all_dependents(self, node_name: str) -> set[str]:
+        """Get all transitive dependents of a node."""
+        visited = set()
+        queue = deque([node_name])
+
+        while queue:
+            current = queue.popleft()
+            if current in visited:
+                continue
+            visited.add(current)
+
+            for dep in self.reverse_edges.get(current, set()):
+                if dep not in visited:
+                    queue.append(dep)
+
+        visited.discard(node_name)
+        return visited
+
+    def validate(self) -> list[str]:
+        """Validate the graph for common issues.
+
+        Returns:
+            List of validation errors (empty if valid)
+        """
+        errors = []
+
+        # Check for undefined inputs
+        for node_name, node in self.nodes.items():
+            for input_asset in node.inputs:
+                if input_asset not in self.asset_producers:
+                    errors.append(
+                        f"Node '{node_name}' requires undefined asset '{input_asset}'",
+                    )
+
+        # Check for cycles
+        try:
+            self.topological_sort()
+        except ValueError as e:
+            errors.append(str(e))
+
+        # Check for duplicate outputs
+        output_counts = defaultdict(int)
+        for node in self.nodes.values():
+            for output in node.outputs:
+                output_counts[output] += 1
+
+        for output, count in output_counts.items():
+            if count > 1:
+                errors.append(f"Multiple nodes produce asset '{output}'")
+
+        return errors
+
+    def visualize(self) -> str:
+        """Generate a simple text visualization of the DAG."""
+        lines = ["Pipeline DAG:"]
+        lines.append("=" * 50)
+
+        try:
+            sorted_nodes = self.topological_sort()
+            for i, node in enumerate(sorted_nodes, 1):
+                deps = self.get_dependencies(node.name)
+                deps_str = ", ".join(deps) if deps else "none"
+                lines.append(f"{i}. {node.name}")
+                lines.append(f"   Inputs: {node.inputs}")
+                lines.append(f"   Outputs: {node.outputs}")
+                lines.append(f"   Dependencies: {deps_str}")
+                lines.append("")
+        except ValueError as e:
+            lines.append(f"ERROR: {e}")
+
+        return "\n".join(lines)