flowyml-1.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/__init__.py +207 -0
- flowyml/assets/__init__.py +22 -0
- flowyml/assets/artifact.py +40 -0
- flowyml/assets/base.py +209 -0
- flowyml/assets/dataset.py +100 -0
- flowyml/assets/featureset.py +301 -0
- flowyml/assets/metrics.py +104 -0
- flowyml/assets/model.py +82 -0
- flowyml/assets/registry.py +157 -0
- flowyml/assets/report.py +315 -0
- flowyml/cli/__init__.py +5 -0
- flowyml/cli/experiment.py +232 -0
- flowyml/cli/init.py +256 -0
- flowyml/cli/main.py +327 -0
- flowyml/cli/run.py +75 -0
- flowyml/cli/stack_cli.py +532 -0
- flowyml/cli/ui.py +33 -0
- flowyml/core/__init__.py +68 -0
- flowyml/core/advanced_cache.py +274 -0
- flowyml/core/approval.py +64 -0
- flowyml/core/cache.py +203 -0
- flowyml/core/checkpoint.py +148 -0
- flowyml/core/conditional.py +373 -0
- flowyml/core/context.py +155 -0
- flowyml/core/error_handling.py +419 -0
- flowyml/core/executor.py +354 -0
- flowyml/core/graph.py +185 -0
- flowyml/core/parallel.py +452 -0
- flowyml/core/pipeline.py +764 -0
- flowyml/core/project.py +253 -0
- flowyml/core/resources.py +424 -0
- flowyml/core/scheduler.py +630 -0
- flowyml/core/scheduler_config.py +32 -0
- flowyml/core/step.py +201 -0
- flowyml/core/step_grouping.py +292 -0
- flowyml/core/templates.py +226 -0
- flowyml/core/versioning.py +217 -0
- flowyml/integrations/__init__.py +1 -0
- flowyml/integrations/keras.py +134 -0
- flowyml/monitoring/__init__.py +1 -0
- flowyml/monitoring/alerts.py +57 -0
- flowyml/monitoring/data.py +102 -0
- flowyml/monitoring/llm.py +160 -0
- flowyml/monitoring/monitor.py +57 -0
- flowyml/monitoring/notifications.py +246 -0
- flowyml/registry/__init__.py +5 -0
- flowyml/registry/model_registry.py +491 -0
- flowyml/registry/pipeline_registry.py +55 -0
- flowyml/stacks/__init__.py +27 -0
- flowyml/stacks/base.py +77 -0
- flowyml/stacks/bridge.py +288 -0
- flowyml/stacks/components.py +155 -0
- flowyml/stacks/gcp.py +499 -0
- flowyml/stacks/local.py +112 -0
- flowyml/stacks/migration.py +97 -0
- flowyml/stacks/plugin_config.py +78 -0
- flowyml/stacks/plugins.py +401 -0
- flowyml/stacks/registry.py +226 -0
- flowyml/storage/__init__.py +26 -0
- flowyml/storage/artifacts.py +246 -0
- flowyml/storage/materializers/__init__.py +20 -0
- flowyml/storage/materializers/base.py +133 -0
- flowyml/storage/materializers/keras.py +185 -0
- flowyml/storage/materializers/numpy.py +94 -0
- flowyml/storage/materializers/pandas.py +142 -0
- flowyml/storage/materializers/pytorch.py +135 -0
- flowyml/storage/materializers/sklearn.py +110 -0
- flowyml/storage/materializers/tensorflow.py +152 -0
- flowyml/storage/metadata.py +931 -0
- flowyml/tracking/__init__.py +1 -0
- flowyml/tracking/experiment.py +211 -0
- flowyml/tracking/leaderboard.py +191 -0
- flowyml/tracking/runs.py +145 -0
- flowyml/ui/__init__.py +15 -0
- flowyml/ui/backend/Dockerfile +31 -0
- flowyml/ui/backend/__init__.py +0 -0
- flowyml/ui/backend/auth.py +163 -0
- flowyml/ui/backend/main.py +187 -0
- flowyml/ui/backend/routers/__init__.py +0 -0
- flowyml/ui/backend/routers/assets.py +45 -0
- flowyml/ui/backend/routers/execution.py +179 -0
- flowyml/ui/backend/routers/experiments.py +49 -0
- flowyml/ui/backend/routers/leaderboard.py +118 -0
- flowyml/ui/backend/routers/notifications.py +72 -0
- flowyml/ui/backend/routers/pipelines.py +110 -0
- flowyml/ui/backend/routers/plugins.py +192 -0
- flowyml/ui/backend/routers/projects.py +85 -0
- flowyml/ui/backend/routers/runs.py +66 -0
- flowyml/ui/backend/routers/schedules.py +222 -0
- flowyml/ui/backend/routers/traces.py +84 -0
- flowyml/ui/frontend/Dockerfile +20 -0
- flowyml/ui/frontend/README.md +315 -0
- flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +448 -0
- flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +1 -0
- flowyml/ui/frontend/dist/index.html +16 -0
- flowyml/ui/frontend/index.html +15 -0
- flowyml/ui/frontend/nginx.conf +26 -0
- flowyml/ui/frontend/package-lock.json +3545 -0
- flowyml/ui/frontend/package.json +33 -0
- flowyml/ui/frontend/postcss.config.js +6 -0
- flowyml/ui/frontend/src/App.jsx +21 -0
- flowyml/ui/frontend/src/app/assets/page.jsx +397 -0
- flowyml/ui/frontend/src/app/dashboard/page.jsx +295 -0
- flowyml/ui/frontend/src/app/experiments/[experimentId]/page.jsx +255 -0
- flowyml/ui/frontend/src/app/experiments/page.jsx +360 -0
- flowyml/ui/frontend/src/app/leaderboard/page.jsx +133 -0
- flowyml/ui/frontend/src/app/pipelines/page.jsx +454 -0
- flowyml/ui/frontend/src/app/plugins/page.jsx +48 -0
- flowyml/ui/frontend/src/app/projects/page.jsx +292 -0
- flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +682 -0
- flowyml/ui/frontend/src/app/runs/page.jsx +470 -0
- flowyml/ui/frontend/src/app/schedules/page.jsx +585 -0
- flowyml/ui/frontend/src/app/settings/page.jsx +314 -0
- flowyml/ui/frontend/src/app/tokens/page.jsx +456 -0
- flowyml/ui/frontend/src/app/traces/page.jsx +246 -0
- flowyml/ui/frontend/src/components/Layout.jsx +108 -0
- flowyml/ui/frontend/src/components/PipelineGraph.jsx +295 -0
- flowyml/ui/frontend/src/components/header/Header.jsx +72 -0
- flowyml/ui/frontend/src/components/plugins/AddPluginDialog.jsx +121 -0
- flowyml/ui/frontend/src/components/plugins/InstalledPlugins.jsx +124 -0
- flowyml/ui/frontend/src/components/plugins/PluginBrowser.jsx +167 -0
- flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +60 -0
- flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +145 -0
- flowyml/ui/frontend/src/components/ui/Badge.jsx +26 -0
- flowyml/ui/frontend/src/components/ui/Button.jsx +34 -0
- flowyml/ui/frontend/src/components/ui/Card.jsx +44 -0
- flowyml/ui/frontend/src/components/ui/CodeSnippet.jsx +38 -0
- flowyml/ui/frontend/src/components/ui/CollapsibleCard.jsx +53 -0
- flowyml/ui/frontend/src/components/ui/DataView.jsx +175 -0
- flowyml/ui/frontend/src/components/ui/EmptyState.jsx +49 -0
- flowyml/ui/frontend/src/components/ui/ExecutionStatus.jsx +122 -0
- flowyml/ui/frontend/src/components/ui/KeyValue.jsx +25 -0
- flowyml/ui/frontend/src/components/ui/ProjectSelector.jsx +134 -0
- flowyml/ui/frontend/src/contexts/ProjectContext.jsx +79 -0
- flowyml/ui/frontend/src/contexts/ThemeContext.jsx +54 -0
- flowyml/ui/frontend/src/index.css +11 -0
- flowyml/ui/frontend/src/layouts/MainLayout.jsx +23 -0
- flowyml/ui/frontend/src/main.jsx +10 -0
- flowyml/ui/frontend/src/router/index.jsx +39 -0
- flowyml/ui/frontend/src/services/pluginService.js +90 -0
- flowyml/ui/frontend/src/utils/api.js +47 -0
- flowyml/ui/frontend/src/utils/cn.js +6 -0
- flowyml/ui/frontend/tailwind.config.js +31 -0
- flowyml/ui/frontend/vite.config.js +21 -0
- flowyml/ui/utils.py +77 -0
- flowyml/utils/__init__.py +67 -0
- flowyml/utils/config.py +308 -0
- flowyml/utils/debug.py +240 -0
- flowyml/utils/environment.py +346 -0
- flowyml/utils/git.py +319 -0
- flowyml/utils/logging.py +61 -0
- flowyml/utils/performance.py +314 -0
- flowyml/utils/stack_config.py +296 -0
- flowyml/utils/validation.py +270 -0
- flowyml-1.1.0.dist-info/METADATA +372 -0
- flowyml-1.1.0.dist-info/RECORD +159 -0
- flowyml-1.1.0.dist-info/WHEEL +4 -0
- flowyml-1.1.0.dist-info/entry_points.txt +3 -0
- flowyml-1.1.0.dist-info/licenses/LICENSE +17 -0
flowyml/core/executor.py
ADDED
@@ -0,0 +1,354 @@

"""Executor Module - Execute pipeline steps with retry and error handling."""

import time
import traceback
import contextlib
from typing import Any
from dataclasses import dataclass
from datetime import datetime


@dataclass
class ExecutionResult:
    """Result of step execution."""

    step_name: str
    success: bool
    output: Any = None
    error: str | None = None
    duration_seconds: float = 0.0
    cached: bool = False
    skipped: bool = False
    artifact_uri: str | None = None
    retries: int = 0
    timestamp: datetime | None = None

    def __post_init__(self):
        if self.timestamp is None:
            self.timestamp = datetime.now()


class Executor:
    """Base executor for running pipeline steps."""

    def execute_step(
        self,
        step,
        inputs: dict[str, Any],
        context_params: dict[str, Any],
        cache_store: Any | None = None,
    ) -> ExecutionResult:
        """Execute a single step.

        Args:
            step: Step to execute
            inputs: Input data for the step
            context_params: Parameters from context
            cache_store: Cache store for caching

        Returns:
            ExecutionResult with output or error
        """
        raise NotImplementedError

    def execute_step_group(
        self,
        step_group,  # StepGroup
        inputs: dict[str, Any],
        context_params: dict[str, Any],
        cache_store: Any | None = None,
        artifact_store: Any | None = None,
        run_id: str | None = None,
        project_name: str = "default",
    ) -> list[ExecutionResult]:
        """Execute a group of steps together.

        Args:
            step_group: StepGroup to execute
            inputs: Input data available to the group
            context_params: Parameters from context
            cache_store: Cache store for caching
            artifact_store: Artifact store for materialization
            run_id: Run identifier
            project_name: Project name

        Returns:
            List of ExecutionResult (one per step)
        """
        raise NotImplementedError


class LocalExecutor(Executor):
    """Local executor - runs steps in the current process."""

    def execute_step(
        self,
        step,
        inputs: dict[str, Any],
        context_params: dict[str, Any],
        cache_store: Any | None = None,
        artifact_store: Any | None = None,
        run_id: str | None = None,
        project_name: str = "default",
    ) -> ExecutionResult:
        """Execute step locally with retry, caching, and materialization."""
        start_time = time.time()
        retries = 0

        # Check condition
        if step.condition:
            try:
                # Pass inputs and context params to the condition, but only
                # the arguments its signature actually accepts.
                import inspect

                sig = inspect.signature(step.condition)
                kwargs = {**inputs, **context_params}

                # Filter kwargs to only what condition accepts
                cond_kwargs = {k: v for k, v in kwargs.items() if k in sig.parameters}

                should_run = step.condition(**cond_kwargs)

                if not should_run:
                    duration = time.time() - start_time
                    return ExecutionResult(
                        step_name=step.name,
                        success=True,
                        output=None,  # Skipped steps produce None
                        duration_seconds=duration,
                        skipped=True,
                    )
            except Exception as e:
                # If condition check fails, treat as step failure
                duration = time.time() - start_time
                return ExecutionResult(
                    step_name=step.name,
                    success=False,
                    error=f"Condition check failed: {str(e)}",
                    duration_seconds=duration,
                )

        # Check cache
        if cache_store and step.cache:
            cache_key = step.get_cache_key(inputs)
            cached_result = cache_store.get(cache_key)

            if cached_result is not None:
                duration = time.time() - start_time
                return ExecutionResult(
                    step_name=step.name,
                    success=True,
                    output=cached_result,
                    duration_seconds=duration,
                    cached=True,
                )

        # Execute with retry
        max_retries = step.retry
        last_error = None

        for attempt in range(max_retries + 1):
            try:
                # Prepare arguments
                kwargs = {**inputs, **context_params}

                # Execute step
                result = step.func(**kwargs)

                # Materialize output if artifact store is available
                artifact_uri = None
                if artifact_store and result is not None and run_id:
                    with contextlib.suppress(Exception):
                        artifact_uri = artifact_store.materialize(
                            obj=result,
                            name="output",  # Default name for single output
                            run_id=run_id,
                            step_name=step.name,
                            project_name=project_name,
                        )

                # Cache result
                if cache_store and step.cache:
                    cache_key = step.get_cache_key(inputs)
                    cache_store.set_value(
                        cache_key,
                        result,
                        step.name,
                        step.get_code_hash(),
                    )

                duration = time.time() - start_time
                return ExecutionResult(
                    step_name=step.name,
                    success=True,
                    output=result,
                    duration_seconds=duration,
                    retries=retries,
                    artifact_uri=artifact_uri,
                )

            except Exception as e:
                last_error = str(e)
                retries += 1

                if attempt < max_retries:
                    # Wait before retry (exponential backoff)
                    wait_time = 2**attempt
                    time.sleep(wait_time)
                    continue

                # All retries exhausted
                duration = time.time() - start_time
                return ExecutionResult(
                    step_name=step.name,
                    success=False,
                    error=f"{last_error}\n{traceback.format_exc()}",
                    duration_seconds=duration,
                    retries=retries,
                )

        # Should never reach here
        duration = time.time() - start_time
        return ExecutionResult(
            step_name=step.name,
            success=False,
            error=last_error,
            duration_seconds=duration,
            retries=retries,
        )

    def execute_step_group(
        self,
        step_group,  # StepGroup from step_grouping module
        inputs: dict[str, Any],
        context_params: dict[str, Any],
        cache_store: Any | None = None,
        artifact_store: Any | None = None,
        run_id: str | None = None,
        project_name: str = "default",
    ) -> list[ExecutionResult]:
        """Execute a group of steps together in the same environment.

        For local execution, steps execute sequentially but share the same process.

        Args:
            step_group: StepGroup containing steps to execute
            inputs: Input data available to the group
            context_params: Parameters from context
            cache_store: Cache store for caching
            artifact_store: Artifact store for materialization
            run_id: Run identifier
            project_name: Project name

        Returns:
            List of ExecutionResult (one per step in execution order)
        """
        results: list[ExecutionResult] = []
        step_outputs = dict(inputs)  # Copy initial inputs

        # Execute steps in their defined order
        for step_name in step_group.execution_order:
            # Find the step object
            step = next(s for s in step_group.steps if s.name == step_name)

            # Prepare inputs for this step
            step_inputs = {}
            for input_name in step.inputs:
                if input_name in step_outputs:
                    step_inputs[input_name] = step_outputs[input_name]

            # Execute this step
            result = self.execute_step(
                step=step,
                inputs=step_inputs,
                context_params=context_params,
                cache_store=cache_store,
                artifact_store=artifact_store,
                run_id=run_id,
                project_name=project_name,
            )

            results.append(result)

            # If step failed, stop group execution
            if not result.success:
                # Mark remaining steps as skipped
                current_index = step_group.execution_order.index(step_name)
                remaining_steps = step_group.execution_order[current_index + 1 :]

                for remaining_name in remaining_steps:
                    skip_result = ExecutionResult(
                        step_name=remaining_name,
                        success=True,  # Set to True since skipped steps technically don't fail
                        error="Skipped due to earlier failure in group",
                        skipped=True,
                    )
                    results.append(skip_result)
                break

            # Store outputs for next steps in group
            if result.output is not None:
                if len(step.outputs) == 1:
                    step_outputs[step.outputs[0]] = result.output
                elif isinstance(result.output, (list, tuple)) and len(result.output) == len(step.outputs):
                    for name, val in zip(step.outputs, result.output, strict=False):
                        step_outputs[name] = val
                elif isinstance(result.output, dict):
                    for name in step.outputs:
                        if name in result.output:
                            step_outputs[name] = result.output[name]
                else:
                    if step.outputs:
                        step_outputs[step.outputs[0]] = result.output

        return results


class DistributedExecutor(Executor):
    """Distributed executor - runs steps on remote workers.

    (Placeholder for future implementation).
    """

    def __init__(self, worker_pool_size: int = 4):
        self.worker_pool_size = worker_pool_size

    def execute_step(
        self,
        step,
        inputs: dict[str, Any],
        context_params: dict[str, Any],
        cache_store: Any | None = None,
    ) -> ExecutionResult:
        """Execute step in distributed manner."""
        # Placeholder - would use Ray, Dask, or similar
        # For now, fall back to local execution
        local_executor = LocalExecutor()
        return local_executor.execute_step(step, inputs, context_params, cache_store)

    def execute_step_group(
        self,
        step_group,  # StepGroup
        inputs: dict[str, Any],
        context_params: dict[str, Any],
        cache_store: Any | None = None,
        artifact_store: Any | None = None,
        run_id: str | None = None,
        project_name: str = "default",
    ) -> list[ExecutionResult]:
        """Execute step group in distributed manner."""
        # Placeholder - in real implementation, would send entire group to remote worker
        # For now, fall back to local execution
        local_executor = LocalExecutor()
        return local_executor.execute_step_group(
            step_group,
            inputs,
            context_params,
            cache_store,
            artifact_store,
            run_id,
            project_name,
        )
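To make the executor contract concrete, here is a minimal usage sketch. The `SimpleNamespace` step is a hypothetical stand-in that supplies only the attributes `execute_step` reads on this path (`name`, `condition`, `cache`, `retry`, `func`); the real `Step` class in flowyml/core/step.py carries more state, including `get_cache_key` and `get_code_hash` for the caching branch.

from types import SimpleNamespace

from flowyml.core.executor import LocalExecutor

def train(x, lr):
    # A trivial step function; real steps are flowyml Step objects.
    return {"trained_on": x, "lr": lr}

# Hypothetical stand-in for a Step: only the attributes execute_step reads.
step = SimpleNamespace(
    name="train",
    condition=None,  # no guard, so the step always runs
    cache=False,     # skip the cache lookup/storage branches
    retry=2,         # on failure: up to 2 retries with 1s, then 2s backoff
    func=train,
)

executor = LocalExecutor()
result = executor.execute_step(
    step,
    inputs={"x": [1, 2, 3]},
    context_params={"lr": 0.01},
)
print(result.success, result.output, result.duration_seconds)

Because no `cache_store` or `artifact_store` is passed, the caching and materialization branches are skipped; with `retry=2` a failing `func` would be attempted three times in total (sleeping `2**attempt` seconds between attempts) before the error and traceback are returned in the `ExecutionResult`.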
flowyml/core/graph.py
ADDED
@@ -0,0 +1,185 @@

"""Graph Module - DAG construction and analysis for pipelines."""

from collections import defaultdict, deque
from dataclasses import dataclass
from typing import Any


@dataclass
class Node:
    """A node in the pipeline DAG."""

    name: str
    step: Any  # Step object
    inputs: list[str]
    outputs: list[str]

    def __hash__(self):
        return hash(self.name)

    def __eq__(self, other):
        return isinstance(other, Node) and self.name == other.name


class DAG:
    """Directed Acyclic Graph for pipeline execution planning."""

    def __init__(self):
        self.nodes: dict[str, Node] = {}
        self.edges: dict[str, set[str]] = defaultdict(set)  # node -> dependencies
        self.reverse_edges: dict[str, set[str]] = defaultdict(set)  # node -> dependents
        self.asset_producers: dict[str, str] = {}  # asset -> producing node
        self.asset_consumers: dict[str, set[str]] = defaultdict(set)  # asset -> consuming nodes

    def add_node(self, node: Node) -> None:
        """Add a node to the graph."""
        self.nodes[node.name] = node

        # Track asset production
        for output in node.outputs:
            self.asset_producers[output] = node.name

        # Track asset consumption
        for input_asset in node.inputs:
            self.asset_consumers[input_asset].add(node.name)

    def build_edges(self) -> None:
        """Build edges based on asset dependencies."""
        for node_name, node in self.nodes.items():
            for input_asset in node.inputs:
                if input_asset in self.asset_producers:
                    producer = self.asset_producers[input_asset]
                    self.edges[node_name].add(producer)
                    self.reverse_edges[producer].add(node_name)

    def topological_sort(self) -> list[Node]:
        """Return nodes in topological order (dependencies before dependents).

        Returns:
            List of nodes in execution order

        Raises:
            ValueError: If graph contains cycles
        """
        in_degree = {name: len(deps) for name, deps in self.edges.items()}

        # Add nodes with no dependencies
        for name in self.nodes:
            if name not in in_degree:
                in_degree[name] = 0

        # Find starting nodes (no dependencies)
        queue = deque([name for name, degree in in_degree.items() if degree == 0])
        result = []

        while queue:
            node_name = queue.popleft()
            result.append(self.nodes[node_name])

            # Reduce in-degree for dependent nodes
            for dependent in self.reverse_edges[node_name]:
                in_degree[dependent] -= 1
                if in_degree[dependent] == 0:
                    queue.append(dependent)

        if len(result) != len(self.nodes):
            raise ValueError("Pipeline contains cycles!")

        return result

    def get_dependencies(self, node_name: str) -> set[str]:
        """Get direct dependencies of a node."""
        return self.edges.get(node_name, set())

    def get_dependents(self, node_name: str) -> set[str]:
        """Get direct dependents of a node."""
        return self.reverse_edges.get(node_name, set())

    def get_all_dependencies(self, node_name: str) -> set[str]:
        """Get all transitive dependencies of a node."""
        visited = set()
        queue = deque([node_name])

        while queue:
            current = queue.popleft()
            if current in visited:
                continue
            visited.add(current)

            for dep in self.edges.get(current, set()):
                if dep not in visited:
                    queue.append(dep)

        visited.discard(node_name)
        return visited

    def get_all_dependents(self, node_name: str) -> set[str]:
        """Get all transitive dependents of a node."""
        visited = set()
        queue = deque([node_name])

        while queue:
            current = queue.popleft()
            if current in visited:
                continue
            visited.add(current)

            for dep in self.reverse_edges.get(current, set()):
                if dep not in visited:
                    queue.append(dep)

        visited.discard(node_name)
        return visited

    def validate(self) -> list[str]:
        """Validate the graph for common issues.

        Returns:
            List of validation errors (empty if valid)
        """
        errors = []

        # Check for undefined inputs
        for node_name, node in self.nodes.items():
            for input_asset in node.inputs:
                if input_asset not in self.asset_producers:
                    errors.append(
                        f"Node '{node_name}' requires undefined asset '{input_asset}'",
                    )

        # Check for cycles
        try:
            self.topological_sort()
        except ValueError as e:
            errors.append(str(e))

        # Check for duplicate outputs
        output_counts = defaultdict(int)
        for node in self.nodes.values():
            for output in node.outputs:
                output_counts[output] += 1

        for output, count in output_counts.items():
            if count > 1:
                errors.append(f"Multiple nodes produce asset '{output}'")

        return errors

    def visualize(self) -> str:
        """Generate a simple text visualization of the DAG."""
        lines = ["Pipeline DAG:"]
        lines.append("=" * 50)

        try:
            sorted_nodes = self.topological_sort()
            for i, node in enumerate(sorted_nodes, 1):
                deps = self.get_dependencies(node.name)
                deps_str = ", ".join(deps) if deps else "none"
                lines.append(f"{i}. {node.name}")
                lines.append(f"   Inputs: {node.inputs}")
                lines.append(f"   Outputs: {node.outputs}")
                lines.append(f"   Dependencies: {deps_str}")
                lines.append("")
        except ValueError as e:
            lines.append(f"ERROR: {e}")

        return "\n".join(lines)
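As a usage sketch built only from the classes above, the following wires three steps together by the assets they produce and consume, then asks the DAG for a plan (`step=None` stands in for the real Step objects):

from flowyml.core.graph import DAG, Node

dag = DAG()
dag.add_node(Node(name="load", step=None, inputs=[], outputs=["raw"]))
dag.add_node(Node(name="clean", step=None, inputs=["raw"], outputs=["features"]))
dag.add_node(Node(name="train", step=None, inputs=["features"], outputs=["model"]))
dag.build_edges()  # derive edges from asset producer/consumer relationships

print(dag.validate())                            # [] -> no issues found
print([n.name for n in dag.topological_sort()])  # ['load', 'clean', 'train']
print(dag.get_all_dependents("load"))            # {'clean', 'train'}

Because edges are derived from asset names rather than declared step-to-step, `validate` can also flag inputs that no node produces, before any step runs.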