flowyml-1.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/__init__.py +207 -0
- flowyml/assets/__init__.py +22 -0
- flowyml/assets/artifact.py +40 -0
- flowyml/assets/base.py +209 -0
- flowyml/assets/dataset.py +100 -0
- flowyml/assets/featureset.py +301 -0
- flowyml/assets/metrics.py +104 -0
- flowyml/assets/model.py +82 -0
- flowyml/assets/registry.py +157 -0
- flowyml/assets/report.py +315 -0
- flowyml/cli/__init__.py +5 -0
- flowyml/cli/experiment.py +232 -0
- flowyml/cli/init.py +256 -0
- flowyml/cli/main.py +327 -0
- flowyml/cli/run.py +75 -0
- flowyml/cli/stack_cli.py +532 -0
- flowyml/cli/ui.py +33 -0
- flowyml/core/__init__.py +68 -0
- flowyml/core/advanced_cache.py +274 -0
- flowyml/core/approval.py +64 -0
- flowyml/core/cache.py +203 -0
- flowyml/core/checkpoint.py +148 -0
- flowyml/core/conditional.py +373 -0
- flowyml/core/context.py +155 -0
- flowyml/core/error_handling.py +419 -0
- flowyml/core/executor.py +354 -0
- flowyml/core/graph.py +185 -0
- flowyml/core/parallel.py +452 -0
- flowyml/core/pipeline.py +764 -0
- flowyml/core/project.py +253 -0
- flowyml/core/resources.py +424 -0
- flowyml/core/scheduler.py +630 -0
- flowyml/core/scheduler_config.py +32 -0
- flowyml/core/step.py +201 -0
- flowyml/core/step_grouping.py +292 -0
- flowyml/core/templates.py +226 -0
- flowyml/core/versioning.py +217 -0
- flowyml/integrations/__init__.py +1 -0
- flowyml/integrations/keras.py +134 -0
- flowyml/monitoring/__init__.py +1 -0
- flowyml/monitoring/alerts.py +57 -0
- flowyml/monitoring/data.py +102 -0
- flowyml/monitoring/llm.py +160 -0
- flowyml/monitoring/monitor.py +57 -0
- flowyml/monitoring/notifications.py +246 -0
- flowyml/registry/__init__.py +5 -0
- flowyml/registry/model_registry.py +491 -0
- flowyml/registry/pipeline_registry.py +55 -0
- flowyml/stacks/__init__.py +27 -0
- flowyml/stacks/base.py +77 -0
- flowyml/stacks/bridge.py +288 -0
- flowyml/stacks/components.py +155 -0
- flowyml/stacks/gcp.py +499 -0
- flowyml/stacks/local.py +112 -0
- flowyml/stacks/migration.py +97 -0
- flowyml/stacks/plugin_config.py +78 -0
- flowyml/stacks/plugins.py +401 -0
- flowyml/stacks/registry.py +226 -0
- flowyml/storage/__init__.py +26 -0
- flowyml/storage/artifacts.py +246 -0
- flowyml/storage/materializers/__init__.py +20 -0
- flowyml/storage/materializers/base.py +133 -0
- flowyml/storage/materializers/keras.py +185 -0
- flowyml/storage/materializers/numpy.py +94 -0
- flowyml/storage/materializers/pandas.py +142 -0
- flowyml/storage/materializers/pytorch.py +135 -0
- flowyml/storage/materializers/sklearn.py +110 -0
- flowyml/storage/materializers/tensorflow.py +152 -0
- flowyml/storage/metadata.py +931 -0
- flowyml/tracking/__init__.py +1 -0
- flowyml/tracking/experiment.py +211 -0
- flowyml/tracking/leaderboard.py +191 -0
- flowyml/tracking/runs.py +145 -0
- flowyml/ui/__init__.py +15 -0
- flowyml/ui/backend/Dockerfile +31 -0
- flowyml/ui/backend/__init__.py +0 -0
- flowyml/ui/backend/auth.py +163 -0
- flowyml/ui/backend/main.py +187 -0
- flowyml/ui/backend/routers/__init__.py +0 -0
- flowyml/ui/backend/routers/assets.py +45 -0
- flowyml/ui/backend/routers/execution.py +179 -0
- flowyml/ui/backend/routers/experiments.py +49 -0
- flowyml/ui/backend/routers/leaderboard.py +118 -0
- flowyml/ui/backend/routers/notifications.py +72 -0
- flowyml/ui/backend/routers/pipelines.py +110 -0
- flowyml/ui/backend/routers/plugins.py +192 -0
- flowyml/ui/backend/routers/projects.py +85 -0
- flowyml/ui/backend/routers/runs.py +66 -0
- flowyml/ui/backend/routers/schedules.py +222 -0
- flowyml/ui/backend/routers/traces.py +84 -0
- flowyml/ui/frontend/Dockerfile +20 -0
- flowyml/ui/frontend/README.md +315 -0
- flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +448 -0
- flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +1 -0
- flowyml/ui/frontend/dist/index.html +16 -0
- flowyml/ui/frontend/index.html +15 -0
- flowyml/ui/frontend/nginx.conf +26 -0
- flowyml/ui/frontend/package-lock.json +3545 -0
- flowyml/ui/frontend/package.json +33 -0
- flowyml/ui/frontend/postcss.config.js +6 -0
- flowyml/ui/frontend/src/App.jsx +21 -0
- flowyml/ui/frontend/src/app/assets/page.jsx +397 -0
- flowyml/ui/frontend/src/app/dashboard/page.jsx +295 -0
- flowyml/ui/frontend/src/app/experiments/[experimentId]/page.jsx +255 -0
- flowyml/ui/frontend/src/app/experiments/page.jsx +360 -0
- flowyml/ui/frontend/src/app/leaderboard/page.jsx +133 -0
- flowyml/ui/frontend/src/app/pipelines/page.jsx +454 -0
- flowyml/ui/frontend/src/app/plugins/page.jsx +48 -0
- flowyml/ui/frontend/src/app/projects/page.jsx +292 -0
- flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +682 -0
- flowyml/ui/frontend/src/app/runs/page.jsx +470 -0
- flowyml/ui/frontend/src/app/schedules/page.jsx +585 -0
- flowyml/ui/frontend/src/app/settings/page.jsx +314 -0
- flowyml/ui/frontend/src/app/tokens/page.jsx +456 -0
- flowyml/ui/frontend/src/app/traces/page.jsx +246 -0
- flowyml/ui/frontend/src/components/Layout.jsx +108 -0
- flowyml/ui/frontend/src/components/PipelineGraph.jsx +295 -0
- flowyml/ui/frontend/src/components/header/Header.jsx +72 -0
- flowyml/ui/frontend/src/components/plugins/AddPluginDialog.jsx +121 -0
- flowyml/ui/frontend/src/components/plugins/InstalledPlugins.jsx +124 -0
- flowyml/ui/frontend/src/components/plugins/PluginBrowser.jsx +167 -0
- flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +60 -0
- flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +145 -0
- flowyml/ui/frontend/src/components/ui/Badge.jsx +26 -0
- flowyml/ui/frontend/src/components/ui/Button.jsx +34 -0
- flowyml/ui/frontend/src/components/ui/Card.jsx +44 -0
- flowyml/ui/frontend/src/components/ui/CodeSnippet.jsx +38 -0
- flowyml/ui/frontend/src/components/ui/CollapsibleCard.jsx +53 -0
- flowyml/ui/frontend/src/components/ui/DataView.jsx +175 -0
- flowyml/ui/frontend/src/components/ui/EmptyState.jsx +49 -0
- flowyml/ui/frontend/src/components/ui/ExecutionStatus.jsx +122 -0
- flowyml/ui/frontend/src/components/ui/KeyValue.jsx +25 -0
- flowyml/ui/frontend/src/components/ui/ProjectSelector.jsx +134 -0
- flowyml/ui/frontend/src/contexts/ProjectContext.jsx +79 -0
- flowyml/ui/frontend/src/contexts/ThemeContext.jsx +54 -0
- flowyml/ui/frontend/src/index.css +11 -0
- flowyml/ui/frontend/src/layouts/MainLayout.jsx +23 -0
- flowyml/ui/frontend/src/main.jsx +10 -0
- flowyml/ui/frontend/src/router/index.jsx +39 -0
- flowyml/ui/frontend/src/services/pluginService.js +90 -0
- flowyml/ui/frontend/src/utils/api.js +47 -0
- flowyml/ui/frontend/src/utils/cn.js +6 -0
- flowyml/ui/frontend/tailwind.config.js +31 -0
- flowyml/ui/frontend/vite.config.js +21 -0
- flowyml/ui/utils.py +77 -0
- flowyml/utils/__init__.py +67 -0
- flowyml/utils/config.py +308 -0
- flowyml/utils/debug.py +240 -0
- flowyml/utils/environment.py +346 -0
- flowyml/utils/git.py +319 -0
- flowyml/utils/logging.py +61 -0
- flowyml/utils/performance.py +314 -0
- flowyml/utils/stack_config.py +296 -0
- flowyml/utils/validation.py +270 -0
- flowyml-1.1.0.dist-info/METADATA +372 -0
- flowyml-1.1.0.dist-info/RECORD +159 -0
- flowyml-1.1.0.dist-info/WHEEL +4 -0
- flowyml-1.1.0.dist-info/entry_points.txt +3 -0
- flowyml-1.1.0.dist-info/licenses/LICENSE +17 -0
flowyml/core/pipeline.py
ADDED
@@ -0,0 +1,764 @@
"""Pipeline Module - Main orchestration for ML pipelines."""

import json
from typing import Any
from datetime import datetime
from pathlib import Path

from flowyml.core.context import Context
from flowyml.core.step import Step
from flowyml.core.graph import DAG, Node
from flowyml.core.executor import Executor, LocalExecutor, ExecutionResult
from flowyml.core.cache import CacheStore


class PipelineResult:
    """Result of pipeline execution."""

    def __init__(self, run_id: str, pipeline_name: str):
        self.run_id = run_id
        self.pipeline_name = pipeline_name
        self.success = False
        self.step_results: dict[str, ExecutionResult] = {}
        self.outputs: dict[str, Any] = {}
        self.start_time = datetime.now()
        self.end_time: datetime | None = None
        self.duration_seconds: float = 0.0

    def add_step_result(self, result: ExecutionResult) -> None:
        """Add result from a step execution."""
        self.step_results[result.step_name] = result

        # Track outputs
        if result.success and result.output is not None:
            # Assuming single output for simplicity
            self.outputs[result.step_name] = result.output

    def finalize(self, success: bool) -> None:
        """Mark pipeline as complete."""
        self.success = success
        self.end_time = datetime.now()
        self.duration_seconds = (self.end_time - self.start_time).total_seconds()

    def __getitem__(self, key: str) -> Any:
        """Allow dict-style access to outputs."""
        return self.outputs.get(key)

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary."""
        return {
            "run_id": self.run_id,
            "pipeline_name": self.pipeline_name,
            "success": self.success,
            "start_time": self.start_time.isoformat(),
            "end_time": self.end_time.isoformat() if self.end_time else None,
            "duration_seconds": self.duration_seconds,
            "steps": {
                name: {
                    "success": result.success,
                    "duration": result.duration_seconds,
                    "cached": result.cached,
                    "retries": result.retries,
                    "error": result.error,
                }
                for name, result in self.step_results.items()
            },
        }

    def summary(self) -> str:
        """Generate execution summary."""
        lines = [
            f"Pipeline: {self.pipeline_name}",
            f"Run ID: {self.run_id}",
            f"Status: {'✓ SUCCESS' if self.success else '✗ FAILED'}",
            f"Duration: {self.duration_seconds:.2f}s",
            "",
            "Steps:",
        ]

        for name, result in self.step_results.items():
            status = "✓" if result.success else "✗"
            cached = " (cached)" if result.cached else ""
            retries = f" [{result.retries} retries]" if result.retries > 0 else ""
            lines.append(
                f"  {status} {name}: {result.duration_seconds:.2f}s{cached}{retries}",
            )
            if result.error:
                lines.append(f"    Error: {result.error.split(chr(10))[0]}")

        return "\n".join(lines)

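# --- Editor's sketch (not part of pipeline.py): how a PipelineResult is
# typically consumed, using only the accessors defined above; the output key
# "model/trained" is hypothetical.
#
#     result = pipeline.run()
#     if result.success:
#         model = result["model/trained"]   # __getitem__ -> self.outputs.get(key)
#     print(result.summary())               # per-step status, durations, cache hits
#     json.dumps(result.to_dict())          # same payload _save_run writes to disk
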
class Pipeline:
    """Main pipeline class for orchestrating ML workflows.

    Example:
        >>> from flowyml import Pipeline, step, context
        >>> ctx = context(learning_rate=0.001, epochs=10)
        >>> @step(outputs=["model/trained"])
        ... def train(learning_rate: float, epochs: int):
        ...     return train_model(learning_rate, epochs)
        >>> pipeline = Pipeline("my_pipeline", context=ctx)
        >>> pipeline.add_step(train)
        >>> result = pipeline.run()
    """

    def __init__(
        self,
        name: str,
        context: Context | None = None,
        executor: Executor | None = None,
        enable_cache: bool = True,
        cache_dir: str | None = None,
        stack: Any | None = None,  # Stack instance
        project: str | None = None,  # Project name to attach to
    ):
        """Initialize pipeline.

        Args:
            name: Name of the pipeline
            context: Optional context for parameter injection
            executor: Optional executor (defaults to LocalExecutor)
            enable_cache: Whether to enable caching
            cache_dir: Optional directory for cache
            stack: Optional stack instance to run on
            project: Optional project name to attach this pipeline to.
        """
        self.name = name
        self.context = context or Context()
        self.enable_cache = enable_cache
        self.stack = stack  # Store stack instance

        self.steps: list[Step] = []
        self.dag = DAG()

        # Storage
        if cache_dir is None:
            from flowyml.utils.config import get_config

            cache_dir = str(get_config().cache_dir)

        self.cache_store = CacheStore(cache_dir) if enable_cache else None

        from flowyml.utils.config import get_config

        self.runs_dir = get_config().runs_dir
        self.runs_dir.mkdir(parents=True, exist_ok=True)

        # Initialize components from stack or defaults
        if self.stack:
            self.executor = executor or self.stack.executor
            self.metadata_store = self.stack.metadata_store
        else:
            self.executor = executor or LocalExecutor()
            # Metadata store for UI integration
            from flowyml.storage.metadata import SQLiteMetadataStore

            self.metadata_store = SQLiteMetadataStore()

        # Handle Project Attachment
        if project:
            from flowyml.core.project import ProjectManager

            manager = ProjectManager()
            # Get or create project
            proj = manager.get_project(project)
            if not proj:
                proj = manager.create_project(project)

            # Configure pipeline with project settings
            self.runs_dir = proj.runs_dir
            self.metadata_store = proj.metadata_store

            # Register pipeline with project
            if name not in proj.metadata["pipelines"]:
                proj.metadata["pipelines"].append(name)
                proj._save_metadata()

        # State
        self._built = False
        self.step_groups: list[Any] = []  # Will hold StepGroup objects

    def add_step(self, step: Step) -> "Pipeline":
        """Add a step to the pipeline.

        Args:
            step: Step to add

        Returns:
            Self for chaining
        """
        self.steps.append(step)
        self._built = False
        return self

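    # --- Editor's sketch (not part of pipeline.py): add_step() returns self,
    # so construction can be chained; the step names are hypothetical.
    #
    #     pipeline = Pipeline("demo").add_step(load).add_step(train).add_step(evaluate)
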
    def build(self) -> None:
        """Build the execution DAG."""
        if self._built:
            return

        # Clear previous DAG
        self.dag = DAG()

        # Add nodes
        for step in self.steps:
            node = Node(
                name=step.name,
                step=step,
                inputs=step.inputs,
                outputs=step.outputs,
            )
            self.dag.add_node(node)

        # Build edges
        self.dag.build_edges()

        # Validate
        errors = self.dag.validate()
        if errors:
            raise ValueError("Pipeline validation failed:\n" + "\n".join(errors))

        # Analyze step groups
        from flowyml.core.step_grouping import StepGroupAnalyzer

        analyzer = StepGroupAnalyzer()
        self.step_groups = analyzer.analyze_groups(self.dag, self.steps)

        self._built = True

    def run(
        self,
        inputs: dict[str, Any] | None = None,
        debug: bool = False,
        stack: Any | None = None,  # Stack override
        resources: Any | None = None,  # ResourceConfig
        docker_config: Any | None = None,  # DockerConfig
        context: dict[str, Any] | None = None,  # Context vars override
    ) -> PipelineResult:
        """Execute the pipeline.

        Args:
            inputs: Optional input data for the pipeline
            debug: Enable debug mode with detailed logging
            stack: Stack override (uses self.stack if not provided)
            resources: Resource configuration for execution
            docker_config: Docker configuration for containerized execution
            context: Context variables override

        Returns:
            PipelineResult with outputs and execution info
        """
        import uuid

        run_id = str(uuid.uuid4())

        # Use provided stack or instance stack
        if stack is not None:
            self.stack = stack
            # Update components from new stack
            self.executor = self.stack.executor
            self.metadata_store = self.stack.metadata_store

        # Determine artifact store
        artifact_store = None
        if self.stack:
            artifact_store = self.stack.artifact_store

        # Update context with provided values
        if context:
            self.context.update(context)

        # Build DAG if needed
        if not self._built:
            self.build()

        # Initialize result
        result = PipelineResult(run_id, self.name)
        step_outputs = inputs or {}

        # Map step names to step objects for easier lookup
        self.steps_dict = {step.name: step for step in self.steps}
        if debug:
            pass
        else:
            # Always print the run URL for better UX
            pass

        # Get execution units (individual steps or groups)
        from flowyml.core.step_grouping import get_execution_units

        execution_units = get_execution_units(self.dag, self.steps)

        # Execute steps/groups in order
        for unit in execution_units:
            # Check if unit is a group or individual step
            from flowyml.core.step_grouping import StepGroup

            if isinstance(unit, StepGroup):
                # Execute entire group
                if debug:
                    pass

                # Get context parameters (use first step's function as representative)
                first_step = unit.steps[0]
                context_params = self.context.inject_params(first_step.func)

                # Execute the group
                group_results = self.executor.execute_step_group(
                    step_group=unit,
                    inputs=step_outputs,
                    context_params=context_params,
                    cache_store=self.cache_store,
                    artifact_store=artifact_store,
                    run_id=run_id,
                    project_name=self.name,
                )

                # Process each step result
                for step_result in group_results:
                    result.add_step_result(step_result)

                    if debug:
                        pass

                    # Handle failure
                    if not step_result.success and not step_result.skipped:
                        result.finalize(success=False)
                        self._save_run(result)
                        return result

                    # Store outputs for next steps/groups
                    if step_result.output is not None:
                        # Find step definition to get output names
                        step_def = next((s for s in self.steps if s.name == step_result.step_name), None)
                        if step_def:
                            if len(step_def.outputs) == 1:
                                step_outputs[step_def.outputs[0]] = step_result.output
                                result.outputs[step_def.outputs[0]] = step_result.output
                            elif isinstance(step_result.output, (list, tuple)) and len(step_result.output) == len(
                                step_def.outputs,
                            ):
                                for name, val in zip(step_def.outputs, step_result.output, strict=False):
                                    step_outputs[name] = val
                                    result.outputs[name] = val
                            elif isinstance(step_result.output, dict):
                                for name in step_def.outputs:
                                    if name in step_result.output:
                                        step_outputs[name] = step_result.output[name]
                                        result.outputs[name] = step_result.output[name]
                            else:
                                if step_def.outputs:
                                    step_outputs[step_def.outputs[0]] = step_result.output
                                    result.outputs[step_def.outputs[0]] = step_result.output

            else:
                # Execute single ungrouped step
                step = unit

                if debug:
                    pass

                # Prepare step inputs
                step_inputs = {}

                # Get function signature to map inputs to parameters
                import inspect

                sig = inspect.signature(step.func)
                params = list(sig.parameters.values())

                # Filter out self/cls
                params = [p for p in params if p.name not in ("self", "cls")]

                # Strategy:
                # 1. Map inputs to parameters
                #    - If input name matches param name, use it
                #    - If not, use positional mapping (input[i] -> param[i])

                # Track which parameters have been assigned
                assigned_params = set()

                if step.inputs:
                    for i, input_name in enumerate(step.inputs):
                        if input_name not in step_outputs:
                            continue

                        val = step_outputs[input_name]

                        # Check if input name matches a parameter
                        param_match = next((p for p in params if p.name == input_name), None)

                        if param_match:
                            step_inputs[param_match.name] = val
                            assigned_params.add(param_match.name)
                        elif i < len(params):
                            # Positional fallback
                            # Only if this parameter hasn't been assigned yet
                            target_param = params[i]
                            if target_param.name not in assigned_params:
                                step_inputs[target_param.name] = val
                                assigned_params.add(target_param.name)

                # Auto-map parameters from available outputs if they match function signature
                # This allows passing inputs to run() without declaring them as asset dependencies
                for param in params:
                    if param.name in step_outputs and param.name not in step_inputs:
                        step_inputs[param.name] = step_outputs[param.name]
                        assigned_params.add(param.name)

                # Validate context parameters
                # Exclude parameters that are already provided in step_inputs
                exclude_params = list(step.inputs) + list(step_inputs.keys())
                missing_params = self.context.validate_for_step(step.func, exclude=exclude_params)
                if missing_params:
                    if debug:
                        pass

                    error_msg = f"Missing required parameters: {missing_params}"
                    step_result = ExecutionResult(
                        step_name=step.name,
                        success=False,
                        error=error_msg,
                    )
                    result.add_step_result(step_result)
                    result.finalize(success=False)
                    self._save_run(result)  # Save run before returning
                    self._save_pipeline_definition()  # Save definition even on failure
                    print("DEBUG: Pipeline failed at step execution")
                    return result

                # Get context parameters for this step
                context_params = self.context.inject_params(step.func)

                # Execute step
                step_result = self.executor.execute_step(
                    step,
                    step_inputs,
                    context_params,
                    self.cache_store,
                    artifact_store=artifact_store,
                    run_id=run_id,
                    project_name=self.name,
                )

                result.add_step_result(step_result)

                if debug:
                    pass

                # Handle failure
                if not step_result.success:
                    if debug and not step_result.error:
                        pass
                    result.finalize(success=False)
                    self._save_run(result)
                    self._save_pipeline_definition()  # Save definition even on failure
                    print("DEBUG: Pipeline failed at step execution")
                    return result

                # Store outputs for next steps
                if step_result.output is not None:
                    if len(step.outputs) == 1:
                        step_outputs[step.outputs[0]] = step_result.output
                        result.outputs[step.outputs[0]] = step_result.output
                    elif isinstance(step_result.output, (list, tuple)) and len(step_result.output) == len(step.outputs):
                        for name, val in zip(step.outputs, step_result.output, strict=False):
                            step_outputs[name] = val
                            result.outputs[name] = val
                    elif isinstance(step_result.output, dict):
                        for name in step.outputs:
                            if name in step_result.output:
                                step_outputs[name] = step_result.output[name]
                                result.outputs[name] = step_result.output[name]
                    else:
                        # Fallback: assign to first output if available
                        if step.outputs:
                            step_outputs[step.outputs[0]] = step_result.output
                            result.outputs[step.outputs[0]] = step_result.output

        # Success!
        result.finalize(success=True)

        if debug:
            pass

        self._save_run(result)
        self._save_pipeline_definition()  # Save pipeline structure for scheduling
        return result

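    # --- Editor's sketch (not part of pipeline.py): a minimal run() call,
    # assuming steps declared with @step as in the class docstring; the input
    # key and context values are hypothetical. Note that `resources` and
    # `docker_config` are accepted by the signature but not referenced in the
    # method body above.
    #
    #     result = pipeline.run(
    #         inputs={"data/raw": df},            # seeds step_outputs before execution
    #         context={"learning_rate": 0.01},    # merged into self.context
    #         debug=True,
    #     )
    #     if not result.success:
    #         print(result.summary())
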
    def to_definition(self) -> dict:
        """Serialize pipeline to definition for storage and reconstruction."""
        if not self._built:
            self.build()

        return {
            "name": self.name,
            "steps": [
                {
                    "name": step.name,
                    "inputs": step.inputs,
                    "outputs": step.outputs,
                    "source_code": step.source_code,
                    "tags": step.tags,
                }
                for step in self.steps
            ],
            "dag": {
                "nodes": [
                    {
                        "name": node.name,
                        "inputs": node.inputs,
                        "outputs": node.outputs,
                    }
                    for node in self.dag.nodes.values()
                ],
                "edges": [
                    {"source": dep, "target": node_name} for node_name, deps in self.dag.edges.items() for dep in deps
                ],
            },
        }

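    # --- Editor's sketch (not part of pipeline.py): shape of the dict returned
    # by to_definition(); step names and values here are hypothetical.
    #
    #     {
    #         "name": "my_pipeline",
    #         "steps": [{"name": "train", "inputs": ["data/clean"],
    #                    "outputs": ["model/trained"], "source_code": "...", "tags": []}],
    #         "dag": {
    #             "nodes": [{"name": "train", "inputs": [...], "outputs": [...]}],
    #             "edges": [{"source": "clean", "target": "train"}],
    #         },
    #     }
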
    def _save_pipeline_definition(self) -> None:
        """Save pipeline definition to metadata store for scheduling."""
        try:
            definition = self.to_definition()
            self.metadata_store.save_pipeline_definition(self.name, definition)
        except Exception as e:
            # Don't fail the run if definition saving fails
            print(f"Warning: Failed to save pipeline definition: {e}")

    def _save_run(self, result: PipelineResult) -> None:
        """Save run results to disk and metadata database."""
        # Save to JSON file
        run_file = self.runs_dir / f"{result.run_id}.json"
        with open(run_file, "w") as f:
            json.dump(result.to_dict(), f, indent=2)

        # Serialize DAG structure for UI
        dag_data = {
            "nodes": [
                {
                    "id": node.name,
                    "name": node.name,
                    "inputs": node.inputs,
                    "outputs": node.outputs,
                }
                for node in self.dag.nodes.values()
            ],
            "edges": [
                {
                    "source": dep,
                    "target": node_name,
                }
                for node_name, deps in self.dag.edges.items()
                for dep in deps
            ],
        }

        # Collect step metadata including source code
        steps_metadata = {}
        for step in self.steps:
            step_result = result.step_results.get(step.name)
            steps_metadata[step.name] = {
                "success": step_result.success if step_result else False,
                "duration": step_result.duration_seconds if step_result else 0,
                "cached": step_result.cached if step_result else False,
                "retries": step_result.retries if step_result else 0,
                "error": step_result.error if step_result else None,
                "source_code": step.source_code,
                "inputs": step.inputs,
                "outputs": step.outputs,
                "tags": step.tags,
                "resources": step.resources.to_dict() if hasattr(step.resources, "to_dict") else step.resources,
            }

        # Save to metadata database for UI
        metadata = {
            "run_id": result.run_id,
            "pipeline_name": result.pipeline_name,
            "status": "completed" if result.success else "failed",
            "start_time": result.start_time.isoformat(),
            "end_time": result.end_time.isoformat() if result.end_time else None,
            "duration": result.duration_seconds,
            "success": result.success,
            "context": self.context._params if hasattr(self.context, "_params") else {},
            "steps": steps_metadata,
            "dag": dag_data,
        }
        self.metadata_store.save_run(result.run_id, metadata)

        # Save artifacts and metrics
        for step_name, step_result in result.step_results.items():
            if step_result.success and step_result.output is not None:
                # Find step definition to get output names
                step_def = next((s for s in self.steps if s.name == step_name), None)
                output_names = step_def.outputs if step_def else []

                # Normalize outputs to a dictionary
                outputs_to_save = {}

                # Case 1: Dictionary output (common for metrics)
                if isinstance(step_result.output, dict):
                    # If step has defined outputs, try to map them
                    if output_names and len(output_names) == 1:
                        outputs_to_save[output_names[0]] = step_result.output
                    else:
                        # Otherwise treat keys as output names if they match, or just save whole dict
                        outputs_to_save[f"{step_name}_output"] = step_result.output

                    # Also save individual numeric values as metrics
                    for k, v in step_result.output.items():
                        if isinstance(v, (int, float)):
                            self.metadata_store.save_metric(result.run_id, k, float(v))

                # Case 2: Tuple/List output matching output names
                elif isinstance(step_result.output, (list, tuple)) and len(output_names) == len(step_result.output):
                    for name, val in zip(output_names, step_result.output, strict=False):
                        outputs_to_save[name] = val

                # Case 3: Single output
                else:
                    name = output_names[0] if output_names else f"{step_name}_output"
                    outputs_to_save[name] = step_result.output

                # Save artifacts
                for name, value in outputs_to_save.items():
                    artifact_id = f"{result.run_id}_{step_name}_{name}"

                    # Check if it's a flowyml Asset
                    is_asset = hasattr(value, "metadata") and hasattr(value, "data")

                    if is_asset:
                        # Handle flowyml Asset
                        asset_type = value.__class__.__name__
                        artifact_metadata = {
                            "artifact_id": artifact_id,
                            "name": value.name,
                            "type": asset_type,
                            "run_id": result.run_id,
                            "step": step_name,
                            "path": None,
                            "value": str(value.data)[:1000] if value.data else None,
                            "created_at": datetime.now().isoformat(),
                            "properties": self._sanitize_for_json(value.metadata.properties)
                            if hasattr(value.metadata, "properties")
                            else {},
                        }
                        self.metadata_store.save_artifact(artifact_id, artifact_metadata)

                        # Special handling for Metrics asset
                        if asset_type == "Metrics" and isinstance(value.data, dict):
                            for k, v in value.data.items():
                                if isinstance(v, (int, float)):
                                    self.metadata_store.save_metric(result.run_id, k, float(v))
                    else:
                        # Handle standard Python objects
                        artifact_metadata = {
                            "artifact_id": artifact_id,
                            "name": name,
                            "type": type(value).__name__,
                            "run_id": result.run_id,
                            "step": step_name,
                            "path": str(value) if isinstance(value, (str, Path)) and len(str(value)) < 255 else None,
                            "value": str(value)[:1000],  # Preview
                            "created_at": datetime.now().isoformat(),
                        }
                        self.metadata_store.save_artifact(artifact_id, artifact_metadata)

                        # Save single value metric if applicable
                        if isinstance(value, (int, float)):
                            self.metadata_store.save_metric(result.run_id, name, float(value))

    def _sanitize_for_json(self, obj: Any) -> Any:
        """Helper to make objects JSON serializable."""
        if hasattr(obj, "id") and hasattr(obj, "name"):  # Asset-like
            return {"type": obj.__class__.__name__, "id": obj.id, "name": obj.name}
        if isinstance(obj, dict):
            return {k: self._sanitize_for_json(v) for k, v in obj.items()}
        if isinstance(obj, (list, tuple)):
            return [self._sanitize_for_json(v) for v in obj]
        if isinstance(obj, (str, int, float, bool, type(None))):
            return obj
        return str(obj)

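    # --- Editor's sketch (not part of pipeline.py): _sanitize_for_json recurses
    # into containers and falls back to str() for anything non-primitive, e.g.
    #
    #     self._sanitize_for_json({"lr": 0.01, "path": Path("/tmp/model")})
    #     # -> {"lr": 0.01, "path": "/tmp/model"}
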
    def cache_stats(self) -> dict[str, Any]:
        """Get cache statistics."""
        if self.cache_store:
            return self.cache_store.stats()
        return {}

    def invalidate_cache(
        self,
        step: str | None = None,
        before: str | None = None,
    ) -> None:
        """Invalidate cache entries.

        Args:
            step: Invalidate cache for specific step
            before: Invalidate cache entries before date
        """
        if self.cache_store:
            if step:
                self.cache_store.invalidate(step_name=step)
            else:
                self.cache_store.clear()

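    # --- Editor's sketch (not part of pipeline.py): cache maintenance calls.
    # Note that `before` is accepted but not used by the body above; calling
    # invalidate_cache() without a step clears the whole store.
    #
    #     pipeline.cache_stats()                   # {} when caching is disabled
    #     pipeline.invalidate_cache(step="train")  # drop entries for one step
    #     pipeline.invalidate_cache()              # clears the entire cache
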
    def visualize(self) -> str:
        """Generate pipeline visualization."""
        if not self._built:
            self.build()
        return self.dag.visualize()

    @classmethod
    def from_definition(cls, definition: dict, context: Context | None = None) -> "Pipeline":
        """Reconstruct pipeline from stored definition.

        This creates a "ghost" pipeline that can be executed but uses
        the stored step structure. Actual step logic must still be
        available in the codebase.

        Args:
            definition: Pipeline definition from to_definition()
            context: Optional context for execution

        Returns:
            Reconstructed Pipeline instance
        """
        from flowyml.core.step import step as step_decorator

        # Create pipeline instance
        pipeline = cls(
            name=definition["name"],
            context=context or Context(),
        )

        # Reconstruct steps
        for step_def in definition["steps"]:
            # Create a generic step function that can be called
            # In a real implementation, we'd need to either:
            # 1. Store serialized functions (using cloudpickle)
            # 2. Import functions by name from codebase
            # 3. Use placeholder functions

            # For now, we'll create a placeholder that logs execution
            def generic_step_func(*args, **kwargs):
                """Generic step function for reconstructed pipeline."""
                print(f"Executing reconstructed step with args={args}, kwargs={kwargs}")
                return

            # Apply step decorator with stored metadata
            decorated = step_decorator(
                name=step_def["name"],
                inputs=step_def["inputs"],
                outputs=step_def["outputs"],
                tags=step_def.get("tags", []),
            )(generic_step_func)

            # Add to pipeline
            pipeline.add_step(decorated)

        return pipeline

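    # --- Editor's sketch (not part of pipeline.py): round-tripping a pipeline
    # through its stored definition. The reconstructed steps are placeholders,
    # so running the "ghost" only logs each step; the DAG structure survives.
    #
    #     definition = pipeline.to_definition()
    #     ghost = Pipeline.from_definition(definition)
    #     print(ghost.visualize())
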
    def __repr__(self) -> str:
        return f"Pipeline(name='{self.name}', steps={len(self.steps)})"