flowyml-1.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. flowyml/__init__.py +207 -0
  2. flowyml/assets/__init__.py +22 -0
  3. flowyml/assets/artifact.py +40 -0
  4. flowyml/assets/base.py +209 -0
  5. flowyml/assets/dataset.py +100 -0
  6. flowyml/assets/featureset.py +301 -0
  7. flowyml/assets/metrics.py +104 -0
  8. flowyml/assets/model.py +82 -0
  9. flowyml/assets/registry.py +157 -0
  10. flowyml/assets/report.py +315 -0
  11. flowyml/cli/__init__.py +5 -0
  12. flowyml/cli/experiment.py +232 -0
  13. flowyml/cli/init.py +256 -0
  14. flowyml/cli/main.py +327 -0
  15. flowyml/cli/run.py +75 -0
  16. flowyml/cli/stack_cli.py +532 -0
  17. flowyml/cli/ui.py +33 -0
  18. flowyml/core/__init__.py +68 -0
  19. flowyml/core/advanced_cache.py +274 -0
  20. flowyml/core/approval.py +64 -0
  21. flowyml/core/cache.py +203 -0
  22. flowyml/core/checkpoint.py +148 -0
  23. flowyml/core/conditional.py +373 -0
  24. flowyml/core/context.py +155 -0
  25. flowyml/core/error_handling.py +419 -0
  26. flowyml/core/executor.py +354 -0
  27. flowyml/core/graph.py +185 -0
  28. flowyml/core/parallel.py +452 -0
  29. flowyml/core/pipeline.py +764 -0
  30. flowyml/core/project.py +253 -0
  31. flowyml/core/resources.py +424 -0
  32. flowyml/core/scheduler.py +630 -0
  33. flowyml/core/scheduler_config.py +32 -0
  34. flowyml/core/step.py +201 -0
  35. flowyml/core/step_grouping.py +292 -0
  36. flowyml/core/templates.py +226 -0
  37. flowyml/core/versioning.py +217 -0
  38. flowyml/integrations/__init__.py +1 -0
  39. flowyml/integrations/keras.py +134 -0
  40. flowyml/monitoring/__init__.py +1 -0
  41. flowyml/monitoring/alerts.py +57 -0
  42. flowyml/monitoring/data.py +102 -0
  43. flowyml/monitoring/llm.py +160 -0
  44. flowyml/monitoring/monitor.py +57 -0
  45. flowyml/monitoring/notifications.py +246 -0
  46. flowyml/registry/__init__.py +5 -0
  47. flowyml/registry/model_registry.py +491 -0
  48. flowyml/registry/pipeline_registry.py +55 -0
  49. flowyml/stacks/__init__.py +27 -0
  50. flowyml/stacks/base.py +77 -0
  51. flowyml/stacks/bridge.py +288 -0
  52. flowyml/stacks/components.py +155 -0
  53. flowyml/stacks/gcp.py +499 -0
  54. flowyml/stacks/local.py +112 -0
  55. flowyml/stacks/migration.py +97 -0
  56. flowyml/stacks/plugin_config.py +78 -0
  57. flowyml/stacks/plugins.py +401 -0
  58. flowyml/stacks/registry.py +226 -0
  59. flowyml/storage/__init__.py +26 -0
  60. flowyml/storage/artifacts.py +246 -0
  61. flowyml/storage/materializers/__init__.py +20 -0
  62. flowyml/storage/materializers/base.py +133 -0
  63. flowyml/storage/materializers/keras.py +185 -0
  64. flowyml/storage/materializers/numpy.py +94 -0
  65. flowyml/storage/materializers/pandas.py +142 -0
  66. flowyml/storage/materializers/pytorch.py +135 -0
  67. flowyml/storage/materializers/sklearn.py +110 -0
  68. flowyml/storage/materializers/tensorflow.py +152 -0
  69. flowyml/storage/metadata.py +931 -0
  70. flowyml/tracking/__init__.py +1 -0
  71. flowyml/tracking/experiment.py +211 -0
  72. flowyml/tracking/leaderboard.py +191 -0
  73. flowyml/tracking/runs.py +145 -0
  74. flowyml/ui/__init__.py +15 -0
  75. flowyml/ui/backend/Dockerfile +31 -0
  76. flowyml/ui/backend/__init__.py +0 -0
  77. flowyml/ui/backend/auth.py +163 -0
  78. flowyml/ui/backend/main.py +187 -0
  79. flowyml/ui/backend/routers/__init__.py +0 -0
  80. flowyml/ui/backend/routers/assets.py +45 -0
  81. flowyml/ui/backend/routers/execution.py +179 -0
  82. flowyml/ui/backend/routers/experiments.py +49 -0
  83. flowyml/ui/backend/routers/leaderboard.py +118 -0
  84. flowyml/ui/backend/routers/notifications.py +72 -0
  85. flowyml/ui/backend/routers/pipelines.py +110 -0
  86. flowyml/ui/backend/routers/plugins.py +192 -0
  87. flowyml/ui/backend/routers/projects.py +85 -0
  88. flowyml/ui/backend/routers/runs.py +66 -0
  89. flowyml/ui/backend/routers/schedules.py +222 -0
  90. flowyml/ui/backend/routers/traces.py +84 -0
  91. flowyml/ui/frontend/Dockerfile +20 -0
  92. flowyml/ui/frontend/README.md +315 -0
  93. flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +448 -0
  94. flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +1 -0
  95. flowyml/ui/frontend/dist/index.html +16 -0
  96. flowyml/ui/frontend/index.html +15 -0
  97. flowyml/ui/frontend/nginx.conf +26 -0
  98. flowyml/ui/frontend/package-lock.json +3545 -0
  99. flowyml/ui/frontend/package.json +33 -0
  100. flowyml/ui/frontend/postcss.config.js +6 -0
  101. flowyml/ui/frontend/src/App.jsx +21 -0
  102. flowyml/ui/frontend/src/app/assets/page.jsx +397 -0
  103. flowyml/ui/frontend/src/app/dashboard/page.jsx +295 -0
  104. flowyml/ui/frontend/src/app/experiments/[experimentId]/page.jsx +255 -0
  105. flowyml/ui/frontend/src/app/experiments/page.jsx +360 -0
  106. flowyml/ui/frontend/src/app/leaderboard/page.jsx +133 -0
  107. flowyml/ui/frontend/src/app/pipelines/page.jsx +454 -0
  108. flowyml/ui/frontend/src/app/plugins/page.jsx +48 -0
  109. flowyml/ui/frontend/src/app/projects/page.jsx +292 -0
  110. flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +682 -0
  111. flowyml/ui/frontend/src/app/runs/page.jsx +470 -0
  112. flowyml/ui/frontend/src/app/schedules/page.jsx +585 -0
  113. flowyml/ui/frontend/src/app/settings/page.jsx +314 -0
  114. flowyml/ui/frontend/src/app/tokens/page.jsx +456 -0
  115. flowyml/ui/frontend/src/app/traces/page.jsx +246 -0
  116. flowyml/ui/frontend/src/components/Layout.jsx +108 -0
  117. flowyml/ui/frontend/src/components/PipelineGraph.jsx +295 -0
  118. flowyml/ui/frontend/src/components/header/Header.jsx +72 -0
  119. flowyml/ui/frontend/src/components/plugins/AddPluginDialog.jsx +121 -0
  120. flowyml/ui/frontend/src/components/plugins/InstalledPlugins.jsx +124 -0
  121. flowyml/ui/frontend/src/components/plugins/PluginBrowser.jsx +167 -0
  122. flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +60 -0
  123. flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +145 -0
  124. flowyml/ui/frontend/src/components/ui/Badge.jsx +26 -0
  125. flowyml/ui/frontend/src/components/ui/Button.jsx +34 -0
  126. flowyml/ui/frontend/src/components/ui/Card.jsx +44 -0
  127. flowyml/ui/frontend/src/components/ui/CodeSnippet.jsx +38 -0
  128. flowyml/ui/frontend/src/components/ui/CollapsibleCard.jsx +53 -0
  129. flowyml/ui/frontend/src/components/ui/DataView.jsx +175 -0
  130. flowyml/ui/frontend/src/components/ui/EmptyState.jsx +49 -0
  131. flowyml/ui/frontend/src/components/ui/ExecutionStatus.jsx +122 -0
  132. flowyml/ui/frontend/src/components/ui/KeyValue.jsx +25 -0
  133. flowyml/ui/frontend/src/components/ui/ProjectSelector.jsx +134 -0
  134. flowyml/ui/frontend/src/contexts/ProjectContext.jsx +79 -0
  135. flowyml/ui/frontend/src/contexts/ThemeContext.jsx +54 -0
  136. flowyml/ui/frontend/src/index.css +11 -0
  137. flowyml/ui/frontend/src/layouts/MainLayout.jsx +23 -0
  138. flowyml/ui/frontend/src/main.jsx +10 -0
  139. flowyml/ui/frontend/src/router/index.jsx +39 -0
  140. flowyml/ui/frontend/src/services/pluginService.js +90 -0
  141. flowyml/ui/frontend/src/utils/api.js +47 -0
  142. flowyml/ui/frontend/src/utils/cn.js +6 -0
  143. flowyml/ui/frontend/tailwind.config.js +31 -0
  144. flowyml/ui/frontend/vite.config.js +21 -0
  145. flowyml/ui/utils.py +77 -0
  146. flowyml/utils/__init__.py +67 -0
  147. flowyml/utils/config.py +308 -0
  148. flowyml/utils/debug.py +240 -0
  149. flowyml/utils/environment.py +346 -0
  150. flowyml/utils/git.py +319 -0
  151. flowyml/utils/logging.py +61 -0
  152. flowyml/utils/performance.py +314 -0
  153. flowyml/utils/stack_config.py +296 -0
  154. flowyml/utils/validation.py +270 -0
  155. flowyml-1.1.0.dist-info/METADATA +372 -0
  156. flowyml-1.1.0.dist-info/RECORD +159 -0
  157. flowyml-1.1.0.dist-info/WHEEL +4 -0
  158. flowyml-1.1.0.dist-info/entry_points.txt +3 -0
  159. flowyml-1.1.0.dist-info/licenses/LICENSE +17 -0
flowyml/core/pipeline.py
@@ -0,0 +1,764 @@
+ """Pipeline Module - Main orchestration for ML pipelines."""
+
+ import json
+ from typing import Any
+ from datetime import datetime
+ from pathlib import Path
+
+ from flowyml.core.context import Context
+ from flowyml.core.step import Step
+ from flowyml.core.graph import DAG, Node
+ from flowyml.core.executor import Executor, LocalExecutor, ExecutionResult
+ from flowyml.core.cache import CacheStore
+
+
+ class PipelineResult:
+     """Result of pipeline execution."""
+
+     def __init__(self, run_id: str, pipeline_name: str):
+         self.run_id = run_id
+         self.pipeline_name = pipeline_name
+         self.success = False
+         self.step_results: dict[str, ExecutionResult] = {}
+         self.outputs: dict[str, Any] = {}
+         self.start_time = datetime.now()
+         self.end_time: datetime | None = None
+         self.duration_seconds: float = 0.0
+
+     def add_step_result(self, result: ExecutionResult) -> None:
+         """Add result from a step execution."""
+         self.step_results[result.step_name] = result
+
+         # Track outputs
+         if result.success and result.output is not None:
+             # Assuming single output for simplicity
+             self.outputs[result.step_name] = result.output
+
+     def finalize(self, success: bool) -> None:
+         """Mark pipeline as complete."""
+         self.success = success
+         self.end_time = datetime.now()
+         self.duration_seconds = (self.end_time - self.start_time).total_seconds()
+
+     def __getitem__(self, key: str) -> Any:
+         """Allow dict-style access to outputs."""
+         return self.outputs.get(key)
+
+     def to_dict(self) -> dict[str, Any]:
+         """Convert to dictionary."""
+         return {
+             "run_id": self.run_id,
+             "pipeline_name": self.pipeline_name,
+             "success": self.success,
+             "start_time": self.start_time.isoformat(),
+             "end_time": self.end_time.isoformat() if self.end_time else None,
+             "duration_seconds": self.duration_seconds,
+             "steps": {
+                 name: {
+                     "success": result.success,
+                     "duration": result.duration_seconds,
+                     "cached": result.cached,
+                     "retries": result.retries,
+                     "error": result.error,
+                 }
+                 for name, result in self.step_results.items()
+             },
+         }
+
+     def summary(self) -> str:
+         """Generate execution summary."""
+         lines = [
+             f"Pipeline: {self.pipeline_name}",
+             f"Run ID: {self.run_id}",
+             f"Status: {'✓ SUCCESS' if self.success else '✗ FAILED'}",
+             f"Duration: {self.duration_seconds:.2f}s",
+             "",
+             "Steps:",
+         ]
+
+         for name, result in self.step_results.items():
+             status = "✓" if result.success else "✗"
+             cached = " (cached)" if result.cached else ""
+             retries = f" [{result.retries} retries]" if result.retries > 0 else ""
+             lines.append(
+                 f" {status} {name}: {result.duration_seconds:.2f}s{cached}{retries}",
+             )
+             if result.error:
+                 lines.append(f" Error: {result.error.split(chr(10))[0]}")
+
+         return "\n".join(lines)
+
+
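A minimal usage sketch of PipelineResult (illustrative only, not part of the packaged file; it assumes ExecutionResult defaults `output` to None and accepts the keyword arguments that Pipeline.run below passes to it):

    result = PipelineResult(run_id="demo-run", pipeline_name="demo")
    result.add_step_result(ExecutionResult(step_name="train", success=True))
    result.finalize(success=True)
    print(result.summary())   # formatted status report, one line per step
    print(result["train"])    # dict-style access; returns None for missing keys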
+ class Pipeline:
+     """Main pipeline class for orchestrating ML workflows.
+
+     Example:
+         >>> from flowyml import Pipeline, step, context
+         >>> ctx = context(learning_rate=0.001, epochs=10)
+         >>> @step(outputs=["model/trained"])
+         ... def train(learning_rate: float, epochs: int):
+         ...     return train_model(learning_rate, epochs)
+         >>> pipeline = Pipeline("my_pipeline", context=ctx)
+         >>> pipeline.add_step(train)
+         >>> result = pipeline.run()
+     """
+
+     def __init__(
+         self,
+         name: str,
+         context: Context | None = None,
+         executor: Executor | None = None,
+         enable_cache: bool = True,
+         cache_dir: str | None = None,
+         stack: Any | None = None,  # Stack instance
+         project: str | None = None,  # Project name to attach to
+     ):
+         """Initialize pipeline.
+
+         Args:
+             name: Name of the pipeline
+             context: Optional context for parameter injection
+             executor: Optional executor (defaults to LocalExecutor)
+             enable_cache: Whether to enable caching
+             cache_dir: Optional directory for cache
+             stack: Optional stack instance to run on
+             project: Optional project name to attach this pipeline to.
+         """
+         self.name = name
+         self.context = context or Context()
+         self.enable_cache = enable_cache
+         self.stack = stack  # Store stack instance
+
+         self.steps: list[Step] = []
+         self.dag = DAG()
+
+         # Storage
+         if cache_dir is None:
+             from flowyml.utils.config import get_config
+
+             cache_dir = str(get_config().cache_dir)
+
+         self.cache_store = CacheStore(cache_dir) if enable_cache else None
+
+         from flowyml.utils.config import get_config
+
+         self.runs_dir = get_config().runs_dir
+         self.runs_dir.mkdir(parents=True, exist_ok=True)
+
+         # Initialize components from stack or defaults
+         if self.stack:
+             self.executor = executor or self.stack.executor
+             self.metadata_store = self.stack.metadata_store
+         else:
+             self.executor = executor or LocalExecutor()
+             # Metadata store for UI integration
+             from flowyml.storage.metadata import SQLiteMetadataStore
+
+             self.metadata_store = SQLiteMetadataStore()
+
+         # Handle Project Attachment
+         if project:
+             from flowyml.core.project import ProjectManager
+
+             manager = ProjectManager()
+             # Get or create project
+             proj = manager.get_project(project)
+             if not proj:
+                 proj = manager.create_project(project)
+
+             # Configure pipeline with project settings
+             self.runs_dir = proj.runs_dir
+             self.metadata_store = proj.metadata_store
+
+             # Register pipeline with project
+             if name not in proj.metadata["pipelines"]:
+                 proj.metadata["pipelines"].append(name)
+                 proj._save_metadata()
+
+         # State
+         self._built = False
+         self.step_groups: list[Any] = []  # Will hold StepGroup objects
+
+     def add_step(self, step: Step) -> "Pipeline":
+         """Add a step to the pipeline.
+
+         Args:
+             step: Step to add
+
+         Returns:
+             Self for chaining
+         """
+         self.steps.append(step)
+         self._built = False
+         return self
+
+     def build(self) -> None:
+         """Build the execution DAG."""
+         if self._built:
+             return
+
+         # Clear previous DAG
+         self.dag = DAG()
+
+         # Add nodes
+         for step in self.steps:
+             node = Node(
+                 name=step.name,
+                 step=step,
+                 inputs=step.inputs,
+                 outputs=step.outputs,
+             )
+             self.dag.add_node(node)
+
+         # Build edges
+         self.dag.build_edges()
+
+         # Validate
+         errors = self.dag.validate()
+         if errors:
+             raise ValueError("Pipeline validation failed:\n" + "\n".join(errors))
+
+         # Analyze step groups
+         from flowyml.core.step_grouping import StepGroupAnalyzer
+
+         analyzer = StepGroupAnalyzer()
+         self.step_groups = analyzer.analyze_groups(self.dag, self.steps)
+
+         self._built = True
+
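Since add_step returns self and resets `_built`, steps can be chained and the DAG is rebuilt lazily on the next build() or run(). A wiring sketch (illustrative; it assumes the `step` decorator accepts the `inputs=`/`outputs=` keywords that from_definition below passes to it):

    from flowyml import Pipeline, step

    @step(outputs=["raw"])
    def load():
        return [1, 2, 3]

    @step(inputs=["raw"], outputs=["clean"])
    def scrub(raw):
        return [x * 2 for x in raw]

    p = Pipeline("etl", enable_cache=False)
    p.add_step(load).add_step(scrub)  # chained; each call marks the DAG stale
    p.build()                         # raises ValueError if validation fails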
+     def run(
+         self,
+         inputs: dict[str, Any] | None = None,
+         debug: bool = False,
+         stack: Any | None = None,  # Stack override
+         resources: Any | None = None,  # ResourceConfig
+         docker_config: Any | None = None,  # DockerConfig
+         context: dict[str, Any] | None = None,  # Context vars override
+     ) -> PipelineResult:
+         """Execute the pipeline.
+
+         Args:
+             inputs: Optional input data for the pipeline
+             debug: Enable debug mode with detailed logging
+             stack: Stack override (uses self.stack if not provided)
+             resources: Resource configuration for execution
+             docker_config: Docker configuration for containerized execution
+             context: Context variables override
+
+         Returns:
+             PipelineResult with outputs and execution info
+         """
+         import uuid
+
+         run_id = str(uuid.uuid4())
+
+         # Use provided stack or instance stack
+         if stack is not None:
+             self.stack = stack
+             # Update components from new stack
+             self.executor = self.stack.executor
+             self.metadata_store = self.stack.metadata_store
+
+         # Determine artifact store
+         artifact_store = None
+         if self.stack:
+             artifact_store = self.stack.artifact_store
+
+         # Update context with provided values
+         if context:
+             self.context.update(context)
+
+         # Build DAG if needed
+         if not self._built:
+             self.build()
+
+         # Initialize result
+         result = PipelineResult(run_id, self.name)
+         step_outputs = inputs or {}
+
+         # Map step names to step objects for easier lookup
+         self.steps_dict = {step.name: step for step in self.steps}
+         if debug:
+             pass
+         else:
+             # Always print the run URL for better UX
+             pass
+
+         # Get execution units (individual steps or groups)
+         from flowyml.core.step_grouping import get_execution_units
+
+         execution_units = get_execution_units(self.dag, self.steps)
+
+         # Execute steps/groups in order
+         for unit in execution_units:
+             # Check if unit is a group or individual step
+             from flowyml.core.step_grouping import StepGroup
+
+             if isinstance(unit, StepGroup):
+                 # Execute entire group
+                 if debug:
+                     pass
+
+                 # Get context parameters (use first step's function as representative)
+                 first_step = unit.steps[0]
+                 context_params = self.context.inject_params(first_step.func)
+
+                 # Execute the group
+                 group_results = self.executor.execute_step_group(
+                     step_group=unit,
+                     inputs=step_outputs,
+                     context_params=context_params,
+                     cache_store=self.cache_store,
+                     artifact_store=artifact_store,
+                     run_id=run_id,
+                     project_name=self.name,
+                 )
+
+                 # Process each step result
+                 for step_result in group_results:
+                     result.add_step_result(step_result)
+
+                     if debug:
+                         pass
+
+                     # Handle failure
+                     if not step_result.success and not step_result.skipped:
+                         result.finalize(success=False)
+                         self._save_run(result)
+                         return result
+
+                     # Store outputs for next steps/groups
+                     if step_result.output is not None:
+                         # Find step definition to get output names
+                         step_def = next((s for s in self.steps if s.name == step_result.step_name), None)
+                         if step_def:
+                             if len(step_def.outputs) == 1:
+                                 step_outputs[step_def.outputs[0]] = step_result.output
+                                 result.outputs[step_def.outputs[0]] = step_result.output
+                             elif isinstance(step_result.output, (list, tuple)) and len(step_result.output) == len(
+                                 step_def.outputs,
+                             ):
+                                 for name, val in zip(step_def.outputs, step_result.output, strict=False):
+                                     step_outputs[name] = val
+                                     result.outputs[name] = val
+                             elif isinstance(step_result.output, dict):
+                                 for name in step_def.outputs:
+                                     if name in step_result.output:
+                                         step_outputs[name] = step_result.output[name]
+                                         result.outputs[name] = step_result.output[name]
+                             else:
+                                 if step_def.outputs:
+                                     step_outputs[step_def.outputs[0]] = step_result.output
+                                     result.outputs[step_def.outputs[0]] = step_result.output
+
+             else:
+                 # Execute single ungrouped step
+                 step = unit
+
+                 if debug:
+                     pass
+
+                 # Prepare step inputs
+                 step_inputs = {}
+
+                 # Get function signature to map inputs to parameters
+                 import inspect
+
+                 sig = inspect.signature(step.func)
+                 params = list(sig.parameters.values())
+
+                 # Filter out self/cls
+                 params = [p for p in params if p.name not in ("self", "cls")]
+
+                 # Strategy:
+                 # 1. Map inputs to parameters
+                 #    - If input name matches param name, use it
+                 #    - If not, use positional mapping (input[i] -> param[i])
+
+                 # Track which parameters have been assigned
+                 assigned_params = set()
+
+                 if step.inputs:
+                     for i, input_name in enumerate(step.inputs):
+                         if input_name not in step_outputs:
+                             continue
+
+                         val = step_outputs[input_name]
+
+                         # Check if input name matches a parameter
+                         param_match = next((p for p in params if p.name == input_name), None)
+
+                         if param_match:
+                             step_inputs[param_match.name] = val
+                             assigned_params.add(param_match.name)
+                         elif i < len(params):
+                             # Positional fallback
+                             # Only if this parameter hasn't been assigned yet
+                             target_param = params[i]
+                             if target_param.name not in assigned_params:
+                                 step_inputs[target_param.name] = val
+                                 assigned_params.add(target_param.name)
+
+                 # Auto-map parameters from available outputs if they match function signature
+                 # This allows passing inputs to run() without declaring them as asset dependencies
+                 for param in params:
+                     if param.name in step_outputs and param.name not in step_inputs:
+                         step_inputs[param.name] = step_outputs[param.name]
+                         assigned_params.add(param.name)
+
+                 # Validate context parameters
+                 # Exclude parameters that are already provided in step_inputs
+                 exclude_params = list(step.inputs) + list(step_inputs.keys())
+                 missing_params = self.context.validate_for_step(step.func, exclude=exclude_params)
+                 if missing_params:
+                     if debug:
+                         pass
+
+                     error_msg = f"Missing required parameters: {missing_params}"
+                     step_result = ExecutionResult(
+                         step_name=step.name,
+                         success=False,
+                         error=error_msg,
+                     )
+                     result.add_step_result(step_result)
+                     result.finalize(success=False)
+                     self._save_run(result)  # Save run before returning
+                     self._save_pipeline_definition()  # Save definition even on failure
+                     print("DEBUG: Pipeline failed at step execution")
+                     return result
+
+                 # Get context parameters for this step
+                 context_params = self.context.inject_params(step.func)
+
+                 # Execute step
+                 step_result = self.executor.execute_step(
+                     step,
+                     step_inputs,
+                     context_params,
+                     self.cache_store,
+                     artifact_store=artifact_store,
+                     run_id=run_id,
+                     project_name=self.name,
+                 )
+
+                 result.add_step_result(step_result)
+
+                 if debug:
+                     pass
+
+                 # Handle failure
+                 if not step_result.success:
+                     if debug and not step_result.error:
+                         pass
+                     result.finalize(success=False)
+                     self._save_run(result)
+                     self._save_pipeline_definition()  # Save definition even on failure
+                     print("DEBUG: Pipeline failed at step execution")
+                     return result
+
+                 # Store outputs for next steps
+                 if step_result.output is not None:
+                     if len(step.outputs) == 1:
+                         step_outputs[step.outputs[0]] = step_result.output
+                         result.outputs[step.outputs[0]] = step_result.output
+                     elif isinstance(step_result.output, (list, tuple)) and len(step_result.output) == len(step.outputs):
+                         for name, val in zip(step.outputs, step_result.output, strict=False):
+                             step_outputs[name] = val
+                             result.outputs[name] = val
+                     elif isinstance(step_result.output, dict):
+                         for name in step.outputs:
+                             if name in step_result.output:
+                                 step_outputs[name] = step_result.output[name]
+                                 result.outputs[name] = step_result.output[name]
+                     else:
+                         # Fallback: assign to first output if available
+                         if step.outputs:
+                             step_outputs[step.outputs[0]] = step_result.output
+                             result.outputs[step.outputs[0]] = step_result.output
+
+         # Success!
+         result.finalize(success=True)
+
+         if debug:
+             pass
+
+         self._save_run(result)
+         self._save_pipeline_definition()  # Save pipeline structure for scheduling
+         return result
+
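The output-mapping branches above mean a step's return shape decides how values land in `result.outputs`: a single declared output takes the value whole, a tuple or list of matching length is unpacked positionally, and a dict is matched by key. A sketch continuing the pipeline above (illustrative, same assumptions):

    @step(inputs=["clean"], outputs=["mean", "total"])
    def stats(clean):
        # tuple of matching length: unpacked onto the declared output names
        return sum(clean) / len(clean), sum(clean)

    p.add_step(stats)
    result = p.run(context={"seed": 42})  # merged into the pipeline's Context
    print(result.outputs["mean"], result["total"])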
489
+ def to_definition(self) -> dict:
490
+ """Serialize pipeline to definition for storage and reconstruction."""
491
+ if not self._built:
492
+ self.build()
493
+
494
+ return {
495
+ "name": self.name,
496
+ "steps": [
497
+ {
498
+ "name": step.name,
499
+ "inputs": step.inputs,
500
+ "outputs": step.outputs,
501
+ "source_code": step.source_code,
502
+ "tags": step.tags,
503
+ }
504
+ for step in self.steps
505
+ ],
506
+ "dag": {
507
+ "nodes": [
508
+ {
509
+ "name": node.name,
510
+ "inputs": node.inputs,
511
+ "outputs": node.outputs,
512
+ }
513
+ for node in self.dag.nodes.values()
514
+ ],
515
+ "edges": [
516
+ {"source": dep, "target": node_name} for node_name, deps in self.dag.edges.items() for dep in deps
517
+ ],
518
+ },
519
+ }
520
+
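For the three-step pipeline sketched above, to_definition() yields roughly the following shape (abbreviated; the exact node and edge entries depend on how DAG.build_edges links output names to consumers):

    {
        "name": "etl",
        "steps": [
            {"name": "load", "inputs": [], "outputs": ["raw"], "source_code": "...", "tags": []},
            {"name": "scrub", "inputs": ["raw"], "outputs": ["clean"], "source_code": "...", "tags": []},
            {"name": "stats", "inputs": ["clean"], "outputs": ["mean", "total"], "source_code": "...", "tags": []},
        ],
        "dag": {
            "nodes": ["..."],
            "edges": [{"source": "load", "target": "scrub"}, {"source": "scrub", "target": "stats"}],
        },
    }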
+     def _save_pipeline_definition(self) -> None:
+         """Save pipeline definition to metadata store for scheduling."""
+         try:
+             definition = self.to_definition()
+             self.metadata_store.save_pipeline_definition(self.name, definition)
+         except Exception as e:
+             # Don't fail the run if definition saving fails
+             print(f"Warning: Failed to save pipeline definition: {e}")
+
+     def _save_run(self, result: PipelineResult) -> None:
+         """Save run results to disk and metadata database."""
+         # Save to JSON file
+         run_file = self.runs_dir / f"{result.run_id}.json"
+         with open(run_file, "w") as f:
+             json.dump(result.to_dict(), f, indent=2)
+
+         # Serialize DAG structure for UI
+         dag_data = {
+             "nodes": [
+                 {
+                     "id": node.name,
+                     "name": node.name,
+                     "inputs": node.inputs,
+                     "outputs": node.outputs,
+                 }
+                 for node in self.dag.nodes.values()
+             ],
+             "edges": [
+                 {
+                     "source": dep,
+                     "target": node_name,
+                 }
+                 for node_name, deps in self.dag.edges.items()
+                 for dep in deps
+             ],
+         }
+
+         # Collect step metadata including source code
+         steps_metadata = {}
+         for step in self.steps:
+             step_result = result.step_results.get(step.name)
+             steps_metadata[step.name] = {
+                 "success": step_result.success if step_result else False,
+                 "duration": step_result.duration_seconds if step_result else 0,
+                 "cached": step_result.cached if step_result else False,
+                 "retries": step_result.retries if step_result else 0,
+                 "error": step_result.error if step_result else None,
+                 "source_code": step.source_code,
+                 "inputs": step.inputs,
+                 "outputs": step.outputs,
+                 "tags": step.tags,
+                 "resources": step.resources.to_dict() if hasattr(step.resources, "to_dict") else step.resources,
+             }
+
+         # Save to metadata database for UI
+         metadata = {
+             "run_id": result.run_id,
+             "pipeline_name": result.pipeline_name,
+             "status": "completed" if result.success else "failed",
+             "start_time": result.start_time.isoformat(),
+             "end_time": result.end_time.isoformat() if result.end_time else None,
+             "duration": result.duration_seconds,
+             "success": result.success,
+             "context": self.context._params if hasattr(self.context, "_params") else {},
+             "steps": steps_metadata,
+             "dag": dag_data,
+         }
+         self.metadata_store.save_run(result.run_id, metadata)
+
+         # Save artifacts and metrics
+         for step_name, step_result in result.step_results.items():
+             if step_result.success and step_result.output is not None:
+                 # Find step definition to get output names
+                 step_def = next((s for s in self.steps if s.name == step_name), None)
+                 output_names = step_def.outputs if step_def else []
+
+                 # Normalize outputs to a dictionary
+                 outputs_to_save = {}
+
+                 # Case 1: Dictionary output (common for metrics)
+                 if isinstance(step_result.output, dict):
+                     # If step has defined outputs, try to map them
+                     if output_names and len(output_names) == 1:
+                         outputs_to_save[output_names[0]] = step_result.output
+                     else:
+                         # Otherwise treat keys as output names if they match, or just save whole dict
+                         outputs_to_save[f"{step_name}_output"] = step_result.output
+
+                     # Also save individual numeric values as metrics
+                     for k, v in step_result.output.items():
+                         if isinstance(v, (int, float)):
+                             self.metadata_store.save_metric(result.run_id, k, float(v))
+
+                 # Case 2: Tuple/List output matching output names
+                 elif isinstance(step_result.output, (list, tuple)) and len(output_names) == len(step_result.output):
+                     for name, val in zip(output_names, step_result.output, strict=False):
+                         outputs_to_save[name] = val
+
+                 # Case 3: Single output
+                 else:
+                     name = output_names[0] if output_names else f"{step_name}_output"
+                     outputs_to_save[name] = step_result.output
+
+                 # Save artifacts
+                 for name, value in outputs_to_save.items():
+                     artifact_id = f"{result.run_id}_{step_name}_{name}"
+
+                     # Check if it's a flowyml Asset
+                     is_asset = hasattr(value, "metadata") and hasattr(value, "data")
+
+                     if is_asset:
+                         # Handle flowyml Asset
+                         asset_type = value.__class__.__name__
+                         artifact_metadata = {
+                             "artifact_id": artifact_id,
+                             "name": value.name,
+                             "type": asset_type,
+                             "run_id": result.run_id,
+                             "step": step_name,
+                             "path": None,
+                             "value": str(value.data)[:1000] if value.data else None,
+                             "created_at": datetime.now().isoformat(),
+                             "properties": self._sanitize_for_json(value.metadata.properties)
+                             if hasattr(value.metadata, "properties")
+                             else {},
+                         }
+                         self.metadata_store.save_artifact(artifact_id, artifact_metadata)
+
+                         # Special handling for Metrics asset
+                         if asset_type == "Metrics" and isinstance(value.data, dict):
+                             for k, v in value.data.items():
+                                 if isinstance(v, (int, float)):
+                                     self.metadata_store.save_metric(result.run_id, k, float(v))
+                     else:
+                         # Handle standard Python objects
+                         artifact_metadata = {
+                             "artifact_id": artifact_id,
+                             "name": name,
+                             "type": type(value).__name__,
+                             "run_id": result.run_id,
+                             "step": step_name,
+                             "path": str(value) if isinstance(value, (str, Path)) and len(str(value)) < 255 else None,
+                             "value": str(value)[:1000],  # Preview
+                             "created_at": datetime.now().isoformat(),
+                         }
+                         self.metadata_store.save_artifact(artifact_id, artifact_metadata)
+
+                         # Save single value metric if applicable
+                         if isinstance(value, (int, float)):
+                             self.metadata_store.save_metric(result.run_id, name, float(value))
+
+     def _sanitize_for_json(self, obj: Any) -> Any:
+         """Helper to make objects JSON serializable."""
+         if hasattr(obj, "id") and hasattr(obj, "name"):  # Asset-like
+             return {"type": obj.__class__.__name__, "id": obj.id, "name": obj.name}
+         if isinstance(obj, dict):
+             return {k: self._sanitize_for_json(v) for k, v in obj.items()}
+         if isinstance(obj, (list, tuple)):
+             return [self._sanitize_for_json(v) for v in obj]
+         if isinstance(obj, (str, int, float, bool, type(None))):
+             return obj
+         return str(obj)
+
+     def cache_stats(self) -> dict[str, Any]:
+         """Get cache statistics."""
+         if self.cache_store:
+             return self.cache_store.stats()
+         return {}
+
+     def invalidate_cache(
+         self,
+         step: str | None = None,
+         before: str | None = None,
+     ) -> None:
+         """Invalidate cache entries.
+
+         Args:
+             step: Invalidate cache for specific step
+             before: Invalidate cache entries before date
+         """
+         if self.cache_store:
+             if step:
+                 self.cache_store.invalidate(step_name=step)
+             else:
+                 self.cache_store.clear()
+
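Note that `before` is accepted but never consulted in this version: only per-step invalidation and a full clear are implemented. Usage sketch (illustrative; the contents of the stats dict are whatever CacheStore.stats() returns):

    p = Pipeline("etl")                # caching is on by default
    p.run()
    print(p.cache_stats())             # {} when caching is disabled
    p.invalidate_cache(step="scrub")   # drop cached results for one step
    p.invalidate_cache()               # no step given: clears the entire cache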
+     def visualize(self) -> str:
+         """Generate pipeline visualization."""
+         if not self._built:
+             self.build()
+         return self.dag.visualize()
+
+     @classmethod
+     def from_definition(cls, definition: dict, context: Context | None = None) -> "Pipeline":
+         """Reconstruct pipeline from stored definition.
+
+         This creates a "ghost" pipeline that can be executed but uses
+         the stored step structure. Actual step logic must still be
+         available in the codebase.
+
+         Args:
+             definition: Pipeline definition from to_definition()
+             context: Optional context for execution
+
+         Returns:
+             Reconstructed Pipeline instance
+         """
+         from flowyml.core.step import step as step_decorator
+
+         # Create pipeline instance
+         pipeline = cls(
+             name=definition["name"],
+             context=context or Context(),
+         )
+
+         # Reconstruct steps
+         for step_def in definition["steps"]:
+             # Create a generic step function that can be called
+             # In a real implementation, we'd need to either:
+             # 1. Store serialized functions (using cloudpickle)
+             # 2. Import functions by name from codebase
+             # 3. Use placeholder functions
+
+             # For now, we'll create a placeholder that logs execution
+             def generic_step_func(*args, **kwargs):
+                 """Generic step function for reconstructed pipeline."""
+                 print(f"Executing reconstructed step with args={args}, kwargs={kwargs}")
+                 return
+
+             # Apply step decorator with stored metadata
+             decorated = step_decorator(
+                 name=step_def["name"],
+                 inputs=step_def["inputs"],
+                 outputs=step_def["outputs"],
+                 tags=step_def.get("tags", []),
+             )(generic_step_func)
+
+             # Add to pipeline
+             pipeline.add_step(decorated)
+
+         return pipeline
+
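to_definition/from_definition round-trip the pipeline's structure but not its behavior: as the comments above note, reconstructed steps run a logging placeholder rather than the original functions. A round-trip sketch (illustrative):

    definition = p.to_definition()
    ghost = Pipeline.from_definition(definition)
    print(ghost)   # Pipeline(name='etl', steps=3)
    ghost.run()    # each step prints its placeholder message instead of real work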
+     def __repr__(self) -> str:
+         return f"Pipeline(name='{self.name}', steps={len(self.steps)})"