flowyml 1.5.0__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/__init__.py +2 -1
- flowyml/assets/featureset.py +30 -5
- flowyml/assets/metrics.py +47 -4
- flowyml/cli/main.py +397 -0
- flowyml/cli/models.py +444 -0
- flowyml/cli/rich_utils.py +95 -0
- flowyml/core/checkpoint.py +6 -1
- flowyml/core/conditional.py +104 -0
- flowyml/core/display.py +595 -0
- flowyml/core/executor.py +27 -6
- flowyml/core/orchestrator.py +500 -7
- flowyml/core/pipeline.py +447 -11
- flowyml/core/project.py +4 -1
- flowyml/core/scheduler.py +225 -81
- flowyml/core/versioning.py +13 -4
- flowyml/registry/model_registry.py +1 -1
- flowyml/ui/frontend/dist/assets/index-By4trVyv.css +1 -0
- flowyml/ui/frontend/dist/assets/{index-DF8dJaFL.js → index-CX5RV2C9.js} +118 -117
- flowyml/ui/frontend/dist/index.html +2 -2
- flowyml/ui/frontend/src/components/PipelineGraph.jsx +43 -4
- flowyml/ui/server_manager.py +189 -0
- flowyml/ui/utils.py +66 -2
- flowyml/utils/config.py +7 -0
- {flowyml-1.5.0.dist-info → flowyml-1.7.0.dist-info}/METADATA +5 -3
- {flowyml-1.5.0.dist-info → flowyml-1.7.0.dist-info}/RECORD +28 -24
- flowyml/ui/frontend/dist/assets/index-CBUXOWze.css +0 -1
- {flowyml-1.5.0.dist-info → flowyml-1.7.0.dist-info}/WHEEL +0 -0
- {flowyml-1.5.0.dist-info → flowyml-1.7.0.dist-info}/entry_points.txt +0 -0
- {flowyml-1.5.0.dist-info → flowyml-1.7.0.dist-info}/licenses/LICENSE +0 -0
flowyml/core/orchestrator.py
CHANGED
@@ -17,7 +17,9 @@ from flowyml.core.observability import get_metrics_collector
 from flowyml.core.retry_policy import with_retry
 
 if TYPE_CHECKING:
-    from flowyml.core.pipeline import Pipeline
+    from flowyml.core.pipeline import Pipeline, PipelineResult
+    from flowyml.core.executor import ExecutionResult
+    from flowyml.core.step import Step
 
 
 class LocalOrchestrator(Orchestrator):
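The new names are imported under `if TYPE_CHECKING:`, so they exist for type checkers only; at runtime the orchestrator re-imports them lazily inside the methods that need them (see the `from flowyml.core.step import Step` further down). A minimal, self-contained sketch of that pattern — the `decimal` module here is just a stand-in for a circular-import-prone module:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Evaluated only by static type checkers (mypy, pyright), never at
    # runtime - so a circular or expensive import here costs nothing.
    from decimal import Decimal


def double(value: "Decimal") -> "Decimal":  # string annotations resolve lazily
    from decimal import Decimal  # runtime import deferred to the call site

    return value * Decimal(2)


print(double.__annotations__["value"])  # still the string 'Decimal' at runtime
```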
@@ -89,21 +91,26 @@ class LocalOrchestrator(Orchestrator):
         # Get execution units (individual steps or groups)
         execution_units = get_execution_units(pipeline.dag, pipeline.steps)
 
+        # Check if we're resuming from checkpoint
+        resume_from_checkpoint = getattr(pipeline, "_resume_from_checkpoint", False)
+        completed_steps_from_checkpoint = getattr(pipeline, "_completed_steps_from_checkpoint", set())
+        checkpoint = getattr(pipeline, "_checkpoint", None)
+
         # Execute steps/groups in order
         for unit in execution_units:
             # Check if unit is a group or individual step
             if isinstance(unit, StepGroup):
                 # Execute entire group
+                # Show group execution start
+                if hasattr(pipeline, "_display") and pipeline._display:
+                    for step in unit.steps:
+                        pipeline._display.update_step_status(step_name=step.name, status="running")
 
-                #
-                first_step = unit.steps[0]
-                context_params = pipeline.context.inject_params(first_step.func)
-
-                # Execute the group
+                # Pass pipeline context so each step can get its own injected params
                 group_results = pipeline.executor.execute_step_group(
                     step_group=unit,
                     inputs=step_outputs,
-                    context_params=context_params,
+                    context=pipeline.context,  # Pass full context object
                     cache_store=pipeline.cache_store,
                     artifact_store=pipeline.stack.artifact_store if pipeline.stack else None,
                     run_id=run_id,
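All three resume attributes are read with `getattr(..., default)` rather than direct attribute access, so a `Pipeline` that never enabled checkpointing simply yields the "not resuming" defaults. A self-contained sketch of this tolerant-attribute pattern (the bare `Pipeline` stub below is hypothetical, not flowyml's class):

```python
class Pipeline:
    """Hypothetical stub: a pipeline that never enabled checkpointing."""


p = Pipeline()

# Missing private attributes degrade to safe defaults instead of raising.
resume = getattr(p, "_resume_from_checkpoint", False)              # False
completed = getattr(p, "_completed_steps_from_checkpoint", set())  # set()
checkpoint = getattr(p, "_checkpoint", None)                       # None

if not (resume and checkpoint):
    print("no checkpoint to resume from; executing every step")
```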
@@ -112,6 +119,16 @@ class LocalOrchestrator(Orchestrator):
 
                 # Process each step result
                 for step_result in group_results:
+                    # Update display
+                    if hasattr(pipeline, "_display") and pipeline._display:
+                        pipeline._display.update_step_status(
+                            step_name=step_result.step_name,
+                            status="success" if step_result.success else "failed",
+                            duration=step_result.duration_seconds,
+                            cached=step_result.cached,
+                            error=step_result.error,
+                        )
+
                     result.add_step_result(step_result)
 
                     # Handle failure
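Every status transition is routed through `pipeline._display.update_step_status(...)`, always behind a `hasattr(pipeline, "_display")` guard, so rendering stays optional. The call sites in this diff imply an interface roughly like the stub below — a hypothetical sketch; the real renderer ships in the new `flowyml/core/display.py`:

```python
from typing import Any, Optional


class ConsoleDisplay:
    """Hypothetical stand-in matching the keyword arguments used in the diff."""

    def update_step_status(
        self,
        step_name: str,
        status: str,                       # "running" | "success" | "failed"
        duration: Optional[float] = None,  # seconds, set on completion
        cached: bool = False,
        error: Optional[Any] = None,
    ) -> None:
        suffix = f" ({duration:.2f}s)" if duration is not None else ""
        suffix += " [cached]" if cached else ""
        suffix += f" error={error}" if error else ""
        print(f"{step_name}: {status}{suffix}")


display = ConsoleDisplay()
display.update_step_status(step_name="train", status="running")
display.update_step_status(step_name="train", status="success", duration=1.5)
```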
@@ -124,10 +141,76 @@ class LocalOrchestrator(Orchestrator):
                     if step_result.output is not None:
                         self._process_step_output(pipeline, step_result, step_outputs, result)
 
+                    # Save checkpoint after successful step
+                    checkpoint = getattr(pipeline, "_checkpoint", None)
+                    if checkpoint and step_result.success:
+                        try:
+                            # Save step outputs to checkpoint
+                            checkpoint.save_step_state(
+                                step_name=step_result.step_name,
+                                outputs=step_outputs,
+                                metadata={
+                                    "duration": step_result.duration_seconds,
+                                    "cached": step_result.cached,
+                                },
+                            )
+                        except Exception as e:
+                            # Don't fail pipeline if checkpoint save fails
+                            import warnings
+
+                            warnings.warn(
+                                f"Failed to save checkpoint for step {step_result.step_name}: {e}",
+                                stacklevel=2,
+                            )
+
+                    # Check for control flows that need to be evaluated after this step
+                    self._evaluate_control_flows(pipeline, step_result, step_outputs, result, run_id)
+
             else:
                 # Execute single ungrouped step
                 step = unit
 
+                # Skip step if already completed in checkpoint
+                if resume_from_checkpoint and step.name in completed_steps_from_checkpoint:
+                    if hasattr(pipeline, "_display") and pipeline._display:
+                        pipeline._display.update_step_status(
+                            step_name=step.name,
+                            status="success",
+                            cached=True,
+                        )
+
+                    # Load step outputs from checkpoint
+                    try:
+                        if checkpoint:
+                            step_outputs_from_checkpoint = checkpoint.load_step_state(step.name)
+
+                            # Process checkpoint outputs
+                            if isinstance(step_outputs_from_checkpoint, dict):
+                                for output_name, output_value in step_outputs_from_checkpoint.items():
+                                    step_outputs[output_name] = output_value
+                                    result.outputs[output_name] = output_value
+
+                            # Create a mock ExecutionResult for checkpointed step
+                            step_result = ExecutionResult(
+                                step_name=step.name,
+                                success=True,
+                                output=step_outputs_from_checkpoint,
+                                duration_seconds=0.0,
+                                cached=True,
+                            )
+                            result.add_step_result(step_result)
+
+                        # Continue to next step
+                        continue
+                    except Exception as e:
+                        # If checkpoint load fails, execute the step normally
+                        import warnings
+
+                        warnings.warn(
+                            f"Failed to load checkpoint for step {step.name}: {e}. Executing step normally.",
+                            stacklevel=2,
+                        )
+
                 # Prepare step inputs
                 step_inputs = {}
 
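The checkpoint contract visible here is two methods, `save_step_state(step_name, outputs, metadata=...)` and `load_step_state(step_name)`, with both call sites wrapped so a broken checkpoint never kills the run: a failed save only warns, and a failed load falls back to executing the step. A minimal file-backed sketch of that contract (illustrative only; the real implementation lives in `flowyml/core/checkpoint.py`):

```python
import json
from pathlib import Path
from typing import Any


class FileCheckpoint:
    """Illustrative checkpoint exposing the two methods the orchestrator calls."""

    def __init__(self, root: Path) -> None:
        self.root = root
        self.root.mkdir(parents=True, exist_ok=True)

    def save_step_state(
        self,
        step_name: str,
        outputs: dict[str, Any],
        metadata: dict[str, Any] | None = None,
    ) -> None:
        payload = {"outputs": outputs, "metadata": metadata or {}}
        (self.root / f"{step_name}.json").write_text(json.dumps(payload))

    def load_step_state(self, step_name: str) -> dict[str, Any]:
        payload = json.loads((self.root / f"{step_name}.json").read_text())
        return payload["outputs"]


ckpt = FileCheckpoint(Path("/tmp/flowyml_ckpt_demo"))
ckpt.save_step_state("load_data", {"rows": 100}, metadata={"duration": 0.2})
print(ckpt.load_step_state("load_data"))  # {'rows': 100}
```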
@@ -186,6 +269,10 @@ class LocalOrchestrator(Orchestrator):
                 # Get context parameters for this step
                 context_params = pipeline.context.inject_params(step.func)
 
+                # Update display - step starting
+                if hasattr(pipeline, "_display") and pipeline._display:
+                    pipeline._display.update_step_status(step_name=step.name, status="running")
+
                 # Run step start hooks
                 hooks.run_step_start_hooks(step, step_inputs)
 
@@ -203,6 +290,16 @@ class LocalOrchestrator(Orchestrator):
                 # Run step end hooks
                 hooks.run_step_end_hooks(step, step_result)
 
+                # Update display - step completed
+                if hasattr(pipeline, "_display") and pipeline._display:
+                    pipeline._display.update_step_status(
+                        step_name=step.name,
+                        status="success" if step_result.success else "failed",
+                        duration=step_result.duration_seconds,
+                        cached=step_result.cached,
+                        error=step_result.error,
+                    )
+
                 result.add_step_result(step_result)
 
                 # Handle failure
@@ -216,9 +313,48 @@ class LocalOrchestrator(Orchestrator):
                 if step_result.output is not None:
                     self._process_step_output(pipeline, step_result, step_outputs, result)
 
+                # Save checkpoint after successful step
+                checkpoint = getattr(pipeline, "_checkpoint", None)
+                if checkpoint and step_result.success:
+                    try:
+                        # Save step outputs to checkpoint
+                        checkpoint.save_step_state(
+                            step_name=step.name,
+                            outputs=step_outputs,
+                            metadata={
+                                "duration": step_result.duration_seconds,
+                                "cached": step_result.cached,
+                            },
+                        )
+                    except Exception as e:
+                        # Don't fail pipeline if checkpoint save fails
+                        import warnings
+
+                        warnings.warn(f"Failed to save checkpoint for step {step.name}: {e}", stacklevel=2)
+
+                # Check for control flows that need to be evaluated after this step
+                self._evaluate_control_flows(pipeline, step_result, step_outputs, result, run_id)
+
         # Success! Finalize and return
         result.finalize(success=True)
 
+        # Save final checkpoint if checkpointing is enabled
+        checkpoint = getattr(pipeline, "_checkpoint", None)
+        if checkpoint and result.success:
+            try:
+                checkpoint.save_step_state(
+                    "pipeline_complete",
+                    result.outputs,
+                    metadata={
+                        "duration": result.duration_seconds,
+                        "success": True,
+                    },
+                )
+            except Exception as e:
+                import warnings
+
+                warnings.warn(f"Failed to save final checkpoint: {e}", stacklevel=2)
+
         # Run pipeline end hooks
         hooks.run_pipeline_end_hooks(pipeline, result)
 
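Note that every checkpoint fallback reports via `warnings.warn(..., stacklevel=2)`: the warning is attributed to the caller of the function that warns rather than to the `warn()` line itself, which makes the log point at the run that failed to checkpoint. A small demonstration:

```python
import warnings


def save_or_warn() -> None:
    # stacklevel=2 attributes the warning to whoever called save_or_warn(),
    # which is more useful in logs than pointing at this line.
    warnings.warn("failed to save checkpoint (demo)", stacklevel=2)


def run_pipeline() -> None:
    save_or_warn()  # the reported filename:lineno is this call site


run_pipeline()
```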
@@ -230,6 +366,363 @@ class LocalOrchestrator(Orchestrator):
             pipeline._save_pipeline_definition()
         return result
 
+    def _evaluate_control_flows(
+        self,
+        pipeline: "Pipeline",
+        step_result: "ExecutionResult",
+        step_outputs: dict[str, Any],
+        result: "PipelineResult",
+        run_id: str,
+    ) -> None:
+        """Evaluate control flows after a step completes.
+
+        Args:
+            pipeline: Pipeline instance
+            step_result: Result of the step that just completed
+            step_outputs: Current step outputs dictionary
+            result: Pipeline result object
+            run_id: Run identifier
+        """
+        from flowyml.core.conditional import If
+
+        # Create a context object for condition evaluation
+        class ExecutionContext:
+            """Context object for conditional evaluation.
+
+            Provides access to step outputs via ctx.steps['step_name'].outputs['output_name']
+            """
+
+            def __init__(self, result: "PipelineResult", pipeline: "Pipeline"):
+                self.result = result
+                self.pipeline = pipeline
+                self._steps_cache = None
+
+            @property
+            def steps(self):
+                """Lazy-load steps dictionary with outputs."""
+                if self._steps_cache is None:
+                    self._steps_cache = {}
+                    # Build steps dictionary with outputs
+                    for step_name, step_res in self.result.step_results.items():
+                        if step_res.success and step_res.output is not None:
+                            step_def = next((s for s in self.pipeline.steps if s.name == step_name), None)
+                            if step_def:
+                                # Create step outputs dictionary
+                                step_outputs = {}
+                                if len(step_def.outputs) == 1:
+                                    step_outputs[step_def.outputs[0]] = step_res.output
+                                elif isinstance(step_res.output, dict):
+                                    step_outputs = step_res.output
+                                elif step_def.outputs:
+                                    # Try to map tuple/list outputs
+                                    if isinstance(step_res.output, (list, tuple)) and len(step_res.output) == len(
+                                        step_def.outputs,
+                                    ):
+                                        for name, val in zip(step_def.outputs, step_res.output, strict=False):
+                                            step_outputs[name] = val
+                                    else:
+                                        step_outputs[step_def.outputs[0]] = step_res.output
+
+                                # Create step object with outputs attribute that supports Asset objects
+                                class StepContext:
+                                    def __init__(self, outputs):
+                                        # Wrap outputs to support Asset object access
+                                        self._raw_outputs = outputs
+                                        self.outputs = self._wrap_outputs(outputs)
+
+                                    def _wrap_outputs(self, outputs):
+                                        """Wrap outputs to support Asset object property access."""
+                                        wrapped = {}
+                                        for key, value in outputs.items():
+                                            wrapped[key] = self._wrap_asset(value)
+                                        return wrapped
+
+                                    def _wrap_asset(self, value):
+                                        """Wrap Asset objects to expose their properties."""
+                                        # Check if it's an Asset object
+                                        from flowyml.assets.base import Asset
+                                        from flowyml.assets.metrics import Metrics
+                                        from flowyml.assets.featureset import FeatureSet
+
+                                        if isinstance(value, Asset):
+                                            # Create a wrapper that exposes Asset properties
+                                            class AssetWrapper:
+                                                def __init__(self, asset):
+                                                    self._asset = asset
+                                                    # Expose the asset itself
+                                                    self._self = asset
+
+                                                def __getattr__(self, name):  # noqa: B023
+                                                    # Handle special cases FIRST before generic hasattr check
+                                                    # This is important because Asset has a 'metadata' attribute
+                                                    # that is an AssetMetadata dataclass, not a dict
+
+                                                    # Expose metadata as a merged dict of properties + tags
+                                                    # This MUST come before hasattr check because Asset.metadata
+                                                    # is an AssetMetadata dataclass, not a dict
+                                                    if name == "metadata":  # noqa: B023
+                                                        # Create a dict that merges properties and tags
+                                                        # Tags take precedence if there's a conflict
+                                                        metadata_dict = (
+                                                            dict(self._asset.properties)
+                                                            if self._asset.properties
+                                                            else {}
+                                                        )
+                                                        if self._asset.tags:
+                                                            metadata_dict.update(self._asset.tags)
+                                                        return metadata_dict
+
+                                                    # For Metrics, map .metrics to .data or .get_all_metrics()
+                                                    if isinstance(self._asset, Metrics):
+                                                        if name == "metrics":  # noqa: B023
+                                                            return (
+                                                                self._asset.get_all_metrics() or self._asset.data or {}  # noqa: B023
+                                                            )
+
+                                                    # Now try to get from asset (handles all Asset properties)
+                                                    try:
+                                                        if hasattr(self._asset, name):  # noqa: B023
+                                                            attr = getattr(self._asset, name)  # noqa: B023
+                                                            # If it's a property/method, return it
+                                                            # If it's callable but we want the value, call it
+                                                            if callable(attr) and not isinstance(attr, type):
+                                                                # It's a method, not a property - return as-is
+                                                                return attr
+                                                            return attr
+                                                    except Exception as e:
+                                                        # If accessing the attribute fails, log and continue to fallback logic
+                                                        # This can happen if a property raises an exception
+                                                        import warnings
+
+                                                        warnings.warn(
+                                                            f"Failed to access attribute '{name}' on {type(self._asset).__name__}: {e}",  # noqa: B023
+                                                            stacklevel=3,
+                                                        )
+                                                        pass
+
+                                                    # Fallback: expose common properties/tags/data
+                                                    if name == "data":  # noqa: B023
+                                                        return self._asset.data
+
+                                                    if name == "properties":  # noqa: B023
+                                                        return self._asset.properties
+
+                                                    if name == "tags":  # noqa: B023
+                                                        return self._asset.tags
+
+                                                    raise AttributeError(  # noqa: B023
+                                                        f"'{type(self).__name__}' object has no attribute '{name}'",  # noqa: B023
+                                                    )
+
+                                                def __getitem__(self, key):
+                                                    """Allow dict-like access for Metrics.data and Asset.data."""
+                                                    # For Metrics, access via get_all_metrics()
+                                                    if isinstance(self._asset, Metrics):
+                                                        metrics = (
+                                                            self._asset.get_all_metrics() or self._asset.data or {}
+                                                        )
+                                                        if isinstance(metrics, dict):
+                                                            return metrics[key]  # noqa: B023
+
+                                                    # For all Assets, allow dict access to .data if it's a dict
+                                                    if isinstance(self._asset.data, dict):
+                                                        return self._asset.data[key]
+
+                                                    # For FeatureSet, allow access to statistics
+                                                    if isinstance(self._asset, FeatureSet):
+                                                        if key in self._asset.statistics:
+                                                            return self._asset.statistics[key]
+
+                                                    raise KeyError(f"'{key}' not found in {type(self._asset).__name__}")
+
+                                                def __contains__(self, key):
+                                                    """Support 'in' operator."""
+                                                    # For Metrics, check in metrics dict
+                                                    if isinstance(self._asset, Metrics):
+                                                        metrics = (
+                                                            self._asset.get_all_metrics() or self._asset.data or {}
+                                                        )
+                                                        if isinstance(metrics, dict):
+                                                            return key in metrics
+
+                                                    # For all Assets, check in .data if it's a dict
+                                                    if isinstance(self._asset.data, dict):
+                                                        return key in self._asset.data
+
+                                                    # For FeatureSet, check in statistics
+                                                    if isinstance(self._asset, FeatureSet):
+                                                        return key in self._asset.statistics
+
+                                                    return False
+
+                                                def __repr__(self):
+                                                    return f"<AssetWrapper({type(self._asset).__name__})>"
+
+                                            return AssetWrapper(value)
+                                        # For dict values, return as-is but allow attribute access
+                                        elif isinstance(value, dict):
+
+                                            class DictWrapper(dict):
+                                                """Dict wrapper that allows attribute access."""
+
+                                                def __getattr__(self, name):  # noqa: B023
+                                                    if name in self:  # noqa: B023
+                                                        return self[name]  # noqa: B023
+                                                    raise AttributeError(  # noqa: B023
+                                                        f"'{type(self).__name__}' object has no attribute '{name}'",  # noqa: B023
+                                                    )
+
+                                            return DictWrapper(value)
+                                        # For other types, return as-is
+                                        return value
+
+                                self._steps_cache[step_name] = StepContext(step_outputs)
+                return self._steps_cache
+
+        context = ExecutionContext(result, pipeline)
+
+        # Evaluate each control flow
+        for control_flow in pipeline.control_flows:
+            if isinstance(control_flow, If):
+                try:
+                    selected_step = control_flow.evaluate(context)
+                except Exception as e:
+                    # If condition evaluation fails, log the error with full traceback for debugging
+                    import warnings
+                    import traceback
+
+                    warnings.warn(
+                        f"Failed to evaluate control flow condition: {e}\n{traceback.format_exc()}",
+                        stacklevel=2,
+                    )
+                    # If condition evaluation fails, try to execute else_step as fallback
+                    # This ensures we don't silently skip execution
+                    selected_step = control_flow.else_step
+
+                # Execute selected_step if it exists (could be then_step, else_step, or None)
+                if selected_step:
+                    from flowyml.core.step import Step
+
+                    # Check if selected_step is already a Step object or a function
+                    if isinstance(selected_step, Step):
+                        # Already a Step object (e.g., from @step decorator), use it directly
+                        step_obj = selected_step
+                    else:
+                        # It's a function, try to find existing Step in pipeline.steps
+                        # or check if any step has this function
+                        step_obj = next((s for s in pipeline.steps if s.func == selected_step), None)
+
+                        # If step not found in pipeline.steps, it's a conditional step - create Step object on the fly
+                        if step_obj is None:
+                            # Get function name safely
+                            func_name = getattr(selected_step, "__name__", "conditional_step")
+                            # Create a Step object for the conditional step function
+                            step_obj = Step(
+                                func=selected_step,
+                                name=func_name,
+                                inputs=[],  # Conditional steps may not have explicit inputs
+                                outputs=[],  # Conditional steps may not have explicit outputs
+                            )
+
+                    if step_obj.name not in result.step_results:
+                        # Execute the selected step
+                        # The check above prevents re-execution of the same step
+                        self._execute_conditional_step(
+                            pipeline,
+                            step_obj,
+                            step_outputs,
+                            result,
+                            run_id,
+                        )
+                        # Note: Control flows will be re-evaluated after conditional step completes
+
+    def _execute_conditional_step(
+        self,
+        pipeline: "Pipeline",
+        step: "Step",
+        step_outputs: dict[str, Any],
+        result: "PipelineResult",
+        run_id: str,
+    ) -> None:
+        """Execute a step that was selected by conditional logic.
+
+        Args:
+            pipeline: Pipeline instance
+            step: Step to execute
+            step_outputs: Current step outputs
+            result: Pipeline result object
+            run_id: Run identifier
+        """
+        # Prepare step inputs (similar to regular step execution)
+        import inspect
+
+        step_inputs = {}
+        sig = inspect.signature(step.func)
+        params = [p for p in sig.parameters.values() if p.name not in ("self", "cls")]
+
+        for param in params:
+            if param.name in step_outputs:
+                step_inputs[param.name] = step_outputs[param.name]
+
+        # Get context parameters
+        context_params = pipeline.context.inject_params(step.func)
+
+        # Update display
+        if hasattr(pipeline, "_display") and pipeline._display:
+            pipeline._display.update_step_status(step_name=step.name, status="running")
+
+        # Execute step
+        step_result = pipeline.executor.execute_step(
+            step,
+            step_inputs,
+            context_params,
+            pipeline.cache_store,
+            artifact_store=pipeline.stack.artifact_store if pipeline.stack else None,
+            run_id=run_id,
+            project_name=pipeline.name,
+        )
+
+        # Update display
+        if hasattr(pipeline, "_display") and pipeline._display:
+            pipeline._display.update_step_status(
+                step_name=step.name,
+                status="success" if step_result.success else "failed",
+                duration=step_result.duration_seconds,
+                cached=step_result.cached,
+                error=step_result.error,
+            )
+
+        result.add_step_result(step_result)
+
+        # Handle failure
+        if not step_result.success:
+            result.finalize(success=False)
+            return
+
+        # Process outputs
+        if step_result.output is not None:
+            self._process_step_output(pipeline, step_result, step_outputs, result)
+
+        # Save checkpoint after successful conditional step
+        checkpoint = getattr(pipeline, "_checkpoint", None)
+        if checkpoint and step_result.success:
+            try:
+                checkpoint.save_step_state(
+                    step_name=step.name,
+                    outputs=step_outputs,
+                    metadata={
+                        "duration": step_result.duration_seconds,
+                        "cached": step_result.cached,
+                    },
+                )
+            except Exception as e:
+                import warnings
+
+                warnings.warn(f"Failed to save checkpoint for conditional step {step.name}: {e}", stacklevel=2)
+
+        # Check for control flows that need to be evaluated after conditional step
+        self._evaluate_control_flows(pipeline, step_result, step_outputs, result, run_id)
+
     def _process_step_output(self, pipeline, step_result, step_outputs, result):
         """Helper to process step outputs and update state."""
         from pathlib import Path