PyPI - flowyml - Versions diffs - 1.5.0__py3-none-any.whl → 1.7.0__py3-none-any.whl - Mend

flowyml 1.5.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

flowyml/__init__.py +2 -1
flowyml/assets/featureset.py +30 -5
flowyml/assets/metrics.py +47 -4
flowyml/cli/main.py +397 -0
flowyml/cli/models.py +444 -0
flowyml/cli/rich_utils.py +95 -0
flowyml/core/checkpoint.py +6 -1
flowyml/core/conditional.py +104 -0
flowyml/core/display.py +595 -0
flowyml/core/executor.py +27 -6
flowyml/core/orchestrator.py +500 -7
flowyml/core/pipeline.py +447 -11
flowyml/core/project.py +4 -1
flowyml/core/scheduler.py +225 -81
flowyml/core/versioning.py +13 -4
flowyml/registry/model_registry.py +1 -1
flowyml/ui/frontend/dist/assets/index-By4trVyv.css +1 -0
flowyml/ui/frontend/dist/assets/{index-DF8dJaFL.js → index-CX5RV2C9.js} +118 -117
flowyml/ui/frontend/dist/index.html +2 -2
flowyml/ui/frontend/src/components/PipelineGraph.jsx +43 -4
flowyml/ui/server_manager.py +189 -0
flowyml/ui/utils.py +66 -2
flowyml/utils/config.py +7 -0
{flowyml-1.5.0.dist-info → flowyml-1.7.0.dist-info}/METADATA +5 -3
{flowyml-1.5.0.dist-info → flowyml-1.7.0.dist-info}/RECORD +28 -24
flowyml/ui/frontend/dist/assets/index-CBUXOWze.css +0 -1
{flowyml-1.5.0.dist-info → flowyml-1.7.0.dist-info}/WHEEL +0 -0
{flowyml-1.5.0.dist-info → flowyml-1.7.0.dist-info}/entry_points.txt +0 -0
{flowyml-1.5.0.dist-info → flowyml-1.7.0.dist-info}/licenses/LICENSE +0 -0

flowyml/core/pipeline.py CHANGED Viewed

@@ -134,17 +134,53 @@ class Pipeline:
         >>> pipeline = Pipeline("my_pipeline", context=ctx)
         >>> pipeline.add_step(train)
         >>> result = pipeline.run()
+        # With project_name, automatically creates/attaches to project
+        >>> pipeline = Pipeline("my_pipeline", context=ctx, project_name="ml_project")
+        # With version parameter, automatically creates VersionedPipeline
+        >>> pipeline = Pipeline("my_pipeline", context=ctx, version="v1.0.1", project_name="ml_project")
     """
+    def __new__(
+        cls,
+        name: str,
+        *args,
+        version: str | None = None,
+        project_name: str | None = None,
+        project: str | None = None,  # For backward compatibility
+        **kwargs,
+    ):
+        """Create a Pipeline, or a VersionedPipeline when ``version`` is given.
+        Instantiating a class passes the same arguments to ``__new__`` and
+        ``__init__``, so positional arguments after ``name`` are read here in
+        ``__init__`` order (``context``, ``executor``, ...). This keeps a call
+        such as ``Pipeline("p", ctx)`` from treating ``ctx`` as the version.
+        Args:
+            name: Pipeline name.
+            *args: Positional arguments in ``__init__`` order, starting at ``context``.
+            version: If not None, a VersionedPipeline is returned instead.
+            project_name: Project name forwarded to VersionedPipeline (preferred).
+            project: Legacy alias, forwarded only when ``project_name`` is falsy.
+            **kwargs: Remaining keyword arguments, forwarded to VersionedPipeline.
+        Returns:
+            A new instance of ``cls`` when no version is given, otherwise the
+            object returned by ``VersionedPipeline(...)``.
+        Raises:
+            TypeError: If more positional arguments are given than ``__init__`` declares.
+        """
+        # Positional parameters of __init__ after ``name``, in declaration order.
+        init_order = (
+            "context",
+            "executor",
+            "enable_cache",
+            "enable_checkpointing",
+            "enable_experiment_tracking",
+            "cache_dir",
+            "stack",
+            "project",
+            "project_name",
+            "version",
+        )
+        if len(args) > len(init_order):
+            raise TypeError(
+                f"Pipeline() takes at most {len(init_order) + 1} positional arguments "
+                f"({len(args) + 1} given)",
+            )
+        positional = dict(zip(init_order, args))
+        # Routing parameters may also arrive positionally (in their __init__ slots).
+        version = positional.pop("version", version)
+        project_name = positional.pop("project_name", project_name)
+        project = positional.pop("project", project)
+        if version is None:
+            return super().__new__(cls)
+        from flowyml.core.versioning import VersionedPipeline
+        # Forward everything by keyword so VersionedPipeline's own positional
+        # layout does not matter; explicit keywords win over positional values.
+        vp_kwargs = {**positional, **kwargs}
+        if project_name:
+            vp_kwargs["project_name"] = project_name
+        elif project:
+            vp_kwargs["project"] = project
+        return VersionedPipeline(name=name, version=version, **vp_kwargs)
     def __init__(
         self,
         name: str,
         context: Context | None = None,
         executor: Executor | None = None,
         enable_cache: bool = True,
+        enable_checkpointing: bool | None = None,  # None means use config default
+        enable_experiment_tracking: bool | None = None,  # None means use config default (True)
         cache_dir: str | None = None,
         stack: Any | None = None,  # Stack instance
-        project: str | None = None,  # Project name to attach to
+        project: str | None = None,  # Project name to attach to (deprecated, use project_name)
+        project_name: str | None = None,  # Project name to attach to (creates if doesn't exist)
+        version: str | None = None,  # If provided, VersionedPipeline is created via __new__
+        **kwargs,
     ):
         """Initialize pipeline.
@@ -153,13 +189,35 @@ class Pipeline:
             context: Optional context for parameter injection
             executor: Optional executor (defaults to LocalExecutor)
             enable_cache: Whether to enable caching
+            enable_checkpointing: Whether to enable checkpointing (defaults to config setting, True by default)
+            enable_experiment_tracking: Whether to enable automatic experiment tracking (defaults to config.auto_log_metrics, True by default)
             cache_dir: Optional directory for cache
             stack: Optional stack instance to run on
-            project: Optional project name to attach this pipeline to.
+            project: Optional project name to attach this pipeline to (deprecated, use project_name)
+            project_name: Optional project name to attach this pipeline to.
+                If the project doesn't exist, it will be created automatically.
+            version: Optional version string. If provided, a VersionedPipeline
+                instance will be created instead of a regular Pipeline.
+            **kwargs: Additional keyword arguments passed to the pipeline.
+                instance is automatically created instead of a regular Pipeline.
         """
+        from flowyml.utils.config import get_config
         self.name = name
         self.context = context or Context()
         self.enable_cache = enable_cache
+        # Set checkpointing (use config default if not specified)
+        config = get_config()
+        self.enable_checkpointing = (
+            enable_checkpointing if enable_checkpointing is not None else config.enable_checkpointing
+        )
+        # Set experiment tracking (use config default if not specified, default: True)
+        # Can be set via enable_experiment_tracking parameter or defaults to config.auto_log_metrics
+        self.enable_experiment_tracking = (
+            enable_experiment_tracking if enable_experiment_tracking is not None else config.auto_log_metrics
+        )
         self.stack = None  # Will be assigned via _apply_stack
         self._stack_locked = stack is not None
         self._provided_executor = executor
@@ -182,23 +240,28 @@ class Pipeline:
         # Initialize components from stack or defaults
         self.executor = executor or LocalExecutor()
-        # Metadata store for UI integration
+        # Metadata store for UI integration - use same store as UI
         from flowyml.storage.metadata import SQLiteMetadataStore
+        from flowyml.utils.config import get_config
-        self.metadata_store = SQLiteMetadataStore()
+        config = get_config()
+        # Use the same metadata database path as the UI to ensure visibility
+        self.metadata_store = SQLiteMetadataStore(db_path=str(config.metadata_db))
         if stack:
             self._apply_stack(stack, locked=True)
         # Handle Project Attachment
-        if project:
+        # Support both project_name (preferred) and project (for backward compatibility)
+        project_to_use = project_name or project
+        if project_to_use:
             from flowyml.core.project import ProjectManager
             manager = ProjectManager()
             # Get or create project
-            proj = manager.get_project(project)
+            proj = manager.get_project(project_to_use)
             if not proj:
-                proj = manager.create_project(project)
+                proj = manager.create_project(project_to_use)
             # Configure pipeline with project settings
             self.runs_dir = proj.runs_dir
@@ -209,9 +272,15 @@ class Pipeline:
                 proj.metadata["pipelines"].append(name)
                 proj._save_metadata()
+            # Store project name for later use (e.g., in _save_run)
+            self.project_name = project_to_use
+        else:
+            self.project_name = None
         # State
         self._built = False
         self.step_groups: list[Any] = []  # Will hold StepGroup objects
+        self.control_flows: list[Any] = []  # Store conditional control flows (If, Switch, etc.)
     def _apply_stack(self, stack: Any | None, locked: bool) -> None:
         """Attach a stack and update executors/metadata."""
@@ -238,6 +307,32 @@ class Pipeline:
         self._built = False
         return self
+    def add_control_flow(self, control_flow: Any) -> "Pipeline":
+        """Register a conditional control-flow object on this pipeline.
+        The object is appended to ``self.control_flows`` and the pipeline is
+        flagged as not built.
+        Args:
+            control_flow: Control flow object (If, Switch, etc.)
+        Returns:
+            This pipeline, so calls can be chained.
+        Example:
+            ```python
+            from flowyml import If
+            pipeline.add_control_flow(
+                If(
+                    condition=lambda ctx: ctx.steps["evaluate_model"].outputs["accuracy"] > 0.9,
+                    then_step=deploy_model,
+                    else_step=retrain_model,
+                )
+            )
+            ```
+        """
+        registered = self.control_flows
+        registered.append(control_flow)
+        # Reset the built flag after the pipeline definition has changed.
+        self._built = False
+        return self
     def build(self) -> None:
         """Build the execution DAG."""
         if self._built:
@@ -277,9 +372,11 @@ class Pipeline:
         inputs: dict[str, Any] | None = None,
         debug: bool = False,
         stack: Any | None = None,  # Stack override
+        orchestrator: Any | None = None,  # Orchestrator override (takes precedence over stack orchestrator)
         resources: Any | None = None,  # ResourceConfig
         docker_config: Any | None = None,  # DockerConfig
         context: dict[str, Any] | None = None,  # Context vars override
+        auto_start_ui: bool = True,  # Auto-start UI server
         **kwargs,
     ) -> PipelineResult:
         """Execute the pipeline.
@@ -287,19 +384,76 @@ class Pipeline:
         Args:
             inputs: Optional input data for the pipeline
             debug: Enable debug mode with detailed logging
-            stack: Stack override (uses self.stack if not provided)
+            stack: Stack override (uses self.stack or active stack if not provided)
+            orchestrator: Orchestrator override (takes precedence over stack orchestrator)
             resources: Resource configuration for execution
             docker_config: Docker configuration for containerized execution
             context: Context variables override
+            auto_start_ui: Automatically start UI server if not running and display URL
             **kwargs: Additional arguments passed to the orchestrator
+        Note:
+            The orchestrator is determined in this priority order:
+            1. Explicit `orchestrator` parameter (if provided)
+            2. Stack's orchestrator (if stack is set/active)
+            3. Default LocalOrchestrator
+            When using a stack (e.g., GCPStack), the stack's orchestrator is automatically
+            used unless explicitly overridden. This is the recommended approach for
+            production deployments.
         Returns:
             PipelineResult with outputs and execution info
         """
         import uuid
         from flowyml.core.orchestrator import LocalOrchestrator
+        from flowyml.core.checkpoint import PipelineCheckpoint
+        from flowyml.utils.config import get_config
-        run_id = str(uuid.uuid4())
+        # Generate or use provided run_id
+        run_id = kwargs.pop("run_id", None) or str(uuid.uuid4())
+        # Initialize checkpointing if enabled
+        if self.enable_checkpointing:
+            config = get_config()
+            checkpoint = PipelineCheckpoint(
+                run_id=run_id,
+                checkpoint_dir=str(config.checkpoint_dir),
+            )
+            # Check if we should resume from checkpoint
+            if checkpoint.exists():
+                checkpoint_data = checkpoint.load()
+                completed_steps = checkpoint_data.get("completed_steps", [])
+                if completed_steps:
+                    # Auto-resume: use checkpoint state
+                    if hasattr(self, "_display") and self._display:
+                        self._display.console.print(
+                            f"[yellow]📦 Resuming from checkpoint: {len(completed_steps)} steps already completed[/yellow]",
+                        )
+                    # Store checkpoint info for orchestrator
+                    self._checkpoint = checkpoint
+                    self._resume_from_checkpoint = True
+                    self._completed_steps_from_checkpoint = set(completed_steps)
+                else:
+                    self._checkpoint = checkpoint
+                    self._resume_from_checkpoint = False
+                    self._completed_steps_from_checkpoint = set()
+            else:
+                self._checkpoint = checkpoint
+                self._resume_from_checkpoint = False
+                self._completed_steps_from_checkpoint = set()
+        else:
+            self._checkpoint = None
+            self._resume_from_checkpoint = False
+            self._completed_steps_from_checkpoint = set()
+        # Auto-start UI server if requested
+        ui_url = None
+        run_url = None
+        ui_start_failed = False
+        if auto_start_ui:
+            ui_url, run_url, ui_start_failed = self._ensure_ui_server(run_id)
         # Determine stack for this run
         if stack is not None:
@@ -316,9 +470,12 @@ class Pipeline:
                 self._apply_stack(active_stack, locked=False)
         # Determine orchestrator
-        orchestrator = getattr(self.stack, "orchestrator", None) if self.stack else None
+        # Priority: 1) Explicit orchestrator parameter, 2) Stack orchestrator, 3) Default LocalOrchestrator
         if orchestrator is None:
-            orchestrator = LocalOrchestrator()
+            # Use orchestrator from stack if available
+            orchestrator = getattr(self.stack, "orchestrator", None) if self.stack else None
+            if orchestrator is None:
+                orchestrator = LocalOrchestrator()
         # Update context with provided values
         if context:
@@ -331,6 +488,28 @@ class Pipeline:
         resource_config = self._coerce_resource_config(resources)
         docker_cfg = self._coerce_docker_config(docker_config)
+        # Initialize display system for beautiful CLI output
+        display = None
+        try:
+            from flowyml.core.display import PipelineDisplay
+            display = PipelineDisplay(
+                pipeline_name=self.name,
+                steps=self.steps,
+                dag=self.dag,
+                verbose=True,
+                ui_url=ui_url,  # Pass UI URL for prominent display at start
+                run_url=run_url,  # Pass run-specific URL for clickable link
+            )
+            display.show_header()
+            display.show_execution_start()
+        except Exception:
+            # Silently fail if display system not available
+            pass
+        # Store display on pipeline for orchestrator to use
+        self._display = display
         # Run the pipeline via orchestrator
         result = orchestrator.run_pipeline(
             self,
@@ -342,6 +521,10 @@ class Pipeline:
             **kwargs,
         )
+        # Show summary (only if result is a PipelineResult, not a string)
+        if display and not isinstance(result, str):
+            display.show_summary(result, ui_url=ui_url, run_url=run_url)
         # If result is just a job ID (remote execution), wrap it in a basic result
         if isinstance(result, str):
             # Create a submitted result wrapper
@@ -352,6 +535,10 @@ class Pipeline:
             self._save_pipeline_definition()
             return wrapper
+        # Ensure result has configs attached (in case orchestrator didn't do it)
+        if hasattr(result, "attach_configs") and not hasattr(result, "resource_config"):
+            result.attach_configs(resource_config, docker_cfg)
         return result
     def to_definition(self) -> dict:
@@ -368,6 +555,7 @@ class Pipeline:
                     "outputs": step.outputs,
                     "source_code": step.source_code,
                     "tags": step.tags,
+                    "execution_group": step.execution_group,
                 }
                 for step in self.steps
             ],
@@ -395,6 +583,163 @@ class Pipeline:
             # Don't fail the run if definition saving fails
             print(f"Warning: Failed to save pipeline definition: {e}")
+    def _ensure_ui_server(self, run_id: str) -> tuple[str | None, str | None, bool]:
+        """Ensure the UI server is reachable, launching it in the background if needed.
+        Args:
+            run_id: The run ID used to build the run-specific URL
+        Returns:
+            Tuple of (ui_url, run_url, start_failed)
+            - ui_url: Base URL of the UI server if it is running, else None
+            - run_url: URL of this specific run if the server is running, else None
+            - start_failed: True if a start was attempted (or uvicorn is missing)
+              and guidance was shown; False otherwise
+        """
+        import subprocess
+        import sys
+        import time
+        import warnings
+        from pathlib import Path
+        try:
+            from flowyml.ui.utils import is_ui_running, get_ui_host_port
+        except ImportError:
+            # UI helpers unavailable: nothing to start and nothing to report.
+            return None, None, False
+        host, port = get_ui_host_port()
+        url = f"http://{host}:{port}"
+        run_url = f"{url}/runs/{run_id}"
+        # Check if already running
+        if is_ui_running(host, port):
+            return url, run_url, False
+        # Try to start the UI server as a background subprocess
+        try:
+            # Check if uvicorn is available
+            try:
+                import uvicorn  # noqa: F401
+            except ImportError:
+                # uvicorn not installed, show guidance but don't fail
+                self._show_ui_guidance(host, port, reason="missing_deps")
+                return None, None, True
+            # Start uvicorn as a background process
+            cmd = [
+                sys.executable,
+                "-m",
+                "uvicorn",
+                "flowyml.ui.backend.main:app",
+                "--host",
+                host,
+                "--port",
+                str(port),
+                "--log-level",
+                "warning",
+            ]
+            # Platform-specific options that detach the child from this process
+            if sys.platform == "win32":
+                detach = {
+                    "creationflags": subprocess.CREATE_NEW_PROCESS_GROUP | subprocess.DETACHED_PROCESS,
+                }
+            else:
+                detach = {"start_new_session": True}
+            process = subprocess.Popen(
+                cmd,
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+                **detach,
+            )
+            # Wait for server to start (up to 8 seconds)
+            started = False
+            for _ in range(80):
+                time.sleep(0.1)
+                if is_ui_running(host, port):
+                    started = True
+                    break
+                if process.poll() is not None:
+                    # The child already exited; waiting longer cannot help.
+                    break
+            if started:
+                # Save PID for later stop command. A failure here must not
+                # turn a running server into a reported start failure.
+                try:
+                    pid_file = Path.home() / ".flowyml" / "ui_server.pid"
+                    pid_file.parent.mkdir(parents=True, exist_ok=True)
+                    pid_file.write_text(f"{process.pid}\n{host}\n{port}")
+                except OSError as e:
+                    warnings.warn(f"UI server started but its PID file could not be written: {e}", stacklevel=2)
+                return url, run_url, False
+            # Server didn't start: stop the child (if still alive), reap it, show guidance
+            if process.poll() is None:
+                process.terminate()
+                try:
+                    process.wait(timeout=2)
+                except subprocess.TimeoutExpired:
+                    process.kill()
+            self._show_ui_guidance(host, port, reason="start_failed")
+            return None, None, True
+        except Exception:
+            # Best effort: never let UI start-up problems break the pipeline run
+            self._show_ui_guidance(host, port, reason="error")
+            return None, None, True
+    def _show_ui_guidance(self, host: str, port: int, reason: str = "not_running") -> None:
+        """Print a hint explaining how to start the UI dashboard.
+        Renders a rich panel when ``rich`` can be imported and falls back to
+        plain ``print`` output otherwise.
+        Args:
+            host: Host the server should run on (not used in the message)
+            port: Port the server should run on; shown as ``--port`` unless it is 8080
+            reason: Why guidance is shown (not_running, missing_deps, start_failed, error).
+                Only ``missing_deps`` changes the text, adding install instructions.
+        """
+        port_flag = f" --port {port}" if port != 8080 else ""
+        deps_missing = reason == "missing_deps"
+        try:
+            from rich.console import Console
+            from rich.panel import Panel
+            from rich.text import Text
+            from rich import box
+            console = Console()
+            # (text, style) pairs, appended in order to build the panel body
+            segments = [
+                ("💡 ", "yellow"),
+                ("Want to see your pipeline run in a live dashboard?\n\n", "bold"),
+            ]
+            if deps_missing:
+                segments += [
+                    ("UI dependencies not installed. ", "dim"),
+                    ("Install with:\n", ""),
+                    ("  pip install uvicorn fastapi\n\n", "bold cyan"),
+                ]
+            segments += [
+                ("Start the dashboard with:\n", ""),
+                ("  flowyml go", "bold green"),
+            ]
+            if port_flag:
+                segments.append((port_flag, "bold green"))
+            segments += [
+                ("\n\n", ""),
+                ("Then run your pipeline again to see it in the UI!", "dim"),
+            ]
+            content = Text()
+            for text, style in segments:
+                content.append(text, style=style)
+            panel = Panel(
+                content,
+                title="[bold cyan]🌐 Dashboard Available[/bold cyan]",
+                border_style="yellow",
+                box=box.ROUNDED,
+            )
+            console.print()
+            console.print(panel)
+            console.print()
+        except ImportError:
+            # Fallback to simple print
+            rule = "=" * 60
+            lines = ["", rule, "💡 Want to see your pipeline run in a live dashboard?", ""]
+            if deps_missing:
+                lines += [
+                    "   UI dependencies not installed. Install with:",
+                    "     pip install uvicorn fastapi",
+                    "",
+                ]
+            lines += [
+                "   Start the dashboard with:",
+                "     flowyml go" + port_flag,
+                "",
+                "   Then run your pipeline again to see it in the UI!",
+                rule,
+                "",
+            ]
+            for line in lines:
+                print(line)
     def _coerce_resource_config(self, resources: Any | None):
         """Convert resources input to ResourceConfig if necessary."""
         if resources is None:
@@ -425,6 +770,92 @@ class Pipeline:
             return DockerConfig(**docker_config)
         return docker_config
+    def _log_experiment_metrics(self, result: PipelineResult) -> None:
+        """Log Metrics outputs and context parameters to experiment tracking.
+        Collects every ``Metrics`` object found in ``result.outputs`` (directly,
+        or one level deep inside dict outputs) and logs the values together with
+        ``self.context.to_dict()`` to an ``Experiment`` named after the pipeline
+        and to a ``Run``. Does nothing when tracking is disabled or when there
+        is nothing to log. Logging failures are reported as warnings.
+        Args:
+            result: Result of the pipeline run whose outputs are inspected.
+        """
+        from flowyml.utils.config import get_config
+        from flowyml.assets.metrics import Metrics
+        config = get_config()
+        # The per-pipeline setting wins; fall back to the config value (default: True)
+        tracking_on = getattr(self, "enable_experiment_tracking", None)
+        if tracking_on is None:
+            tracking_on = getattr(config, "auto_log_metrics", True)
+        if not tracking_on:
+            return
+        def _values_of(metrics_obj):
+            """Return the metric values held by a Metrics object as a dict."""
+            return metrics_obj.get_all_metrics() or metrics_obj.data or {}
+        collected = {}
+        for out_name, out_value in result.outputs.items():
+            if isinstance(out_value, Metrics):
+                for key, value in _values_of(out_value).items():
+                    if out_name == "metrics" or out_name.endswith("/metrics"):
+                        # Output is itself called "metrics": keep the bare metric key
+                        collected[key] = value
+                    else:
+                        # Prefix with output name to avoid conflicts
+                        collected[f"{out_name}.{key}"] = value
+            elif isinstance(out_value, dict):
+                # Dict outputs may carry Metrics objects as values
+                for key, val in out_value.items():
+                    if isinstance(val, Metrics):
+                        for mkey, mval in _values_of(val).items():
+                            collected[f"{key}.{mkey}"] = mval
+        # Context parameters, via the context's to_dict() method
+        params = self.context.to_dict() if self.context else {}
+        if not (collected or params):
+            return
+        try:
+            from flowyml.tracking.experiment import Experiment
+            from flowyml.tracking.runs import Run
+            # The pipeline name doubles as the experiment name
+            experiment = Experiment(
+                name=self.name,
+                description=f"Auto-tracked experiment for pipeline: {self.name}",
+            )
+            experiment.log_run(
+                run_id=result.run_id,
+                metrics=collected,
+                parameters=params,
+            )
+            # Also create/update Run object for compatibility
+            run = Run(
+                run_id=result.run_id,
+                pipeline_name=self.name,
+                parameters=params,
+            )
+            if collected:
+                run.log_metrics(collected)
+            run.complete(status="success" if result.success else "failed")
+        except Exception as e:
+            # Don't fail pipeline if experiment logging fails
+            import warnings
+            warnings.warn(f"Failed to log experiment metrics: {e}", stacklevel=2)
     def _save_run(self, result: PipelineResult) -> None:
         """Save run results to disk and metadata database."""
         # Save to JSON file
@@ -467,6 +898,7 @@ class Pipeline:
                 "inputs": step.inputs,
                 "outputs": step.outputs,
                 "tags": step.tags,
+                "execution_group": step.execution_group,
                 "resources": step.resources.to_dict() if hasattr(step.resources, "to_dict") else step.resources,
             }
@@ -489,9 +921,13 @@ class Pipeline:
             if hasattr(result.docker_config, "to_dict")
             else result.docker_config,
             "remote_job_id": result.remote_job_id,
+            "project": getattr(self, "project_name", None),  # Include project for stats tracking
         }
         self.metadata_store.save_run(result.run_id, metadata)
+        # Automatic experiment tracking: Extract Metrics and log to experiments
+        self._log_experiment_metrics(result)
         # Save artifacts and metrics
         for step_name, step_result in result.step_results.items():
             if step_result.success and step_result.output is not None:

flowyml/core/project.py CHANGED Viewed

@@ -107,6 +107,9 @@ class Project:
         # Use project metadata store
         pipeline.metadata_store = self.metadata_store
+        # Set project name on pipeline for stats tracking
+        pipeline.project_name = self.name
         # Register pipeline
         if name not in self.metadata["pipelines"]:
             self.metadata["pipelines"].append(name)
@@ -161,7 +164,7 @@ class Project:
     def get_stats(self) -> dict[str, Any]:
         """Get project statistics."""
-        stats = self.metadata_store.get_statistics()
+        stats = self.metadata_store.get_statistics(project=self.name)
         stats["project_name"] = self.name
         stats["pipelines"] = len(self.metadata["pipelines"])
         return stats

flowyml 1.5.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

flowyml 1.5.0__py3-none-any.whl → 1.7.0__py3-none-any.whl