flowyml-1.4.0-py3-none-any.whl → flowyml-1.5.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/core/execution_status.py +1 -0
- flowyml/core/executor.py +175 -3
- flowyml/storage/sql.py +53 -13
- flowyml/ui/backend/main.py +2 -0
- flowyml/ui/backend/routers/assets.py +36 -0
- flowyml/ui/backend/routers/execution.py +2 -2
- flowyml/ui/backend/routers/runs.py +211 -0
- flowyml/ui/backend/routers/stats.py +2 -2
- flowyml/ui/backend/routers/websocket.py +121 -0
- flowyml/ui/frontend/dist/assets/index-CBUXOWze.css +1 -0
- flowyml/ui/frontend/dist/assets/index-DF8dJaFL.js +629 -0
- flowyml/ui/frontend/dist/index.html +2 -2
- flowyml/ui/frontend/package-lock.json +289 -0
- flowyml/ui/frontend/package.json +1 -0
- flowyml/ui/frontend/src/app/compare/page.jsx +213 -0
- flowyml/ui/frontend/src/app/experiments/compare/page.jsx +289 -0
- flowyml/ui/frontend/src/app/experiments/page.jsx +61 -1
- flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +418 -203
- flowyml/ui/frontend/src/app/runs/page.jsx +64 -3
- flowyml/ui/frontend/src/app/settings/page.jsx +1 -1
- flowyml/ui/frontend/src/app/tokens/page.jsx +8 -6
- flowyml/ui/frontend/src/components/ArtifactViewer.jsx +159 -0
- flowyml/ui/frontend/src/components/NavigationTree.jsx +26 -9
- flowyml/ui/frontend/src/components/PipelineGraph.jsx +26 -24
- flowyml/ui/frontend/src/components/RunDetailsPanel.jsx +42 -14
- flowyml/ui/frontend/src/router/index.jsx +4 -0
- {flowyml-1.4.0.dist-info → flowyml-1.5.0.dist-info}/METADATA +1 -1
- {flowyml-1.4.0.dist-info → flowyml-1.5.0.dist-info}/RECORD +31 -27
- flowyml/ui/frontend/dist/assets/index-DcYwrn2j.css +0 -1
- flowyml/ui/frontend/dist/assets/index-Dlz_ygOL.js +0 -592
- {flowyml-1.4.0.dist-info → flowyml-1.5.0.dist-info}/WHEEL +0 -0
- {flowyml-1.4.0.dist-info → flowyml-1.5.0.dist-info}/entry_points.txt +0 -0
- {flowyml-1.4.0.dist-info → flowyml-1.5.0.dist-info}/licenses/LICENSE +0 -0
flowyml/core/execution_status.py
CHANGED
flowyml/core/executor.py
CHANGED
@@ -7,6 +7,133 @@ from typing import Any
 from dataclasses import dataclass
 from datetime import datetime
 
+import threading
+import ctypes
+import requests
+import os
+import inspect
+
+
+class StopExecutionError(Exception):
+    """Exception raised when execution is stopped externally."""
+
+    pass
+
+
+# Alias for backwards compatibility
+StopExecution = StopExecutionError
+
+
+def _async_raise(tid, exctype):
+    """Raises an exception in the threads with id tid"""
+    if not inspect.isclass(exctype):
+        raise TypeError("Only types can be raised (not instances)")
+    res = ctypes.pythonapi.PyThreadState_SetAsyncExc(ctypes.c_long(tid), ctypes.py_object(exctype))
+    if res == 0:
+        raise ValueError("invalid thread id")
+    if res != 1:
+        # """if it returns a number greater than one, you're in trouble,
+        # and you should call it again with exc=NULL to revert the effect"""
+        ctypes.pythonapi.PyThreadState_SetAsyncExc(ctypes.c_long(tid), None)
+        raise SystemError("PyThreadState_SetAsyncExc failed")
+
+
+class LogCapture:
+    """Context manager to capture stdout/stderr for streaming to the server."""
+
+    def __init__(self):
+        self._buffer = []
+        self._lock = threading.Lock()
+
+    def write(self, text):
+        if text.strip():
+            with self._lock:
+                self._buffer.append(text)
+
+    def flush(self):
+        pass
+
+    def get_and_clear(self) -> list[str]:
+        with self._lock:
+            lines = self._buffer[:]
+            self._buffer.clear()
+            return lines
+
+
+class MonitorThread(threading.Thread):
+    """Background thread that sends heartbeats and flushes logs to the server."""
+
+    def __init__(
+        self,
+        run_id: str,
+        step_name: str,
+        target_tid: int,
+        log_capture: LogCapture | None = None,
+        interval: int = 5,
+    ):
+        super().__init__()
+        self.run_id = run_id
+        self.step_name = step_name
+        self.target_tid = target_tid
+        self.log_capture = log_capture
+        self.interval = interval
+        self._stop_event = threading.Event()
+        self.api_url = os.getenv("FLOWYML_SERVER_URL", "http://localhost:8000")
+
+    def stop(self):
+        self._stop_event.set()
+
+    def _flush_logs(self):
+        """Send captured logs to the server."""
+        if not self.log_capture:
+            return
+
+        lines = self.log_capture.get_and_clear()
+        if not lines:
+            return
+
+        content = "".join(lines)
+        with contextlib.suppress(Exception):
+            requests.post(
+                f"{self.api_url}/api/runs/{self.run_id}/steps/{self.step_name}/logs",
+                json={
+                    "content": content,
+                    "level": "INFO",
+                    "timestamp": datetime.now().isoformat(),
+                },
+                timeout=2,
+            )
+
+    def run(self):
+        while not self._stop_event.is_set():
+            try:
+                # Send heartbeat
+                response = requests.post(
+                    f"{self.api_url}/api/runs/{self.run_id}/steps/{self.step_name}/heartbeat",
+                    json={"step_name": self.step_name, "status": "running"},
+                    timeout=2,
+                )
+                if response.status_code == 200:
+                    data = response.json()
+                    if data.get("action") == "stop":
+                        print(f"Received stop signal for step {self.step_name}")
+                        _async_raise(self.target_tid, StopExecution)
+                        break
+            except Exception:
+                pass  # Ignore heartbeat failures
+
+            # Flush logs
+            self._flush_logs()
+
+            self._stop_event.wait(self.interval)
+
+        # Final log flush
+        self._flush_logs()
+
+
+# Keep HeartbeatThread as an alias for backwards compatibility
+HeartbeatThread = MonitorThread
+
 
 @dataclass
 class ExecutionResult:
@@ -103,8 +230,6 @@ class LocalExecutor(Executor):
         # or just pass what we can.
         # A simple approach: pass nothing if it takes no args, or kwargs if it does.
        # But inspect is safer.
-        import inspect
-
         sig = inspect.signature(step.condition)
         kwargs = {**inputs, **context_params}
 
@@ -157,7 +282,54 @@ class LocalExecutor(Executor):
         kwargs = {**inputs, **context_params}
 
         # Execute step
-
+        monitor_thread = None
+        log_capture = None
+        original_stdout = None
+        original_stderr = None
+        try:
+            # Start monitoring thread with log capture if run_id is present
+            if run_id:
+                import sys
+
+                log_capture = LogCapture()
+                original_stdout = sys.stdout
+                original_stderr = sys.stderr
+                sys.stdout = log_capture
+                sys.stderr = log_capture
+
+                monitor_thread = MonitorThread(
+                    run_id=run_id,
+                    step_name=step.name,
+                    target_tid=threading.get_ident(),
+                    log_capture=log_capture,
+                )
+                monitor_thread.start()
+
+            result = step.func(**kwargs)
+        except StopExecution:
+            duration = time.time() - start_time
+            return ExecutionResult(
+                step_name=step.name,
+                success=False,
+                error="Execution stopped by user",
+                duration_seconds=duration,
+                retries=retries,
+            )
+        finally:
+            # Restore stdout/stderr
+            if original_stdout:
+                import sys
+
+                sys.stdout = original_stdout
+            if original_stderr:
+                import sys
+
+                sys.stderr = original_stderr
+
+            # Stop monitor thread
+            if monitor_thread:
+                monitor_thread.stop()
+                monitor_thread.join()
 
         # Materialize output if artifact store is available
         artifact_uri = None
flowyml/storage/sql.py
CHANGED
@@ -884,26 +884,66 @@ class SQLMetadataStore(MetadataStore):
             "period_days": days,
         }
 
-    def get_statistics(self) -> dict:
+    def get_statistics(self, project: str | None = None) -> dict:
         """Get global statistics."""
         with self.engine.connect() as conn:
-            # Total runs
-
+            # 1. Total runs
+            runs_stmt = select(func.count()).select_from(self.runs)
+            if project:
+                runs_stmt = runs_stmt.where(self.runs.c.project == project)
+            total_runs = conn.execute(runs_stmt).scalar() or 0
 
-            # Total pipelines
-
-
-
+            # 2. Total pipelines (unique names)
+            pipelines_stmt = select(func.count(func.distinct(self.runs.c.pipeline_name)))
+            if project:
+                pipelines_stmt = pipelines_stmt.where(self.runs.c.project == project)
+            total_pipelines = conn.execute(pipelines_stmt).scalar() or 0
 
-            # Total
-
+            # 3. Total artifacts
+            artifacts_stmt = select(func.count()).select_from(self.artifacts)
+            if project:
+                artifacts_stmt = artifacts_stmt.where(self.artifacts.c.project == project)
+            total_artifacts = conn.execute(artifacts_stmt).scalar() or 0
 
-            # Total
-
-
-
+            # 4. Total experiments
+            experiments_stmt = select(func.count()).select_from(self.experiments)
+            if project:
+                experiments_stmt = experiments_stmt.where(self.experiments.c.project == project)
+            total_experiments = conn.execute(experiments_stmt).scalar() or 0
+
+            # 5. Total models
+            models_stmt = select(func.count(func.distinct(self.model_metrics.c.model_name)))
+            if project:
+                models_stmt = models_stmt.where(self.model_metrics.c.project == project)
+            total_models = conn.execute(models_stmt).scalar() or 0
+
+            # 6. Status counts (completed vs failed)
+            status_stmt = select(self.runs.c.status, func.count()).group_by(self.runs.c.status)
+            if project:
+                status_stmt = status_stmt.where(self.runs.c.project == project)
+
+            status_rows = conn.execute(status_stmt).fetchall()
+            status_map = {row[0]: row[1] for row in status_rows if row[0]}
+
+            completed_runs = status_map.get("completed", 0)
+            failed_runs = status_map.get("failed", 0)
+
+            # 7. Avg duration (only completed runs)
+            dur_stmt = select(func.avg(self.runs.c.duration)).where(self.runs.c.status == "completed")
+            if project:
+                dur_stmt = dur_stmt.where(self.runs.c.project == project)
+
+            avg_duration = conn.execute(dur_stmt).scalar() or 0.0
 
         return {
+            # Frontend-friendly keys
+            "pipelines": total_pipelines,
+            "runs": total_runs,
+            "artifacts": total_artifacts,
+            "completed_runs": completed_runs,
+            "failed_runs": failed_runs,
+            "avg_duration": avg_duration,
+            # Backward compatibility
             "total_runs": total_runs,
             "total_pipelines": total_pipelines,
             "total_experiments": total_experiments,
flowyml/ui/backend/main.py
CHANGED
@@ -24,6 +24,7 @@ from flowyml.ui.backend.routers import (
     metrics,
     client,
     stats,
+    websocket,
 )
 
 app = FastAPI(
@@ -77,6 +78,7 @@ app.include_router(metrics.router, prefix="/api/metrics", tags=["metrics"])
 app.include_router(plugins.router, prefix="/api", tags=["plugins"])
 app.include_router(client.router, prefix="/api/client", tags=["client"])
 app.include_router(stats.router, prefix="/api/stats", tags=["stats"])
+app.include_router(websocket.router, tags=["websocket"])
 
 
 # Static file serving for frontend
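The websocket router registered here is new in 1.5.0 (flowyml/ui/backend/routers/websocket.py, +121 lines) but its body is not part of the hunks shown; the only visible contract is the manager.broadcast_log(run_id, step_name, content) call made from runs.py. A rough sketch of what such a router could look like, where the endpoint path and the manager internals are assumptions rather than the actual implementation:

from fastapi import APIRouter, WebSocket, WebSocketDisconnect

router = APIRouter()


class ConnectionManager:
    """Tracks websocket clients per run and pushes log lines to them."""

    def __init__(self):
        # Map run_id -> websockets currently watching that run's logs.
        self.connections: dict[str, list[WebSocket]] = {}

    async def connect(self, run_id: str, websocket: WebSocket) -> None:
        await websocket.accept()
        self.connections.setdefault(run_id, []).append(websocket)

    def disconnect(self, run_id: str, websocket: WebSocket) -> None:
        if websocket in self.connections.get(run_id, []):
            self.connections[run_id].remove(websocket)

    async def broadcast_log(self, run_id: str, step_name: str, content: str) -> None:
        # Matches the call shape used in runs.py: broadcast_log(run_id, step_name, content).
        for ws in self.connections.get(run_id, []):
            await ws.send_json({"step": step_name, "content": content})


manager = ConnectionManager()


@router.websocket("/ws/runs/{run_id}/logs")  # hypothetical path, not confirmed by this diff
async def run_logs(websocket: WebSocket, run_id: str):
    await manager.connect(run_id, websocket)
    try:
        while True:
            await websocket.receive_text()  # keep the connection open
    except WebSocketDisconnect:
        manager.disconnect(run_id, websocket)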
flowyml/ui/backend/routers/assets.py
CHANGED
@@ -476,6 +476,42 @@ async def download_asset(artifact_id: str):
     )
 
 
+@router.get("/{artifact_id}/content")
+async def get_asset_content(artifact_id: str):
+    """Get the artifact content for inline viewing."""
+    import mimetypes
+
+    asset, _ = _find_asset_with_store(artifact_id)
+    if not asset:
+        raise HTTPException(status_code=404, detail="Asset not found")
+
+    artifact_path = asset.get("path")
+    if not artifact_path:
+        raise HTTPException(status_code=404, detail="Artifact path not available")
+
+    # Handle relative paths for local store
+    from flowyml.utils.config import get_config
+
+    config = get_config()
+
+    file_path = Path(artifact_path)
+    if not file_path.is_absolute():
+        file_path = config.artifacts_dir / file_path
+
+    if not file_path.exists():
+        raise HTTPException(status_code=404, detail="Artifact file not found on disk")
+
+    # Guess mime type
+    mime_type, _ = mimetypes.guess_type(file_path.name)
+    if not mime_type:
+        mime_type = "text/plain"  # Default fallback
+
+    return FileResponse(
+        path=file_path,
+        media_type=mime_type,
+    )
+
+
 class ProjectUpdate(BaseModel):
     project_name: str
 
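From a client's point of view, the new endpoint serves the raw artifact bytes with a server-side guessed MIME type, which is what the frontend's ArtifactViewer uses for inline previews. A hedged usage sketch (the /api/assets mount prefix and the artifact id are placeholders, not confirmed by this diff):

import requests

base_url = "http://localhost:8000"   # assumption: local flowyml UI backend
artifact_id = "<artifact-id>"        # placeholder

resp = requests.get(f"{base_url}/api/assets/{artifact_id}/content", timeout=5)
resp.raise_for_status()
print(resp.headers.get("content-type"))  # MIME type guessed via mimetypes on the server
print(resp.content[:200])                # first bytes of the artifact (text or binary)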
flowyml/ui/backend/routers/execution.py
CHANGED
@@ -97,10 +97,10 @@ async def execute_pipeline(
     run_kwargs = request.parameters.copy()
 
     if request.retry_count > 0:
-        from flowyml.core.
+        from flowyml.core.retry_policy import OrchestratorRetryPolicy
 
         run_kwargs["retry_policy"] = OrchestratorRetryPolicy(
-
+            max_attempts=min(request.retry_count, 5),  # Cap at 5
         )
 
     result = pipeline.run(**run_kwargs)
flowyml/ui/backend/routers/runs.py
CHANGED
@@ -146,6 +146,24 @@ async def get_run(run_id: str):
     run, _ = _find_run(run_id)
     if not run:
         raise HTTPException(status_code=404, detail="Run not found")
+
+    # Mark dead steps
+    dead_steps = _get_dead_steps(run_id)
+    if dead_steps and "steps" in run:
+        for step_name in dead_steps:
+            if step_name in run["steps"]:
+                # Only mark as dead if it was running
+                if run["steps"][step_name].get("status") == "running":
+                    run["steps"][step_name]["status"] = "dead"
+                    run["steps"][step_name]["success"] = False
+
+    # Inject heartbeat timestamps
+    with _heartbeat_lock:
+        if run_id in _heartbeat_timestamps:
+            for step_name, ts in _heartbeat_timestamps[run_id].items():
+                if step_name in run.get("steps", {}):
+                    run["steps"][step_name]["last_heartbeat"] = ts
+
     return run
 
 
@@ -273,3 +291,196 @@ async def get_cloud_status(run_id: str):
         "cloud_status": cloud_status,
         "cloud_error": cloud_error,
     }
+
+
+class HeartbeatRequest(BaseModel):
+    step_name: str
+    status: str = "running"
+
+
+# In-memory storage for heartbeat timestamps
+# Format: {run_id: {step_name: last_heartbeat_timestamp}}
+_heartbeat_timestamps: dict[str, dict[str, float]] = {}
+_heartbeat_lock = __import__("threading").Lock()
+
+# Heartbeat interval in seconds (should match executor's interval)
+HEARTBEAT_INTERVAL = 5
+# Number of missed heartbeats before marking step as dead
+DEAD_THRESHOLD = 3
+
+
+def _record_heartbeat(run_id: str, step_name: str) -> None:
+    """Record heartbeat timestamp for a step."""
+    import time
+
+    with _heartbeat_lock:
+        if run_id not in _heartbeat_timestamps:
+            _heartbeat_timestamps[run_id] = {}
+        _heartbeat_timestamps[run_id][step_name] = time.time()
+
+
+def _get_dead_steps(run_id: str) -> list[str]:
+    """Get list of steps that have missed too many heartbeats."""
+    import time
+
+    dead_steps = []
+    timeout = HEARTBEAT_INTERVAL * DEAD_THRESHOLD
+
+    with _heartbeat_lock:
+        if run_id not in _heartbeat_timestamps:
+            return []
+
+        current_time = time.time()
+        for step_name, last_heartbeat in _heartbeat_timestamps[run_id].items():
+            if current_time - last_heartbeat > timeout:
+                dead_steps.append(step_name)
+
+    return dead_steps
+
+
+def _cleanup_heartbeats(run_id: str) -> None:
+    """Remove heartbeat tracking for a completed run."""
+    with _heartbeat_lock:
+        _heartbeat_timestamps.pop(run_id, None)
+
+
+@router.post("/{run_id}/steps/{step_name}/heartbeat")
+async def step_heartbeat(run_id: str, step_name: str, heartbeat: HeartbeatRequest):
+    """Receive heartbeat from a running step.
+
+    Returns:
+        dict: Instructions for the step (e.g., {"action": "continue"} or {"action": "stop"})
+    """
+    store = _find_store_for_run(run_id)
+
+    # Record heartbeat timestamp
+    _record_heartbeat(run_id, step_name)
+
+    # Check if run is marked for stopping
+    run = store.load_run(run_id)
+    if not run:
+        raise HTTPException(status_code=404, detail="Run not found")
+
+    run_status = run.get("status")
+    if run_status in ["stopping", "stopped", "cancelled", "cancelling"]:
+        return {"action": "stop"}
+
+    return {"action": "continue"}
+
+
+@router.get("/{run_id}/dead-steps")
+async def get_dead_steps(run_id: str):
+    """Get list of steps that appear to be dead (missed heartbeats)."""
+    dead_steps = _get_dead_steps(run_id)
+    return {"dead_steps": dead_steps}
+
+
+@router.post("/{run_id}/stop")
+async def stop_run(run_id: str):
+    """Signal a run to stop."""
+    store = _find_store_for_run(run_id)
+
+    try:
+        # Update run status to STOPPING
+        # This will be picked up by the next heartbeat
+        store.update_run_status(run_id, "stopping")
+        return {"status": "success", "message": "Stop signal sent"}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+class LogChunk(BaseModel):
+    content: str
+    level: str = "INFO"
+    timestamp: str | None = None
+
+
+@router.post("/{run_id}/steps/{step_name}/logs")
+async def post_step_logs(run_id: str, step_name: str, log_chunk: LogChunk):
+    """Receive log chunk from a running step."""
+    import anyio
+
+    from flowyml.utils.config import get_config
+
+    # Store logs in the runs directory
+    runs_dir = get_config().runs_dir
+    log_dir = runs_dir / run_id / "logs"
+    log_dir.mkdir(parents=True, exist_ok=True)
+
+    log_file = log_dir / f"{step_name}.log"
+
+    # Append log content
+    timestamp = log_chunk.timestamp or ""
+    line = f"[{timestamp}] [{log_chunk.level}] {log_chunk.content}\n"
+
+    def write_log():
+        with open(log_file, "a") as f:
+            f.write(line)
+
+    await anyio.to_thread.run_sync(write_log)
+
+    # Broadcast to WebSocket clients
+    try:
+        from flowyml.ui.backend.routers.websocket import manager
+
+        await manager.broadcast_log(run_id, step_name, log_chunk.content)
+    except Exception:
+        pass  # Ignore WebSocket broadcast failures
+
+    return {"status": "success"}
+
+
+@router.get("/{run_id}/steps/{step_name}/logs")
+async def get_step_logs(run_id: str, step_name: str, offset: int = 0):
+    """Get logs for a specific step."""
+    import anyio
+
+    from flowyml.utils.config import get_config
+
+    runs_dir = get_config().runs_dir
+    log_file = runs_dir / run_id / "logs" / f"{step_name}.log"
+
+    if not log_file.exists():
+        return {"logs": "", "offset": 0, "has_more": False}
+
+    def read_log():
+        with open(log_file) as f:
+            return f.read()
+
+    content = await anyio.to_thread.run_sync(read_log)
+
+    # Return content from offset
+    if offset > 0 and offset < len(content):
+        content = content[offset:]
+
+    return {
+        "logs": content,
+        "offset": offset + len(content),
+        "has_more": False,  # For now, always return all available
+    }
+
+
+@router.get("/{run_id}/logs")
+async def get_run_logs(run_id: str):
+    """Get all logs for a run."""
+    import anyio
+
+    from flowyml.utils.config import get_config
+
+    runs_dir = get_config().runs_dir
+    log_dir = runs_dir / run_id / "logs"
+
+    if not log_dir.exists():
+        return {"logs": {}}
+
+    def read_all_logs():
+        logs = {}
+        for log_file in log_dir.glob("*.log"):
+            step_name = log_file.stem
+            with open(log_file) as f:
+                logs[step_name] = f.read()
+        return logs
+
+    logs = await anyio.to_thread.run_sync(read_all_logs)
+
+    return {"logs": logs}
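Taken together with the executor changes, these endpoints implement a poll-based stop protocol: the worker posts a heartbeat every few seconds and aborts when the reply is {"action": "stop"}, which happens once someone has POSTed to /{run_id}/stop. A sketch of that handshake from the worker side, using the /api/runs prefix that the executor diff itself targets (base URL, run id and step name are placeholders):

import time

import requests

base_url = "http://localhost:8000"   # assumption: local flowyml UI backend
run_id = "<run-id>"
step_name = "<step-name>"

# Elsewhere (e.g. from the UI), a stop signal would be sent with:
# requests.post(f"{base_url}/api/runs/{run_id}/stop", timeout=2)

for _ in range(60):  # bounded here so the sketch terminates
    resp = requests.post(
        f"{base_url}/api/runs/{run_id}/steps/{step_name}/heartbeat",
        json={"step_name": step_name, "status": "running"},
        timeout=2,
    )
    if resp.ok and resp.json().get("action") == "stop":
        print("server asked this step to stop")
        break
    time.sleep(5)  # matches HEARTBEAT_INTERVAL on the server side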
flowyml/ui/backend/routers/stats.py
CHANGED
@@ -5,10 +5,10 @@ router = APIRouter()
 
 
 @router.get("/")
-async def get_global_stats():
+async def get_global_stats(project: str | None = None):
     """Get global statistics."""
     try:
         store = get_store()
-        return store.get_statistics()
+        return store.get_statistics(project=project)
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))