pyoco 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyoco/core/engine.py CHANGED
@@ -1,180 +1,275 @@
  import time
- from typing import Dict, Any, List, Set
- from .models import Flow, Task
+ from typing import Dict, Any, List, Set, Optional
+ from .models import Flow, Task, RunContext, TaskState, RunStatus
  from .context import Context
  from ..trace.backend import TraceBackend
  from ..trace.console import ConsoleTraceBackend

  class Engine:
+     """
+     The core execution engine for Pyoco flows.
+
+     Responsible for:
+     - Resolving task dependencies
+     - Managing parallel execution (using ThreadPoolExecutor)
+     - Handling input injection and artifact storage
+     - Delegating logging to the TraceBackend
+
+     Intentionally keeps scheduling logic simple (no distributed queue, no external DB).
+     """
      def __init__(self, trace_backend: TraceBackend = None):
          self.trace = trace_backend or ConsoleTraceBackend()
+         # Track active runs: run_id -> RunContext
+         from .models import RunContext
+         self.active_runs: Dict[str, RunContext] = {}

-     def run(self, flow: Flow, params: Dict[str, Any] = None) -> Context:
-         ctx = Context(params=params or {})
-         self.trace.on_flow_start(flow.name)
+     def get_run(self, run_id: str) -> Any:
+         # Return RunContext if active, else None (for now)
+         return self.active_runs.get(run_id)
+
+     def cancel(self, run_id: str):
+         """
+         Cancel an active run.
+         """
+         from .models import RunStatus
+         run_ctx = self.active_runs.get(run_id)
+         if run_ctx:
+             if run_ctx.status == RunStatus.RUNNING:
+                 run_ctx.status = RunStatus.CANCELLING
+                 # We don't force-kill threads here; the scheduling loop handles it.
+
+     def run(self, flow: Flow, params: Dict[str, Any] = None, run_context: Optional[RunContext] = None) -> Context:
+         # Initialize RunContext (v0.2.0)
+         if run_context is None:
+             run_context = RunContext()
+
+         run_ctx = run_context

-         executed: Set[Task] = set()
-         running: Set[Any] = set()  # Set of Futures
+         # Initialize all tasks as PENDING
+         for task in flow.tasks:
+             run_ctx.tasks[task.name] = TaskState.PENDING
+
+         ctx = Context(params=params or {}, run_context=run_ctx)
+         self.trace.on_flow_start(flow.name, run_id=run_ctx.run_id)

-         import concurrent.futures
+         # Register active run
+         self.active_runs[run_ctx.run_id] = run_ctx

-         # Use ThreadPoolExecutor for parallel execution
-         # Max workers could be configurable, default to something reasonable
-         with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
-             future_to_task = {}
-             task_deadlines: Dict[Task, float] = {}
+         try:
+             executed: Set[Task] = set()
+             running: Set[Any] = set()  # Set of Futures

-             failed: Set[Task] = set()
+             import concurrent.futures
+
+             # Use ThreadPoolExecutor for parallel execution
+             # Max workers could be configurable, default to something reasonable
+             with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
+                 future_to_task = {}
+                 task_deadlines: Dict[Task, float] = {}
+
+                 failed: Set[Task] = set()

-             while len(executed) + len(failed) < len(flow.tasks):
-                 # Identify runnable tasks
-                 runnable = []
-                 for task in flow.tasks:
-                     if task in executed or task in failed or task in [future_to_task[f] for f in running]:
-                         continue
-
-                     # Check dependencies
-                     deps_met = True
-
-                     if task.trigger_policy == "ANY":
-                         # OR-join: Run if ANY dependency is executed (and successful)
-                         # If at least one succeeded, we run.
-                         # If none succeeded yet, we wait.
-                         # If all failed, we fail (or skip).
-                         any_success = False
-                         all_failed = True
-
-                         if not task.dependencies:
-                             # No deps = ready
-                             any_success = True
-                             all_failed = False
-                         else:
-                             for dep in task.dependencies:
-                                 if dep in executed:
-                                     any_success = True
-                                     all_failed = False
-                                     break  # Found one success
-                                 if dep not in failed:
-                                     all_failed = False  # At least one is still running/pending
-
-                         if any_success:
-                             deps_met = True
-                         elif all_failed:
-                             # All deps failed, so we fail/skip
-                             failed.add(task)
-                             deps_met = False
-                             # Continue to next task loop to avoid adding to runnable
-                             continue
-                         else:
-                             # Still waiting
-                             deps_met = False
-
-                     else:
-                         # ALL (AND-join) - Default
-                         for dep in task.dependencies:
-                             if dep in failed:
-                                 # Dependency failed
-                                 if task.fail_policy == "isolate" or dep.fail_policy == "isolate":
-                                     failed.add(task)
-                                     deps_met = False
-                                     break
-                                 else:
-                                     pass  # fail=stop handled elsewhere
-
-                             if dep not in executed:
-                                 deps_met = False
-                                 break
-
-                     if deps_met and task not in failed:
-                         runnable.append(task)
+                 while len(executed) + len(failed) < len(flow.tasks):
+                     # Check for cancellation
+                     if run_ctx.status in [RunStatus.CANCELLING, RunStatus.CANCELLED]:
+                         # Stop submitting new tasks; mark all PENDING tasks as CANCELLED
+                         for t_name, t_state in run_ctx.tasks.items():
+                             if t_state == TaskState.PENDING:
+                                 run_ctx.tasks[t_name] = TaskState.CANCELLED
+
+                         # If no tasks are running, we are done
+                         if not running:
+                             run_ctx.status = RunStatus.CANCELLED
+                             break
+                         # Otherwise fall through to the wait logic below so in-flight
+                         # tasks can finish (graceful shutdown); 'runnable' stays empty.
+
+                     # Identify runnable tasks
+                     runnable = []
+                     if run_ctx.status == RunStatus.RUNNING:
+                         for task in flow.tasks:
+                             if task in executed or task in failed or task in [future_to_task[f] for f in running]:
+                                 continue
+
+                             # Check dependencies
+                             deps_met = True
+
+                             if task.trigger_policy == "ANY":
+                                 # OR-join: Run if ANY dependency is executed (and successful)
+                                 # If at least one succeeded, we run.
+                                 # If none succeeded yet, we wait.
+                                 # If all failed, we fail (or skip).
+                                 any_success = False
+                                 all_failed = True
+
+                                 if not task.dependencies:
+                                     # No deps = ready
+                                     any_success = True
+                                     all_failed = False
+                                 else:
+                                     for dep in task.dependencies:
+                                         if dep in executed:
+                                             any_success = True
+                                             all_failed = False
+                                             break  # Found one success
+                                         if dep not in failed:
+                                             all_failed = False  # At least one is still running/pending
+
+                                 if any_success:
+                                     deps_met = True
+                                 elif all_failed:
+                                     # All deps failed, so we fail/skip
+                                     failed.add(task)
+                                     run_ctx.tasks[task.name] = TaskState.FAILED
+                                     deps_met = False
+                                     # Continue to next task loop to avoid adding to runnable
+                                     continue
+                                 else:
+                                     # Still waiting
+                                     deps_met = False
+
+                             else:
+                                 # ALL (AND-join) - Default
+                                 for dep in task.dependencies:
+                                     if dep in failed:
+                                         # Dependency failed
+                                         if task.fail_policy == "isolate" or dep.fail_policy == "isolate":
+                                             failed.add(task)
+                                             run_ctx.tasks[task.name] = TaskState.FAILED  # Mark as FAILED (or SKIPPED if we had it)
+                                             deps_met = False
+                                             break
+                                         else:
+                                             pass  # fail=stop handled elsewhere
+
+                                     if dep not in executed:
+                                         deps_met = False
+                                         break
+
+                             if deps_met and task not in failed:
+                                 runnable.append(task)

-                 # If no runnable tasks and no running tasks, we are stuck
-                 # But if we have failed tasks, maybe that's why?
-                 if not runnable and not running:
-                     if len(executed) + len(failed) == len(flow.tasks):
-                         # All done (some failed)
-                         break
-                     raise RuntimeError("Deadlock or cycle detected in workflow")
-
-                 # Submit runnable tasks
-                 for task in runnable:
-                     future = executor.submit(self._execute_task, task, ctx)
-                     running.add(future)
-                     future_to_task[future] = task
-                     # Record start time for timeout tracking
-                     # We need to track start times or deadlines.
-                     # Let's store deadline in a separate dict or attach to task?
-                     # Task is immutable-ish (dataclass).
-                     # Let's use a dict.
-                     if task.timeout_sec:
-                         task_deadlines[task] = time.time() + task.timeout_sec
+                     # If no runnable tasks and no running tasks, we are stuck
+                     # But if we have failed tasks, maybe that's why?
+                     if not runnable and not running:
+                         if len(executed) + len(failed) == len(flow.tasks):
+                             # All done (some failed)
+                             break
+
+                         run_ctx.status = RunStatus.FAILED
+                         run_ctx.end_time = time.time()
+                         raise RuntimeError("Deadlock or cycle detected in workflow")
+
+                     # Submit runnable tasks
+                     for task in runnable:
+                         future = executor.submit(self._execute_task, task, ctx)
+                         running.add(future)
+                         future_to_task[future] = task
+                         # Record start time for timeout tracking
+                         if task.timeout_sec:
+                             task_deadlines[task] = time.time() + task.timeout_sec

-                 # Calculate wait timeout
-                 wait_timeout = None
-                 if task_deadlines:
-                     now = time.time()
-                     min_deadline = min(task_deadlines.values())
-                     wait_timeout = max(0, min_deadline - now)
-
-                 # Wait for at least one task to complete or time out
-                 if running:
-                     done, _ = concurrent.futures.wait(
-                         running,
-                         timeout=wait_timeout,
-                         return_when=concurrent.futures.FIRST_COMPLETED
-                     )
+                     # Calculate wait timeout
+                     wait_timeout = None
+                     if task_deadlines:
+                         now = time.time()
+                         min_deadline = min(task_deadlines.values())
+                         wait_timeout = max(0, min_deadline - now)

-                 # Check for timeouts first
-                 now = time.time()
-                 timed_out_tasks = []
-                 for task, deadline in list(task_deadlines.items()):
-                     if now >= deadline:
-                         # Task timed out
-                         # Find the future for this task
-                         # This is inefficient, but the running set is small
-                         found_future = None
-                         for f, t in future_to_task.items():
-                             if t == task and f in running:
-                                 found_future = f
-                                 break
-
-                         if found_future:
-                             timed_out_tasks.append(found_future)
-                             # Remove from tracking
-                             running.remove(found_future)
-                             del task_deadlines[task]
+                     # Wait for at least one task to complete or time out
+                     if running:
+                         done, _ = concurrent.futures.wait(
+                             running,
+                             timeout=wait_timeout,
+                             return_when=concurrent.futures.FIRST_COMPLETED
+                         )
+
+                     # Check for timeouts first
+                     now = time.time()
+                     timed_out_tasks = []
+                     for task, deadline in list(task_deadlines.items()):
+                         if now >= deadline:
+                             # Task timed out
+                             # Find the future for this task
+                             found_future = None
+                             for f, t in future_to_task.items():
+                                 if t == task and f in running:
+                                     found_future = f
+                                     break

-                             # Handle failure
-                             if task.fail_policy == "isolate":
-                                 failed.add(task)
-                                 self.trace.on_node_error(task.name, TimeoutError(f"Task exceeded timeout of {task.timeout_sec}s"))
-                             else:
-                                 raise TimeoutError(f"Task '{task.name}' exceeded timeout of {task.timeout_sec}s")
+                             if found_future:
+                                 timed_out_tasks.append(found_future)
+                                 # Remove from tracking
+                                 running.remove(found_future)
+                                 del task_deadlines[task]
+
+                                 # Handle failure
+                                 if task.fail_policy == "isolate":
+                                     failed.add(task)
+                                     run_ctx.tasks[task.name] = TaskState.FAILED
+                                     self.trace.on_node_error(task.name, TimeoutError(f"Task exceeded timeout of {task.timeout_sec}s"))
+                                 else:
+                                     run_ctx.status = RunStatus.FAILED
+                                     run_ctx.end_time = time.time()
+                                     raise TimeoutError(f"Task '{task.name}' exceeded timeout of {task.timeout_sec}s")

-                 for future in done:
-                     if future in running:  # Might have been removed by timeout check above (unlikely if wait returned due to completion, but possible race)
-                         running.remove(future)
-                         task = future_to_task[future]
-                         if task in task_deadlines:
-                             del task_deadlines[task]
-
-                         try:
-                             future.result()  # Re-raise exception if any
-                             executed.add(task)
-                         except Exception as e:
-                             if task.fail_policy == "isolate":
-                                 failed.add(task)
-                                 self.trace.on_node_error(task.name, e)  # Log it
-                             else:
-                                 # fail=stop (default)
-                                 raise e
+                     for future in done:
+                         if future in running:  # Might have been removed by timeout check above
+                             running.remove(future)
+                             task = future_to_task[future]
+                             if task in task_deadlines:
+                                 del task_deadlines[task]
+
+                             try:
+                                 future.result()  # Re-raise exception if any
+                                 executed.add(task)
+                             except Exception as e:
+                                 if task.fail_policy == "isolate":
+                                     failed.add(task)
+                                     # _execute_task also marks FAILED before raising;
+                                     # set it here as well to be safe.
+                                     run_ctx.tasks[task.name] = TaskState.FAILED
+                                     self.trace.on_node_error(task.name, e)  # Log it
+                                 else:
+                                     # fail=stop (default)
+                                     run_ctx.status = RunStatus.FAILED
+                                     run_ctx.end_time = time.time()
+                                     raise e
+
+         finally:
+             # Cleanup active run
+             if run_ctx.run_id in self.active_runs:
+                 del self.active_runs[run_ctx.run_id]

          self.trace.on_flow_end(flow.name)
+
+         # Update final run status
+         if run_ctx.status == RunStatus.RUNNING:
+             if failed:
+                 # Some tasks failed but were isolated; the flow itself finished,
+                 # so the run counts as COMPLETED rather than FAILED.
+                 run_ctx.status = RunStatus.COMPLETED
+             else:
+                 run_ctx.status = RunStatus.COMPLETED
+
+         run_ctx.end_time = time.time()
          return ctx

      def _execute_task(self, task: Task, ctx: Context):
+         # Update state to RUNNING
+         from .models import TaskState
+         if ctx.run_context:
+             ctx.run_context.tasks[task.name] = TaskState.RUNNING
+
          self.trace.on_node_start(task.name)
          start_time = time.time()
          # Retry loop
@@ -234,6 +329,11 @@ class Engine:

          duration = (time.time() - start_time) * 1000
          self.trace.on_node_end(task.name, duration)
+
+         # Update state to SUCCEEDED
+         if ctx.run_context:
+             ctx.run_context.tasks[task.name] = TaskState.SUCCEEDED
+
          return  # Success

      except Exception as e:
@@ -246,4 +346,7 @@ class Engine:
              continue
          else:
              self.trace.on_node_error(task.name, e)
+             # Update state to FAILED
+             if ctx.run_context:
+                 ctx.run_context.tasks[task.name] = TaskState.FAILED
              raise e
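
Taken together, the engine changes wrap `run()` in a queryable, cancellable lifecycle: each run gets a `RunContext`, is registered in `active_runs` while executing, and is cancelled cooperatively. A minimal sketch of driving that lifecycle (assuming trivial task callables and the `Task`/`Flow` constructors shown in `models.py` below; the exact signature Pyoco expects for task functions is outside this diff):

    import threading
    import time

    from pyoco.core.engine import Engine
    from pyoco.core.models import Flow, Task, RunContext

    # Hypothetical no-op task bodies, just to exercise the scheduler
    t1 = Task(func=lambda: time.sleep(1), name="extract")
    t2 = Task(func=lambda: None, name="load", dependencies={t1})
    flow = Flow(name="etl", tasks={t1, t2})

    engine = Engine()
    run_ctx = RunContext()  # created up front so the run_id is known immediately
    worker = threading.Thread(target=engine.run, args=(flow,),
                              kwargs={"run_context": run_ctx})
    worker.start()

    engine.cancel(run_ctx.run_id)  # RUNNING -> CANCELLING; the loop drains in-flight tasks
    worker.join()
    print(run_ctx.status)          # CANCELLED, or COMPLETED if the flow finished first

Note that cancellation is cooperative: already-submitted futures run to completion, and only still-PENDING tasks are marked CANCELLED.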
pyoco/core/models.py CHANGED
@@ -1,8 +1,17 @@
  from typing import Any, Callable, Dict, List, Optional, Set, Union, ForwardRef
  from dataclasses import dataclass, field
+ from enum import Enum
+ import time
+ import uuid

  @dataclass
  class Task:
+     """
+     Represents a single unit of work in the workflow.
+
+     Designed to be lightweight and serializable.
+     Contains metadata about the task, its dependencies, and execution policies.
+     """
      func: Callable
      name: str
      dependencies: Set['Task'] = field(default_factory=set)
@@ -32,8 +41,40 @@ class Task:
      def __repr__(self):
          return f"<Task {self.name}>"

+ class TaskState(Enum):
+     PENDING = "PENDING"
+     RUNNING = "RUNNING"
+     SUCCEEDED = "SUCCEEDED"
+     FAILED = "FAILED"
+     CANCELLED = "CANCELLED"
+
+ class RunStatus(Enum):
+     PENDING = "PENDING"
+     RUNNING = "RUNNING"
+     COMPLETED = "COMPLETED"
+     FAILED = "FAILED"
+     CANCELLING = "CANCELLING"
+     CANCELLED = "CANCELLED"
+
+ @dataclass
+ class RunContext:
+     """
+     Holds the state of a single workflow execution.
+     """
+     run_id: str = field(default_factory=lambda: str(uuid.uuid4()))
+     status: RunStatus = RunStatus.RUNNING
+     tasks: Dict[str, TaskState] = field(default_factory=dict)
+     start_time: float = field(default_factory=time.time)
+     end_time: Optional[float] = None
+
  @dataclass
  class Flow:
+     """
+     Represents a Directed Acyclic Graph (DAG) of tasks.
+
+     Manages the collection of tasks and their dependencies.
+     Optimized for single-machine execution without complex scheduling overhead.
+     """
      name: str = "main"
      tasks: Set[Task] = field(default_factory=set)
      _tail: Set[Task] = field(default_factory=set)
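
These enums and `RunContext` are the state vocabulary shared by the in-process engine above and the server below. A hand-driven sketch of the bookkeeping that `Engine.run` performs:

    from pyoco.core.models import RunContext, RunStatus, TaskState

    run = RunContext()                    # fresh run: UUID run_id, status RUNNING
    run.tasks["extract"] = TaskState.PENDING
    run.tasks["extract"] = TaskState.SUCCEEDED   # engine moves it to a terminal state

    if all(s is TaskState.SUCCEEDED for s in run.tasks.values()):
        run.status = RunStatus.COMPLETED

    print(run.run_id, run.status.value)   # e.g. "9be1... COMPLETED"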
pyoco/discovery/loader.py CHANGED
@@ -45,7 +45,7 @@ class TaskLoader:
              print(f"Warning: {msg} Overwriting.")

          # Apply config overlay if exists
-         if name in self.config.tasks:
+         if self.config and name in self.config.tasks:
              conf = self.config.tasks[name]
              if not conf.callable:
                  if conf.inputs:
@@ -124,7 +124,6 @@ class TaskLoader:
          # Create a Task wrapper
          t = Task(func=real_func, name=name)
          t.inputs = conf.inputs
-         t.inputs = conf.inputs
          t.outputs = conf.outputs
          self.tasks[name] = t
      except (ImportError, AttributeError) as e:
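The added guard fixes a crash when no config overlay is loaded. A sketch of the failing case (`TaskLoader`'s constructor is not shown in this diff, so the `config=None` initialization is an assumption):

    from pyoco.discovery.loader import TaskLoader

    loader = TaskLoader(config=None)  # hypothetical: discovery without a config overlay
    # 0.1.0: `if name in self.config.tasks` raised
    #        AttributeError: 'NoneType' object has no attribute 'tasks'
    # 0.3.0: `if self.config and name in self.config.tasks` simply skips the overlay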
pyoco/server/api.py ADDED
@@ -0,0 +1,71 @@
+ from fastapi import FastAPI, HTTPException
+ from typing import List, Optional
+ from .store import StateStore
+ from .models import (
+     RunSubmitRequest, RunResponse,
+     WorkerPollRequest, WorkerPollResponse,
+     WorkerHeartbeatRequest, WorkerHeartbeatResponse
+ )
+ from ..core.models import RunContext, RunStatus
+
+ app = FastAPI(title="Pyoco Kanban Server")
+ store = StateStore()
+
+ @app.post("/runs", response_model=RunResponse)
+ def submit_run(req: RunSubmitRequest):
+     run_ctx = store.create_run(req.flow_name, req.params)
+     return RunResponse(run_id=run_ctx.run_id, status=run_ctx.status)
+
+ @app.get("/runs", response_model=List[RunContext])
+ def list_runs(status: Optional[RunStatus] = None):
+     runs = store.list_runs()
+     if status:
+         runs = [r for r in runs if r.status == status]
+     return runs
+
+ @app.get("/runs/{run_id}", response_model=RunContext)
+ def get_run(run_id: str):
+     run = store.get_run(run_id)
+     if not run:
+         raise HTTPException(status_code=404, detail="Run not found")
+     return run
+
+ @app.post("/runs/{run_id}/cancel")
+ def cancel_run(run_id: str):
+     run = store.get_run(run_id)
+     if not run:
+         raise HTTPException(status_code=404, detail="Run not found")
+     store.cancel_run(run_id)
+     return {"status": "CANCELLING"}
+
+ @app.post("/workers/poll", response_model=WorkerPollResponse)
+ def poll_work(req: WorkerPollRequest):
+     # In v0.3.0, worker_id and tags are ignored for simplicity
+     run = store.dequeue()
+     if run:
+         # Dequeuing means "assigned": the run leaves the queue, but its status
+         # stays PENDING until the worker reports RUNNING via its first heartbeat.
+         return WorkerPollResponse(
+             run_id=run.run_id,
+             flow_name=run.flow_name,
+             params=run.params
+         )
+     return WorkerPollResponse()
+
+ @app.post("/runs/{run_id}/heartbeat", response_model=WorkerHeartbeatResponse)
+ def heartbeat(run_id: str, req: WorkerHeartbeatRequest):
+     run = store.get_run(run_id)
+     if not run:
+         raise HTTPException(status_code=404, detail="Run not found")
+
+     store.update_run(run_id, status=req.run_status, task_states=req.task_states)
+
+     # Check if cancellation was requested
+     cancel_requested = (run.status == RunStatus.CANCELLING)
+
+     return WorkerHeartbeatResponse(cancel_requested=cancel_requested)
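
The endpoints define a pull-based worker protocol: workers poll for queued runs, execute them locally, and report progress through heartbeats, which double as the cancellation channel. A sketch of the client side (hypothetical worker loop; only the HTTP contract comes from this diff, while the `requests` usage and server address are assumptions):

    import time
    import requests

    BASE = "http://localhost:8000"  # assumed server address

    while True:
        work = requests.post(f"{BASE}/workers/poll",
                             json={"worker_id": "w1", "tags": []}).json()
        if not work.get("run_id"):
            time.sleep(2)   # nothing queued; poll again later
            continue

        # ... execute work["flow_name"] with work["params"] via a local Engine ...

        hb = requests.post(f"{BASE}/runs/{work['run_id']}/heartbeat",
                           json={"task_states": {}, "run_status": "RUNNING"}).json()
        if hb["cancel_requested"]:
            break  # the server saw POST /runs/{id}/cancel; stop the local run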
pyoco/server/models.py ADDED
@@ -0,0 +1,28 @@
+ from typing import Dict, List, Optional, Any
+ from pydantic import BaseModel
+ from ..core.models import RunStatus, TaskState
+
+ class RunSubmitRequest(BaseModel):
+     flow_name: str
+     params: Dict[str, Any] = {}
+     tags: List[str] = []
+
+ class RunResponse(BaseModel):
+     run_id: str
+     status: RunStatus
+
+ class WorkerPollRequest(BaseModel):
+     worker_id: str
+     tags: List[str] = []
+
+ class WorkerPollResponse(BaseModel):
+     run_id: Optional[str] = None
+     flow_name: Optional[str] = None
+     params: Optional[Dict[str, Any]] = None
+
+ class WorkerHeartbeatRequest(BaseModel):
+     task_states: Dict[str, TaskState]
+     run_status: RunStatus
+
+ class WorkerHeartbeatResponse(BaseModel):
+     cancel_requested: bool
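
Because the pydantic models reuse the core enums as field types, task and run states travel over the wire as their string values. A round-trip sketch (pydantic v1-style `.json()` shown; the diff does not pin a pydantic version):

    from pyoco.core.models import RunStatus, TaskState
    from pyoco.server.models import WorkerHeartbeatRequest, WorkerHeartbeatResponse

    req = WorkerHeartbeatRequest(
        task_states={"extract": TaskState.SUCCEEDED, "load": TaskState.RUNNING},
        run_status=RunStatus.RUNNING,
    )
    print(req.json())
    # {"task_states": {"extract": "SUCCEEDED", "load": "RUNNING"}, "run_status": "RUNNING"}

    resp = WorkerHeartbeatResponse(cancel_requested=True)  # server -> worker: stop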