pyoco 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyoco/__init__.py ADDED
@@ -0,0 +1,11 @@
+ from .core.models import Flow, Task
+ from .core.engine import Engine
+ from .dsl.syntax import task
+ from .trace.console import ConsoleTraceBackend
+
+ def run(flow: Flow, params: dict = None, trace: bool = True, cute: bool = True):
+     backend = ConsoleTraceBackend(style="cute" if cute else "plain")
+     engine = Engine(trace_backend=backend)
+     return engine.run(flow, params)
+
+ __all__ = ["task", "Flow", "run"]
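For orientation, a minimal sketch of how the exported pieces fit together (the task names here are hypothetical; task, Flow, and run are the objects exported above):

    from pyoco import Flow, run, task

    @task
    def fetch():
        return {"value": 21}

    @task
    def double(ctx):
        # the engine injects ctx when a task's signature asks for it
        return ctx.get_result("fetch")["value"] * 2

    flow = Flow(name="demo")
    flow >> fetch >> double
    ctx = run(flow, cute=False)   # plain (non-cute) console tracing
    print(ctx.results["double"])  # 42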
pyoco/cli/entry.py ADDED
@@ -0,0 +1,5 @@
+ #!/usr/bin/env python
+ from pyoco.cli.main import main
+
+ if __name__ == "__main__":
+     main()
pyoco/cli/main.py ADDED
@@ -0,0 +1,177 @@
+ import argparse
+ import sys
+ import os
+ from ..schemas.config import PyocoConfig
+ from ..discovery.loader import TaskLoader
+ from ..core.models import Flow
+ from ..core.engine import Engine
+ from ..trace.console import ConsoleTraceBackend
+
+ def main():
+     parser = argparse.ArgumentParser(description="Pyoco Workflow Engine")
+     subparsers = parser.add_subparsers(dest="command", help="Command to run")
+
+     # Run command
+     run_parser = subparsers.add_parser("run", help="Run a workflow")
+     run_parser.add_argument("--config", required=True, help="Path to flow.yaml")
+     run_parser.add_argument("--flow", default="main", help="Flow name to run")
+     run_parser.add_argument("--trace", action="store_true", help="Enable tracing")
+     run_parser.add_argument("--cute", action="store_true", default=True, help="Use cute trace style")
+     run_parser.add_argument("--non-cute", action="store_false", dest="cute", help="Use plain trace style")
+     # Allow overriding params via the CLI
+     run_parser.add_argument("--param", action="append", help="Override params (key=value)")
+
+     # Check command
+     check_parser = subparsers.add_parser("check", help="Verify a workflow")
+     check_parser.add_argument("--config", required=True, help="Path to flow.yaml")
+     check_parser.add_argument("--flow", default="main", help="Flow name to check")
+
+     # List tasks command
+     list_parser = subparsers.add_parser("list-tasks", help="List available tasks")
+     list_parser.add_argument("--config", required=True, help="Path to flow.yaml")
+
+     args = parser.parse_args()
+
+     if not args.command:
+         parser.print_help()
+         sys.exit(1)
+
+     # Load config
+     try:
+         config = PyocoConfig.from_yaml(args.config)
+     except Exception as e:
+         print(f"Error loading config: {e}")
+         sys.exit(1)
+
+     # Discover tasks
+     loader = TaskLoader(config)
+     loader.load()
+
+     if args.command == "list-tasks":
+         print("Available tasks:")
+         for name in loader.tasks:
+             print(f"  - {name}")
+         return
+
+     if args.command == "run":
+         flow_conf = config.flows.get(args.flow)
+         if not flow_conf:
+             print(f"Flow '{args.flow}' not found in config.")
+             sys.exit(1)
+
+         # Build the Flow from the graph expression string
+         from ..dsl.syntax import TaskWrapper
+         eval_context = {name: TaskWrapper(task) for name, task in loader.tasks.items()}
+
+         try:
+             # Create the Flow and add all loaded tasks
+             flow = Flow(name=args.flow)
+             for t in loader.tasks.values():
+                 flow.add_task(t)
+
+             # Evaluate the graph expression to set up dependencies
+             exec(flow_conf.graph, {}, eval_context)
+
+             # Run the engine
+             backend = ConsoleTraceBackend(style="cute" if args.cute else "plain")
+             engine = Engine(trace_backend=backend)
+
+             # Params: config defaults, overridden by --param flags
+             params = flow_conf.defaults.copy()
+             if args.param:
+                 for p in args.param:
+                     if "=" in p:
+                         k, v = p.split("=", 1)
+                         params[k] = v  # Simple string parsing for now
+
+             engine.run(flow, params)
+
+         except Exception as e:
+             print(f"Error executing flow: {e}")
+             import traceback
+             traceback.print_exc()
+             sys.exit(1)
+
+     elif args.command == "check":
+         print(f"Checking flow '{args.flow}'...")
+         flow_conf = config.flows.get(args.flow)
+         if not flow_conf:
+             print(f"Flow '{args.flow}' not found in config.")
+             sys.exit(1)
+
+         errors = []
+         warnings = []
+
+         # 1. Imports were already checked by loader.load(); here we verify the graph itself
+         # 2. Build a flow so the graph expression can be evaluated
+         from ..dsl.syntax import TaskWrapper
+         eval_context = {name: TaskWrapper(task) for name, task in loader.tasks.items()}
+
+         try:
+             flow = Flow(name=args.flow)
+             for t in loader.tasks.values():
+                 flow.add_task(t)
+
+             eval(flow_conf.graph, {}, eval_context)
+
+             # 3. Reachability / orphans:
+             # nodes with no dependencies and no dependents (unless the flow has a single node)
+             if len(flow.tasks) > 1:
+                 for t in flow.tasks:
+                     if not t.dependencies and not t.dependents:
+                         warnings.append(f"Task '{t.name}' is orphaned (no dependencies or dependents).")
+
+             # 4. Cycles:
+             # simple DFS over upstream edges
+             visited = set()
+             path = set()
+             def visit(node):
+                 if node in path:
+                     return True  # Cycle
+                 if node in visited:
+                     return False
+
+                 visited.add(node)
+                 path.add(node)
+                 for dep in node.dependencies:  # Check upstream
+                     if visit(dep):
+                         return True
+                 path.remove(node)
+                 return False
+
+             for t in flow.tasks:
+                 if visit(t):
+                     errors.append(f"Cycle detected involving task '{t.name}'.")
+                     break
+
+             # 5. Signature check
+             import inspect
+             for t in flow.tasks:
+                 sig = inspect.signature(t.func)
+                 for name, param in sig.parameters.items():
+                     if name == 'ctx': continue
+                     # Inputs are resolved at runtime, so values cannot be verified here;
+                     # we can only check that an 'inputs' mapping or a default exists
+                     # for each declared argument.
+                     if name not in t.inputs and name not in flow_conf.defaults:
+                         # Possible missing input
+                         warnings.append(f"Task '{t.name}' argument '{name}' might be missing input (not in inputs or defaults).")
+
+         except Exception as e:
+             errors.append(f"Graph evaluation failed: {e}")
+
+         # Report
+         print("\n--- Check Report ---")
+         if not errors and not warnings:
+             print("✅ All checks passed!")
+         else:
+             for w in warnings:
+                 print(f"⚠️ {w}")
+             for e in errors:
+                 print(f"❌ {e}")
+
+         if errors:
+             sys.exit(1)
+
+ if __name__ == "__main__":
+     main()
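As a usage sketch (the flow.yaml below is hypothetical but follows the schema in pyoco/schemas/config.py; the wheel metadata declares no console script, so the module is invoked directly):

    # flow.yaml
    version: 1
    flows:
      main:
        graph: "extract >> transform"
        defaults:
          name: "world"
    tasks:
      extract:
        callable: "myproj.tasks:extract"
      transform:
        callable: "myproj.tasks:transform"

    $ python -m pyoco.cli.main list-tasks --config flow.yaml
    $ python -m pyoco.cli.main check --config flow.yaml --flow main
    $ python -m pyoco.cli.main run --config flow.yaml --flow main --param name=pyoco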
pyoco/core/base_task.py ADDED
@@ -0,0 +1,65 @@
+ # pyoco core - base task abstraction
+ """Common abstract base class for user-defined tasks.
+
+ The library already allows registering a plain function with the ``@task``
+ decorator. For more structured or reusable implementations you can
+ subclass :class:`BaseTask` and implement the ``run`` method. The ``run``
+ method receives the current :class:`~pyoco.core.context.Context` instance
+ so you can read inputs, write outputs, or use any other context helpers.
+
+ Typical usage::
+
+     from pyoco.core.base_task import BaseTask
+     from pyoco.dsl.syntax import task
+
+     class MyTask(BaseTask):
+         @task
+         def run(self, ctx):
+             # ``ctx`` gives access to ``inputs``, ``scratch``, etc.
+             data = ctx.inputs.get("my_input")
+             result = data * 2
+             return result
+
+ In ``flow.yaml`` you reference the method as usual::
+
+     tasks:
+       double:
+         callable: "my_module:MyTask.run"
+         inputs:
+           my_input: "$ctx.params.value"
+         outputs:
+           - "scratch.doubled"
+
+ The abstract base class does not enforce any particular input/output
+ schema; it simply provides a clear contract for developers and makes the
+ library documentation more discoverable.
+ """
+
+ from abc import ABC, abstractmethod
+ from typing import Any
+
+ class BaseTask(ABC):
+     """Abstract base class for custom tasks.
+
+     Subclass this class and implement :meth:`run`. The method must accept
+     a single ``ctx`` argument (the :class:`~pyoco.core.context.Context`
+     instance) and return a value that will be stored according to the
+     ``outputs`` configuration in ``flow.yaml``.
+     """
+
+     @abstractmethod
+     def run(self, ctx: Any) -> Any:
+         """Execute the task.
+
+         Parameters
+         ----------
+         ctx: :class:`~pyoco.core.context.Context`
+             Execution context providing access to ``inputs``, ``scratch``,
+             ``params`` and helper methods such as ``save_artifact``.
+
+         Returns
+         -------
+         Any
+             The value that will be saved to the paths listed in ``outputs``.
+         """
+         raise NotImplementedError
pyoco/core/context.py ADDED
@@ -0,0 +1,110 @@
+ import threading
+ from typing import Any, Dict, Optional
+ from dataclasses import dataclass, field
+
+ @dataclass
+ class Context:
+     params: Dict[str, Any] = field(default_factory=dict)
+     env: Dict[str, str] = field(default_factory=dict)
+     results: Dict[str, Any] = field(default_factory=dict)
+     scratch: Dict[str, Any] = field(default_factory=dict)
+     artifacts: Dict[str, Any] = field(default_factory=dict)
+     run_id: Optional[str] = None
+     artifact_dir: str = field(default="./artifacts")
+
+     _lock: threading.Lock = field(default_factory=threading.Lock, repr=False)
+
+     def __post_init__(self):
+         # Ensure the artifact directory exists
+         import pathlib
+         pathlib.Path(self.artifact_dir).mkdir(parents=True, exist_ok=True)
+
+     def get_result(self, node_name: str) -> Any:
+         with self._lock:
+             return self.results.get(node_name)
+
+     def set_result(self, node_name: str, value: Any):
+         with self._lock:
+             self.results[node_name] = value
+
+     def save_artifact(self, name: str, data: Any) -> str:
+         import os
+         import pathlib
+
+         full_path = pathlib.Path(self.artifact_dir) / name
+         # Ensure the parent dir exists for nested artifacts
+         full_path.parent.mkdir(parents=True, exist_ok=True)
+
+         mode = "w"
+         content = data
+
+         if isinstance(data, bytes):
+             mode = "wb"
+         elif not isinstance(data, str):
+             content = str(data)
+
+         with open(full_path, mode) as f:
+             f.write(content)
+
+         abs_path = str(full_path.absolute())
+
+         type_name = type(data).__name__
+         if isinstance(data, (dict, list)):
+             type_name = "object"
+
+         with self._lock:
+             self.artifacts[name] = {
+                 "path": abs_path,
+                 "type": type_name
+             }
+
+         return abs_path
+
+     def resolve(self, value: Any) -> Any:
+         if not isinstance(value, str) or not value.startswith("$"):
+             return value
+
+         # $node.<Name>.output
+         if value.startswith("$node."):
+             parts = value.split(".")
+             # $node.A.output   -> ["$node", "A", "output"]
+             # $node.A.output.x -> ["$node", "A", "output", "x"]
+             if len(parts) < 3 or parts[2] != "output":
+                 # Malformed or unsupported node selector
+                 return value
+
+             node_name = parts[1]
+             if node_name not in self.results:
+                 raise KeyError(f"Node '{node_name}' result not found in context.")
+
+             result = self.results[node_name]
+
+             # Handle nested access
+             if len(parts) > 3:
+                 for key in parts[3:]:
+                     if isinstance(result, dict):
+                         result = result[key]
+                     else:
+                         result = getattr(result, key)
+             return result
+
+         # $ctx.params.<Key>
+         if value.startswith("$ctx.params."):
+             key = value[len("$ctx.params."):]
+             if key not in self.params:
+                 raise KeyError(f"Param '{key}' not found in context.")
+             return self.params[key]
+
+         # $env.<Key>
+         if value.startswith("$env."):
+             import os
+             key = value[len("$env."):]
+             # Check ctx.env first, then os.environ
+             if key in self.env:
+                 return self.env[key]
+             if key in os.environ:
+                 return os.environ[key]
+             raise KeyError(f"Environment variable '{key}' not found.")
+
+         return value
+
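A short sketch of the selector grammar resolve() implements (names and values are illustrative; note that instantiating Context creates artifact_dir as a side effect):

    ctx = Context(params={"threshold": 5})
    ctx.set_result("fetch", {"count": 12})

    ctx.resolve("$ctx.params.threshold")     # -> 5
    ctx.resolve("$node.fetch.output")        # -> {"count": 12}
    ctx.resolve("$node.fetch.output.count")  # -> 12 (nested dict access)
    ctx.resolve("plain string")              # -> returned unchanged (no "$" prefix)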
pyoco/core/engine.py ADDED
@@ -0,0 +1,240 @@
+ import time
+ from typing import Dict, Any, List, Set
+ from .models import Flow, Task
+ from .context import Context
+ from ..trace.backend import TraceBackend
+ from ..trace.console import ConsoleTraceBackend
+
+ class Engine:
+     def __init__(self, trace_backend: TraceBackend = None):
+         self.trace = trace_backend or ConsoleTraceBackend()
+
+     def run(self, flow: Flow, params: Dict[str, Any] = None) -> Context:
+         ctx = Context(params=params or {})
+         self.trace.on_flow_start(flow.name)
+
+         executed: Set[Task] = set()
+         running: Set[Any] = set()  # set of in-flight Futures
+
+         import concurrent.futures
+
+         # Use a ThreadPoolExecutor for parallel execution.
+         # max_workers could be made configurable; default to something reasonable.
+         with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
+             future_to_task = {}
+             task_deadlines: Dict[Task, float] = {}
+
+             failed: Set[Task] = set()
+
+             while len(executed) + len(failed) < len(flow.tasks):
+                 # Identify runnable tasks
+                 runnable = []
+                 for task in flow.tasks:
+                     if task in executed or task in failed or task in [future_to_task[f] for f in running]:
+                         continue
+
+                     # Check dependencies
+                     deps_met = True
+
+                     if task.trigger_policy == "ANY":
+                         # OR-join: runnable as soon as ANY dependency has succeeded.
+                         # If none has succeeded yet we keep waiting; if ALL of them
+                         # have failed, the task can never fire, so mark it failed.
+                         any_success = False
+                         all_failed = True
+
+                         if not task.dependencies:
+                             # No deps = ready
+                             any_success = True
+                             all_failed = False
+                         else:
+                             for dep in task.dependencies:
+                                 if dep in executed:
+                                     any_success = True
+                                     all_failed = False
+                                     break  # Found one success
+                                 if dep not in failed:
+                                     all_failed = False  # At least one is still running/pending
+
+                         if any_success:
+                             deps_met = True
+                         elif all_failed:
+                             # All deps failed, so this task fails/skips too
+                             failed.add(task)
+                             deps_met = False
+                             # Skip to the next task to avoid adding this one to runnable
+                             continue
+                         else:
+                             # Still waiting
+                             deps_met = False
+
+                     else:
+                         # ALL (AND-join) - default
+                         for dep in task.dependencies:
+                             if dep in failed:
+                                 # Dependency failed
+                                 if task.fail_policy == "isolate" or dep.fail_policy == "isolate":
+                                     failed.add(task)
+                                     deps_met = False
+                                     break
+                                 else:
+                                     pass  # fail=stop is raised where the failure occurs
+
+                             if dep not in executed:
+                                 deps_met = False
+                                 break
+
+                     if deps_met and task not in failed:
+                         runnable.append(task)
+
+                 # No runnable and no running tasks: either everything is done
+                 # (possibly with failures) or the graph is stuck.
+                 if not runnable and not running:
+                     if len(executed) + len(failed) == len(flow.tasks):
+                         # All done (some may have failed)
+                         break
+                     raise RuntimeError("Deadlock or cycle detected in workflow")
+
+                 # Submit runnable tasks
+                 for task in runnable:
+                     future = executor.submit(self._execute_task, task, ctx)
+                     running.add(future)
+                     future_to_task[future] = task
+                     # Track deadlines in a dict keyed by task; Task is an
+                     # immutable-ish dataclass and hashes by name.
+                     if task.timeout_sec:
+                         task_deadlines[task] = time.time() + task.timeout_sec
+
+                 # Calculate the wait timeout from the nearest deadline
+                 wait_timeout = None
+                 if task_deadlines:
+                     now = time.time()
+                     min_deadline = min(task_deadlines.values())
+                     wait_timeout = max(0, min_deadline - now)
+
+                 # Wait for at least one task to complete or time out
+                 if running:
+                     done, _ = concurrent.futures.wait(
+                         running,
+                         timeout=wait_timeout,
+                         return_when=concurrent.futures.FIRST_COMPLETED
+                     )
+
+                     # Check for timeouts first
+                     now = time.time()
+                     timed_out_tasks = []
+                     for task, deadline in list(task_deadlines.items()):
+                         if now >= deadline:
+                             # Task timed out: find its future.
+                             # A linear scan is inefficient, but the running set is small.
+                             found_future = None
+                             for f, t in future_to_task.items():
+                                 if t == task and f in running:
+                                     found_future = f
+                                     break
+
+                             if found_future:
+                                 timed_out_tasks.append(found_future)
+                                 # Stop tracking it (the worker thread itself cannot be killed)
+                                 running.remove(found_future)
+                                 del task_deadlines[task]
+
+                                 # Handle the failure
+                                 if task.fail_policy == "isolate":
+                                     failed.add(task)
+                                     self.trace.on_node_error(task.name, TimeoutError(f"Task exceeded timeout of {task.timeout_sec}s"))
+                                 else:
+                                     raise TimeoutError(f"Task '{task.name}' exceeded timeout of {task.timeout_sec}s")
+
+                     for future in done:
+                         if future in running:  # it may have been removed by the timeout check above
+                             running.remove(future)
+                             task = future_to_task[future]
+                             if task in task_deadlines:
+                                 del task_deadlines[task]
+
+                             try:
+                                 future.result()  # Re-raise the task's exception, if any
+                                 executed.add(task)
+                             except Exception as e:
+                                 if task.fail_policy == "isolate":
+                                     failed.add(task)
+                                     self.trace.on_node_error(task.name, e)  # log and continue
+                                 else:
+                                     # fail=stop (default)
+                                     raise e
+
+         self.trace.on_flow_end(flow.name)
+         return ctx
+
+     def _execute_task(self, task: Task, ctx: Context):
+         self.trace.on_node_start(task.name)
+         start_time = time.time()
+         # Retry loop
+         retries_left = task.retries
+         while True:
+             try:
+                 # Resolve inputs from the task configuration
+                 kwargs = {}
+                 for key, value in task.inputs.items():
+                     kwargs[key] = ctx.resolve(value)
+
+                 # Inspect the function signature to see what it expects
+                 import inspect
+                 sig = inspect.signature(task.func)
+
+                 # Inject 'ctx' if requested
+                 if 'ctx' in sig.parameters:
+                     kwargs['ctx'] = ctx
+
+                 # Auto-wiring (legacy/convenience)
+                 for param_name in sig.parameters:
+                     if param_name in kwargs:
+                         continue
+                     if param_name == 'ctx':
+                         continue
+
+                     if param_name in ctx.params:
+                         kwargs[param_name] = ctx.params[param_name]
+                     elif param_name in ctx.results:
+                         kwargs[param_name] = ctx.results[param_name]
+
+                 result = task.func(**kwargs)
+                 ctx.set_result(task.name, result)
+
+                 # Save the result to the configured output paths
+                 for target_path in task.outputs:
+                     parts = target_path.split(".")
+                     root_name = parts[0]
+                     root_obj = None
+                     if root_name == "scratch":
+                         root_obj = ctx.scratch
+                     elif root_name == "results":
+                         root_obj = ctx.results
+                     elif root_name == "params":
+                         root_obj = ctx.params
+
+                     if root_obj is not None:
+                         current = root_obj
+                         for i, part in enumerate(parts[1:-1]):
+                             if part not in current:
+                                 current[part] = {}
+                             current = current[part]
+                             if not isinstance(current, dict):
+                                 break
+                         else:
+                             current[parts[-1]] = result
+
+                 duration = (time.time() - start_time) * 1000
+                 self.trace.on_node_end(task.name, duration)
+                 return  # Success
+
+             except Exception as e:
+                 if retries_left > 0:
+                     retries_left -= 1
+                     # No retry hook on the trace backend yet; just back off and retry
+                     time.sleep(0.1)  # small fixed backoff
+                     continue
+                 else:
+                     self.trace.on_node_error(task.name, e)
+                     raise e
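To illustrate the failure and trigger policies the engine honors, a sketch that wires tasks by hand (the task bodies are hypothetical):

    from pyoco.core.models import Task, Flow
    from pyoco.core.engine import Engine

    def flaky():
        raise RuntimeError("boom")

    def ok():
        return "fine"

    def cleanup():
        return "ran anyway"

    a = Task(func=flaky, name="flaky", fail_policy="isolate", retries=1)
    b = Task(func=ok, name="ok")
    c = Task(func=cleanup, name="cleanup", trigger_policy="ANY")
    for dep in (a, b):
        c.dependencies.add(dep)
        dep.dependents.add(c)

    ctx = Engine().run(Flow(name="demo", tasks={a, b, c}))
    # "flaky" is retried once and then fails; because it is isolated, the run
    # continues, and "cleanup" fires as soon as "ok" succeeds (OR-join).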
pyoco/core/models.py ADDED
@@ -0,0 +1,80 @@
+ from typing import Any, Callable, Dict, List, Optional, Set
+ from dataclasses import dataclass, field
+
+ @dataclass
+ class Task:
+     func: Callable
+     name: str
+     dependencies: Set['Task'] = field(default_factory=set)
+     dependents: Set['Task'] = field(default_factory=set)
+     # Inputs configuration from flow.yaml
+     inputs: Dict[str, Any] = field(default_factory=dict)
+     outputs: List[str] = field(default_factory=list)  # context paths to save the result to
+     # For parallel execution grouping
+     parallel_group: Optional[str] = None
+
+     # Failure handling
+     fail_policy: str = "stop"  # stop, isolate, retry
+     retries: int = 0
+     timeout_sec: Optional[float] = None
+
+     # Trigger policy
+     trigger_policy: str = "ALL"  # ALL (AND-join), ANY (OR-join)
+
+     def __hash__(self):
+         return hash(self.name)
+
+     def __eq__(self, other):
+         if isinstance(other, Task):
+             return self.name == other.name
+         return False
+
+     def __repr__(self):
+         return f"<Task {self.name}>"
+
+ @dataclass
+ class Flow:
+     name: str = "main"
+     tasks: Set[Task] = field(default_factory=set)
+     _tail: Set[Task] = field(default_factory=set)
+
+     def __rshift__(self, other):
+         # Flow >> Task / list / Branch: add the task(s) to the flow and make
+         # them depend on the current tail. Note that `flow >> (A | B)` merely
+         # adds A and B; the OR-join semantics of a Branch only apply when the
+         # Branch itself is chained downstream, i.e. `(A | B) >> C`, which is
+         # handled by Branch.__rshift__ in dsl.syntax.
+         new_tasks = []
+         is_branch = False
+
+         if hasattr(other, 'task'):  # TaskWrapper
+             new_tasks = [other.task]
+         elif isinstance(other, Task):
+             new_tasks = [other]
+         elif isinstance(other, (list, tuple)):
+             # Branch is defined in dsl.syntax; importing it here would be a
+             # circular import, so detect it by class name instead.
+             if type(other).__name__ == "Branch":
+                 is_branch = True
+
+             for item in other:
+                 if hasattr(item, 'task'):
+                     new_tasks.append(item.task)
+                 elif isinstance(item, Task):
+                     new_tasks.append(item)
+
+         # Add tasks and link them from the current tail (AND semantics)
+         for t in new_tasks:
+             self.add_task(t)
+             for tail_task in self._tail:
+                 tail_task.dependents.add(t)
+                 t.dependencies.add(tail_task)
+
+         # Update tail
+         if new_tasks:
+             self._tail = set(new_tasks)
+
+         return self
+
+     def add_task(self, task: Task):
+         self.tasks.add(task)
pyoco/discovery/loader.py ADDED
@@ -0,0 +1,131 @@
+ import importlib
+ import pkgutil
+ import sys
+ from typing import Dict, List, Any, Set
+ from ..core.models import Task
+ from ..dsl.syntax import TaskWrapper
+
+ class TaskLoader:
+     def __init__(self, config: Any, strict: bool = False):
+         self.config = config
+         self.strict = strict
+         self.tasks: Dict[str, Task] = {}
+         self._explicit_tasks: Set[str] = set()
+
+     def load(self):
+         # Load tasks defined explicitly in the config FIRST (higher priority)
+         for task_name, task_conf in self.config.tasks.items():
+             if task_conf.callable:
+                 self._load_explicit_task(task_name, task_conf)
+                 self._explicit_tasks.add(task_name)
+
+         # Load from packages
+         for package in self.config.discovery.packages:
+             self._load_package(package)
+
+         # Load from entry points (simplified)
+         for ep in self.config.discovery.entry_points:
+             self._load_module(ep)
+
+         # Load from glob modules
+         for pattern in self.config.discovery.glob_modules:
+             self._load_glob_modules(pattern)
+
+     def _register_task(self, name: str, task: Task):
+         if name in self.tasks:
+             if name in self._explicit_tasks:
+                 # Explicit wins; ignore the implicit one
+                 return
+
+             # Collision between two implicitly discovered tasks
+             msg = f"Task '{name}' already defined."
+             if self.strict:
+                 raise ValueError(f"{msg} (Strict mode enabled)")
+             else:
+                 print(f"Warning: {msg} Overwriting.")
+
+         # Apply the config overlay, if one exists
+         if name in self.config.tasks:
+             conf = self.config.tasks[name]
+             if not conf.callable:
+                 if conf.inputs:
+                     task.inputs.update(conf.inputs)
+                 if conf.outputs:
+                     task.outputs.extend(conf.outputs)
+
+         self.tasks[name] = task
+
+     def _load_package(self, package_name: str):
+         try:
+             pkg = importlib.import_module(package_name)
+             if hasattr(pkg, '__path__'):
+                 for _, name, _ in pkgutil.iter_modules(pkg.__path__, pkg.__name__ + "."):
+                     self._load_module(name)
+             else:
+                 self._scan_module(pkg)
+         except ImportError as e:
+             print(f"Warning: Could not import package {package_name}: {e}")
+
+     def _load_module(self, module_name: str):
+         try:
+             mod = importlib.import_module(module_name)
+             self._scan_module(mod)
+         except ImportError as e:
+             print(f"Warning: Could not import module {module_name}: {e}")
+
+     def _load_glob_modules(self, pattern: str):
+         import glob
+         import os
+
+         # The pattern is a file-path glob, e.g. "jobs/*.py";
+         # file paths must be converted to module paths.
+         files = glob.glob(pattern, recursive=True)
+         for file_path in files:
+             if not file_path.endswith(".py"):
+                 continue
+
+             # Convert the path to a module name. This is tricky without knowing
+             # the project root; assume we run from the root and the path is
+             # relative to it, e.g. "myproject/tasks/foo.py" -> "myproject.tasks.foo".
+             rel_path = os.path.relpath(file_path)
+             if rel_path.startswith(".."):
+                 # Out of tree; skip
+                 continue
+
+             module_name = rel_path.replace(os.sep, ".")[:-3]  # strip .py
+             self._load_module(module_name)
+
+     def _scan_module(self, module: Any):
+         for name, obj in vars(module).items():
+             if isinstance(obj, TaskWrapper):
+                 self._register_task(name, obj.task)
+             elif isinstance(obj, Task):
+                 self._register_task(name, obj)
+             elif callable(obj) and getattr(obj, '__pyoco_task__', False):
+                 # Convert to Task if not already
+                 pass
+
+     def _load_explicit_task(self, name: str, conf: Any):
+         # Load the callable
+         module_path, func_name = conf.callable.split(':')
+         try:
+             mod = importlib.import_module(module_path)
+             obj = getattr(mod, func_name)
+
+             # Unwrap if it's a TaskWrapper or Task
+             real_func = obj
+             if isinstance(obj, TaskWrapper):
+                 real_func = obj.task.func
+             elif isinstance(obj, Task):
+                 real_func = obj.func
+
+             # Create a Task wrapper
+             t = Task(func=real_func, name=name)
+             t.inputs = conf.inputs
+             t.outputs = conf.outputs
+             self.tasks[name] = t
+         except (ImportError, AttributeError) as e:
+             print(f"Error loading task {name}: {e}")
+
+     def get_task(self, name: str) -> Task:
+         return self.tasks.get(name)
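A sketch of driving the loader programmatically (myproj.tasks is a hypothetical package; the config dataclasses come from pyoco/schemas/config.py):

    from pyoco.discovery.loader import TaskLoader
    from pyoco.schemas.config import PyocoConfig, DiscoveryConfig

    config = PyocoConfig(
        version=1,
        flows={},
        tasks={},
        discovery=DiscoveryConfig(packages=["myproj.tasks"]),
    )
    loader = TaskLoader(config, strict=True)  # strict: implicit name collisions raise
    loader.load()
    print(sorted(loader.tasks))  # names of tasks discovered in myproj.tasks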
pyoco/dsl/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from .syntax import task
+
+ __all__ = ["task"]
pyoco/dsl/syntax.py ADDED
@@ -0,0 +1,95 @@
+ from typing import Callable
+ from ..core.models import Task, Flow
+
+ # Global context (reserved; not used yet)
+ _active_flow: Flow = None
+
+ class Branch(list):
+     """Represents a branch of tasks (OR-split/join logic placeholder)."""
+     def __rshift__(self, other):
+         # (A | B) >> C: C depends on A and B, and C.trigger_policy = "ANY"
+         targets = []
+         if hasattr(other, 'task'):
+             targets = [other.task]
+         elif isinstance(other, (list, tuple)):
+             for item in other:
+                 if hasattr(item, 'task'):
+                     targets.append(item.task)
+
+         for target in targets:
+             target.trigger_policy = "ANY"
+             for source in self:
+                 if hasattr(source, 'task'):
+                     target.dependencies.add(source.task)
+                     source.task.dependents.add(target)
+
+         return other
+
+ class Parallel(list):
+     """Represents a parallel group of tasks (AND-split/join)."""
+     def __rshift__(self, other):
+         # (A & B) >> C: C depends on A AND B
+         targets = []
+         if hasattr(other, 'task'):
+             targets = [other.task]
+         elif isinstance(other, (list, tuple)):
+             for item in other:
+                 if hasattr(item, 'task'):
+                     targets.append(item.task)
+
+         for target in targets:
+             for source in self:
+                 if hasattr(source, 'task'):
+                     target.dependencies.add(source.task)
+                     source.task.dependents.add(target)
+
+         return other
+
+ class TaskWrapper:
+     """
+     Wraps a Task to handle DSL operators and registration.
+     """
+     def __init__(self, task: Task):
+         self.task = task
+
+     def __call__(self, *args, **kwargs):
+         # Calling a task is not strictly necessary for registration, since
+         # tasks are added to a flow explicitly or via >>. Keep the pattern
+         # anyway: calling returns a wrapper that can be chained. (Storing
+         # args/kwargs would be needed to support call-time arguments.)
+         return self
+
+     def __rshift__(self, other):
+         # self >> other
+         if isinstance(other, TaskWrapper):
+             other.task.dependencies.add(self.task)
+             self.task.dependents.add(other.task)
+             return other
+         elif isinstance(other, (list, tuple)):
+             # self >> (A & B) or self >> (A | B): each member depends on self,
+             # as in Airflow's `upstream >> [A, B]`. The AND/OR distinction
+             # only matters when the group connects downstream: (A | B) >> C
+             # goes through Branch.__rshift__, which sets C.trigger_policy to
+             # "ANY". Here we simply fan out.
+             for item in other:
+                 if isinstance(item, TaskWrapper):
+                     item.task.dependencies.add(self.task)
+                     self.task.dependents.add(item.task)
+             return other
+         return other
+
+     def __and__(self, other):
+         # self & other (Parallel)
+         return Parallel([self, other])
+
+     def __or__(self, other):
+         # self | other (Branch)
+         return Branch([self, other])
+
+ # The DSL spec:
+ #     @task
+ #     def A(ctx, x: int) -> int: ...
+ #     flow = Flow() >> A >> (B & C)
+ # so the @task decorator must return something that supports >>, & and |.
+ def task(func: Callable) -> TaskWrapper:
+     t = Task(func=func, name=func.__name__)
+     return TaskWrapper(t)
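A sketch of the operator semantics defined above (A through E are hypothetical tasks):

    @task
    def A(): ...
    @task
    def B(): ...
    @task
    def C(): ...
    @task
    def D(): ...
    @task
    def E(): ...

    A >> (B & C) >> D   # AND-join: D runs only after BOTH B and C
    (B | C) >> E        # OR-join: E runs once EITHER B or C succeeds
                        # (Branch.__rshift__ sets E.trigger_policy = "ANY")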
pyoco/schemas/config.py ADDED
@@ -0,0 +1,57 @@
+ from dataclasses import dataclass, field
+ from typing import List, Dict, Any, Optional
+ import yaml
+
+ @dataclass
+ class TaskConfig:
+     callable: Optional[str] = None
+     inputs: Dict[str, Any] = field(default_factory=dict)
+     outputs: List[str] = field(default_factory=list)
+
+ @dataclass
+ class FlowConfig:
+     graph: str
+     defaults: Dict[str, Any] = field(default_factory=dict)
+
+ @dataclass
+ class DiscoveryConfig:
+     entry_points: List[str] = field(default_factory=list)
+     packages: List[str] = field(default_factory=list)
+     glob_modules: List[str] = field(default_factory=list)
+
+ @dataclass
+ class RuntimeConfig:
+     expose_env: List[str] = field(default_factory=list)
+
+ @dataclass
+ class PyocoConfig:
+     version: int
+     flows: Dict[str, FlowConfig]
+     tasks: Dict[str, TaskConfig]
+     discovery: DiscoveryConfig = field(default_factory=DiscoveryConfig)
+     runtime: RuntimeConfig = field(default_factory=RuntimeConfig)
+
+     @classmethod
+     def from_yaml(cls, path: str) -> 'PyocoConfig':
+         with open(path, 'r') as f:
+             data = yaml.safe_load(f)
+
+         # Simple manual parsing/validation for the MVP;
+         # a real application would use pydantic or similar.
+
+         flows = {k: FlowConfig(**v) for k, v in data.get('flows', {}).items()}
+         tasks = {k: TaskConfig(**v) for k, v in data.get('tasks', {}).items()}
+
+         disc_data = data.get('discovery', {})
+         discovery = DiscoveryConfig(**disc_data)
+
+         run_data = data.get('runtime', {})
+         runtime = RuntimeConfig(**run_data)
+
+         return cls(
+             version=data.get('version', 1),
+             flows=flows,
+             tasks=tasks,
+             discovery=discovery,
+             runtime=runtime
+         )
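For reference, loading a config and inspecting the parsed dataclasses (a sketch; the path is illustrative):

    config = PyocoConfig.from_yaml("flow.yaml")
    print(config.version)                 # 1 unless the file overrides it
    print(config.flows["main"].graph)     # the DSL expression string
    print(config.flows["main"].defaults)  # dict the CLI merges into run params
    print(config.discovery.packages)      # packages scanned by TaskLoader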
pyoco/trace/backend.py ADDED
@@ -0,0 +1,26 @@
+ from abc import ABC, abstractmethod
+ from typing import Any, Optional
+
+ class TraceBackend(ABC):
+     @abstractmethod
+     def on_flow_start(self, flow_name: str):
+         pass
+
+     @abstractmethod
+     def on_flow_end(self, flow_name: str):
+         pass
+
+     @abstractmethod
+     def on_node_start(self, node_name: str):
+         pass
+
+     @abstractmethod
+     def on_node_end(self, node_name: str, duration_ms: float):
+         pass
+
+     @abstractmethod
+     def on_node_error(self, node_name: str, error: Exception):
+         pass
+
+     def on_node_transition(self, source: str, target: str):
+         pass
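Any object implementing these hooks can be passed as Engine(trace_backend=...); a minimal sketch that collects events in memory instead of printing them (a hypothetical class, not part of the wheel):

    from pyoco.trace.backend import TraceBackend

    class ListTraceBackend(TraceBackend):
        # Collects trace events as tuples for later inspection (e.g. in tests)
        def __init__(self):
            self.events = []

        def on_flow_start(self, flow_name: str):
            self.events.append(("flow_start", flow_name))

        def on_flow_end(self, flow_name: str):
            self.events.append(("flow_end", flow_name))

        def on_node_start(self, node_name: str):
            self.events.append(("node_start", node_name))

        def on_node_end(self, node_name: str, duration_ms: float):
            self.events.append(("node_end", node_name, duration_ms))

        def on_node_error(self, node_name: str, error: Exception):
            self.events.append(("node_error", node_name, repr(error)))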
pyoco/trace/console.py ADDED
@@ -0,0 +1,40 @@
+ import time
+ from .backend import TraceBackend
+
+ class ConsoleTraceBackend(TraceBackend):
+     def __init__(self, style: str = "cute"):
+         self.style = style
+
+     def on_flow_start(self, flow_name: str):
+         if self.style == "cute":
+             print(f"🐇 pyoco > start flow={flow_name}")
+         else:
+             print(f"INFO pyoco start flow={flow_name}")
+
+     def on_flow_end(self, flow_name: str):
+         if self.style == "cute":
+             print(f"🥕 done flow={flow_name}")
+         else:
+             print(f"INFO pyoco end flow={flow_name}")
+
+     def on_node_start(self, node_name: str):
+         if self.style == "cute":
+             print(f"🏃 start node={node_name}")
+         else:
+             print(f"INFO pyoco start node={node_name}")
+
+     def on_node_end(self, node_name: str, duration_ms: float):
+         if self.style == "cute":
+             print(f"✅ done node={node_name} ({duration_ms:.2f} ms)")
+         else:
+             print(f"INFO pyoco end node={node_name} dur_ms={duration_ms:.2f}")
+
+     def on_node_error(self, node_name: str, error: Exception):
+         if self.style == "cute":
+             print(f"💥 error node={node_name} {error}")
+         else:
+             print(f"ERROR pyoco error node={node_name} {error}")
+
+     def on_node_transition(self, source: str, target: str):
+         if self.style == "cute":
+             print(f"🐇 {source} -> {target}")
pyoco-0.1.0.dist-info/METADATA ADDED
@@ -0,0 +1,7 @@
+ Metadata-Version: 2.4
+ Name: pyoco
+ Version: 0.1.0
+ Summary: A workflow engine with sugar syntax
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ Requires-Dist: pyyaml>=6.0.3
pyoco-0.1.0.dist-info/RECORD ADDED
@@ -0,0 +1,17 @@
+ pyoco/__init__.py,sha256=E2pgDGvGRSVon7dSqIM4UD55LgVpf4jiZZA-70kOcuw,409
+ pyoco/cli/entry.py,sha256=zPIG0Gx-cFO8Cf1Z3wD3Ifz_2sHaryHZ6mCRri2WEqE,93
+ pyoco/cli/main.py,sha256=uRc6CzUTVRYF4JbehlbrprT7GvWQ-WyBZ8k12NrSxO8,6502
+ pyoco/core/base_task.py,sha256=z7hOFntAPv4yCADapS-fhtLe5eWqaO8k3T1r05YEEUE,2106
+ pyoco/core/context.py,sha256=SnoTz3vRghO1A-FNOrw2NEjbx1HySDqrBnQU5-KWGbk,3696
+ pyoco/core/engine.py,sha256=m5LrEsXcpUAran5DxULtWbvhsMNj5mv17wE6lDFkFmQ,11416
+ pyoco/core/models.py,sha256=zTt5HTSBChwRpOuw3qY2pvjRGZVsq4OQ-ZBHE3ujMWA,4548
+ pyoco/discovery/loader.py,sha256=XzZzOAyFYrdA8K6APuEGWgjSIyp4Bgwlr834MyJc8vk,4950
+ pyoco/dsl/__init__.py,sha256=xWdb60pSRL8lNFk4GHF3EJ4hon0uiWqpv264g6-4gdg,45
+ pyoco/dsl/syntax.py,sha256=AkFcD5gLlbJLFN0KkMIyttpHUV3v21pjz_ZqwreZkdM,4312
+ pyoco/schemas/config.py,sha256=KkGZK3GxTHoIHEGb4f4k8GE2W-aBN4iPzmc_HrwuROU,1735
+ pyoco/trace/backend.py,sha256=h7l1PU8zuCSOo_VA5T1ax4znN_Az3Xuvx-KXibg3e-U,597
+ pyoco/trace/console.py,sha256=Kf2-vma98ojhVQZHFzCUYfD_46Lr1WfAfI56smZkSZM,1397
+ pyoco-0.1.0.dist-info/METADATA,sha256=bA_qJXUkIiC7TIOSo8CEzJ6PXp01pLQ1Q1LoMOrIw_k,187
+ pyoco-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ pyoco-0.1.0.dist-info/top_level.txt,sha256=2JRVocfaWRbX1VJ3zq1c5wQaOK6fMARS6ptVFWyvRF4,6
+ pyoco-0.1.0.dist-info/RECORD,,
pyoco-0.1.0.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (80.9.0)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
pyoco-0.1.0.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
+ pyoco