pyoco 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyoco/__init__.py ADDED
@@ -0,0 +1,11 @@
+ from .core.models import Flow, Task
+ from .core.engine import Engine
+ from .dsl.syntax import task
+ from .trace.console import ConsoleTraceBackend
+
+ def run(flow: Flow, params: dict = None, trace: bool = True, cute: bool = True):
+     backend = ConsoleTraceBackend(style="cute" if cute else "plain")
+     engine = Engine(trace_backend=backend)
+     return engine.run(flow, params)
+
+ __all__ = ["task", "Flow", "run"]
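For orientation, a minimal sketch of how the exported pieces fit together (the task names here are hypothetical; task, Flow, and run are the objects exported above):

    from pyoco import Flow, run, task

    @task
    def fetch():
        return {"value": 21}

    @task
    def double(ctx):
        # the engine injects ctx when a task's signature asks for it
        return ctx.get_result("fetch")["value"] * 2

    flow = Flow(name="demo")
    flow >> fetch >> double
    ctx = run(flow, cute=False)   # plain (non-cute) console tracing
    print(ctx.results["double"])  # 42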
pyoco/cli/entry.py ADDED
@@ -0,0 +1,5 @@
+ #!/usr/bin/env python
+ from pyoco.cli.main import main
+
+ if __name__ == "__main__":
+     main()
pyoco/cli/main.py ADDED
@@ -0,0 +1,177 @@
+ import argparse
+ import sys
+ import os
+ from ..schemas.config import PyocoConfig
+ from ..discovery.loader import TaskLoader
+ from ..core.models import Flow
+ from ..core.engine import Engine
+ from ..trace.console import ConsoleTraceBackend
+
+ def main():
+     parser = argparse.ArgumentParser(description="Pyoco Workflow Engine")
+     subparsers = parser.add_subparsers(dest="command", help="Command to run")
+
+     # Run command
+     run_parser = subparsers.add_parser("run", help="Run a workflow")
+     run_parser.add_argument("--config", required=True, help="Path to flow.yaml")
+     run_parser.add_argument("--flow", default="main", help="Flow name to run")
+     run_parser.add_argument("--trace", action="store_true", help="Enable tracing")
+     run_parser.add_argument("--cute", action="store_true", default=True, help="Use cute trace style")
+     run_parser.add_argument("--non-cute", action="store_false", dest="cute", help="Use plain trace style")
+     # Allow overriding params via the CLI
+     run_parser.add_argument("--param", action="append", help="Override params (key=value)")
+
+     # Check command
+     check_parser = subparsers.add_parser("check", help="Verify a workflow")
+     check_parser.add_argument("--config", required=True, help="Path to flow.yaml")
+     check_parser.add_argument("--flow", default="main", help="Flow name to check")
+
+     # List tasks command
+     list_parser = subparsers.add_parser("list-tasks", help="List available tasks")
+     list_parser.add_argument("--config", required=True, help="Path to flow.yaml")
+
+     args = parser.parse_args()
+
+     if not args.command:
+         parser.print_help()
+         sys.exit(1)
+
+     # Load config
+     try:
+         config = PyocoConfig.from_yaml(args.config)
+     except Exception as e:
+         print(f"Error loading config: {e}")
+         sys.exit(1)
+
+     # Discover tasks
+     loader = TaskLoader(config)
+     loader.load()
+
+     if args.command == "list-tasks":
+         print("Available tasks:")
+         for name in loader.tasks:
+             print(f"  - {name}")
+         return
+
+     if args.command == "run":
+         flow_conf = config.flows.get(args.flow)
+         if not flow_conf:
+             print(f"Flow '{args.flow}' not found in config.")
+             sys.exit(1)
+
+         # Build the Flow from the graph expression string
+         from ..dsl.syntax import TaskWrapper
+         eval_context = {name: TaskWrapper(task) for name, task in loader.tasks.items()}
+
+         try:
+             # Create the Flow and add all loaded tasks
+             flow = Flow(name=args.flow)
+             for t in loader.tasks.values():
+                 flow.add_task(t)
+
+             # Evaluate the graph expression to set up dependencies
+             exec(flow_conf.graph, {}, eval_context)
+
+             # Run the engine
+             backend = ConsoleTraceBackend(style="cute" if args.cute else "plain")
+             engine = Engine(trace_backend=backend)
+
+             # Params: config defaults, overridden by --param flags
+             params = flow_conf.defaults.copy()
+             if args.param:
+                 for p in args.param:
+                     if "=" in p:
+                         k, v = p.split("=", 1)
+                         params[k] = v  # Simple string parsing for now
+
+             engine.run(flow, params)
+
+         except Exception as e:
+             print(f"Error executing flow: {e}")
+             import traceback
+             traceback.print_exc()
+             sys.exit(1)
+
+     elif args.command == "check":
+         print(f"Checking flow '{args.flow}'...")
+         flow_conf = config.flows.get(args.flow)
+         if not flow_conf:
+             print(f"Flow '{args.flow}' not found in config.")
+             sys.exit(1)
+
+         errors = []
+         warnings = []
+
+         # 1. Imports were already checked by loader.load(); here we verify the graph itself
+         # 2. Build a flow so the graph expression can be evaluated
+         from ..dsl.syntax import TaskWrapper
+         eval_context = {name: TaskWrapper(task) for name, task in loader.tasks.items()}
+
+         try:
+             flow = Flow(name=args.flow)
+             for t in loader.tasks.values():
+                 flow.add_task(t)
+
+             eval(flow_conf.graph, {}, eval_context)
+
+             # 3. Reachability / orphans:
+             # nodes with no dependencies and no dependents (unless the flow has a single node)
+             if len(flow.tasks) > 1:
+                 for t in flow.tasks:
+                     if not t.dependencies and not t.dependents:
+                         warnings.append(f"Task '{t.name}' is orphaned (no dependencies or dependents).")
+
+             # 4. Cycles:
+             # simple DFS over upstream edges
+             visited = set()
+             path = set()
+             def visit(node):
+                 if node in path:
+                     return True  # Cycle
+                 if node in visited:
+                     return False
+
+                 visited.add(node)
+                 path.add(node)
+                 for dep in node.dependencies:  # Check upstream
+                     if visit(dep):
+                         return True
+                 path.remove(node)
+                 return False
+
+             for t in flow.tasks:
+                 if visit(t):
+                     errors.append(f"Cycle detected involving task '{t.name}'.")
+                     break
+
+             # 5. Signature check
+             import inspect
+             for t in flow.tasks:
+                 sig = inspect.signature(t.func)
+                 for name, param in sig.parameters.items():
+                     if name == 'ctx': continue
+                     # Inputs are resolved at runtime, so values cannot be verified here;
+                     # we can only check that an 'inputs' mapping or a default exists
+                     # for each declared argument.
+                     if name not in t.inputs and name not in flow_conf.defaults:
+                         # Possible missing input
+                         warnings.append(f"Task '{t.name}' argument '{name}' might be missing input (not in inputs or defaults).")
+
+         except Exception as e:
+             errors.append(f"Graph evaluation failed: {e}")
+
+         # Report
+         print("\n--- Check Report ---")
+         if not errors and not warnings:
+             print("✅ All checks passed!")
+         else:
+             for w in warnings:
+                 print(f"⚠️ {w}")
+             for e in errors:
+                 print(f"❌ {e}")
+
+         if errors:
+             sys.exit(1)
+
+ if __name__ == "__main__":
+     main()
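As a usage sketch (the flow.yaml below is hypothetical but follows the schema in pyoco/schemas/config.py; the wheel metadata declares no console script, so the module is invoked directly):

    # flow.yaml
    version: 1
    flows:
      main:
        graph: "extract >> transform"
        defaults:
          name: "world"
    tasks:
      extract:
        callable: "myproj.tasks:extract"
      transform:
        callable: "myproj.tasks:transform"

    $ python -m pyoco.cli.main list-tasks --config flow.yaml
    $ python -m pyoco.cli.main check --config flow.yaml --flow main
    $ python -m pyoco.cli.main run --config flow.yaml --flow main --param name=pyoco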
pyoco/core/base_task.py ADDED
@@ -0,0 +1,65 @@
+ # pyoco core - base task abstraction
+ """Common abstract base class for user-defined tasks.
+
+ The library already allows registering a plain function with the ``@task``
+ decorator. For more structured or reusable implementations you can
+ subclass :class:`BaseTask` and implement the ``run`` method. The ``run``
+ method receives the current :class:`~pyoco.core.context.Context` instance
+ so you can read inputs, write outputs, or use any other context helpers.
+
+ Typical usage::
+
+     from pyoco.core.base_task import BaseTask
+     from pyoco.dsl.syntax import task
+
+     class MyTask(BaseTask):
+         @task
+         def run(self, ctx):
+             # ``ctx`` gives access to ``inputs``, ``scratch``, etc.
+             data = ctx.inputs.get("my_input")
+             result = data * 2
+             return result
+
+ In ``flow.yaml`` you reference the method as usual::
+
+     tasks:
+       double:
+         callable: "my_module:MyTask.run"
+         inputs:
+           my_input: "$ctx.params.value"
+         outputs:
+           - "scratch.doubled"
+
+ The abstract base class does not enforce any particular input/output
+ schema; it simply provides a clear contract for developers and makes the
+ library documentation more discoverable.
+ """
+
+ from abc import ABC, abstractmethod
+ from typing import Any
+
+ class BaseTask(ABC):
+     """Abstract base class for custom tasks.
+
+     Subclass this class and implement :meth:`run`. The method must accept
+     a single ``ctx`` argument (the :class:`~pyoco.core.context.Context`
+     instance) and return a value that will be stored according to the
+     ``outputs`` configuration in ``flow.yaml``.
+     """
+
+     @abstractmethod
+     def run(self, ctx: Any) -> Any:
+         """Execute the task.
+
+         Parameters
+         ----------
+         ctx: :class:`~pyoco.core.context.Context`
+             Execution context providing access to ``inputs``, ``scratch``,
+             ``params`` and helper methods such as ``save_artifact``.
+
+         Returns
+         -------
+         Any
+             The value that will be saved to the paths listed in ``outputs``.
+         """
+         raise NotImplementedError
pyoco/core/context.py ADDED
@@ -0,0 +1,110 @@
+ import threading
+ from typing import Any, Dict, Optional
+ from dataclasses import dataclass, field
+
+ @dataclass
+ class Context:
+     params: Dict[str, Any] = field(default_factory=dict)
+     env: Dict[str, str] = field(default_factory=dict)
+     results: Dict[str, Any] = field(default_factory=dict)
+     scratch: Dict[str, Any] = field(default_factory=dict)
+     artifacts: Dict[str, Any] = field(default_factory=dict)
+     run_id: Optional[str] = None
+     artifact_dir: str = field(default="./artifacts")
+
+     _lock: threading.Lock = field(default_factory=threading.Lock, repr=False)
+
+     def __post_init__(self):
+         # Ensure the artifact directory exists
+         import pathlib
+         pathlib.Path(self.artifact_dir).mkdir(parents=True, exist_ok=True)
+
+     def get_result(self, node_name: str) -> Any:
+         with self._lock:
+             return self.results.get(node_name)
+
+     def set_result(self, node_name: str, value: Any):
+         with self._lock:
+             self.results[node_name] = value
+
+     def save_artifact(self, name: str, data: Any) -> str:
+         import os
+         import pathlib
+
+         full_path = pathlib.Path(self.artifact_dir) / name
+         # Ensure the parent dir exists for nested artifacts
+         full_path.parent.mkdir(parents=True, exist_ok=True)
+
+         mode = "w"
+         content = data
+
+         if isinstance(data, bytes):
+             mode = "wb"
+         elif not isinstance(data, str):
+             content = str(data)
+
+         with open(full_path, mode) as f:
+             f.write(content)
+
+         abs_path = str(full_path.absolute())
+
+         type_name = type(data).__name__
+         if isinstance(data, (dict, list)):
+             type_name = "object"
+
+         with self._lock:
+             self.artifacts[name] = {
+                 "path": abs_path,
+                 "type": type_name
+             }
+
+         return abs_path
+
+     def resolve(self, value: Any) -> Any:
+         if not isinstance(value, str) or not value.startswith("$"):
+             return value
+
+         # $node.<Name>.output
+         if value.startswith("$node."):
+             parts = value.split(".")
+             # $node.A.output   -> ["$node", "A", "output"]
+             # $node.A.output.x -> ["$node", "A", "output", "x"]
+             if len(parts) < 3 or parts[2] != "output":
+                 # Malformed or unsupported node selector
+                 return value
+
+             node_name = parts[1]
+             if node_name not in self.results:
+                 raise KeyError(f"Node '{node_name}' result not found in context.")
+
+             result = self.results[node_name]
+
+             # Handle nested access
+             if len(parts) > 3:
+                 for key in parts[3:]:
+                     if isinstance(result, dict):
+                         result = result[key]
+                     else:
+                         result = getattr(result, key)
+             return result
+
+         # $ctx.params.<Key>
+         if value.startswith("$ctx.params."):
+             key = value[len("$ctx.params."):]
+             if key not in self.params:
+                 raise KeyError(f"Param '{key}' not found in context.")
+             return self.params[key]
+
+         # $env.<Key>
+         if value.startswith("$env."):
+             import os
+             key = value[len("$env."):]
+             # Check ctx.env first, then os.environ
+             if key in self.env:
+                 return self.env[key]
+             if key in os.environ:
+                 return os.environ[key]
+             raise KeyError(f"Environment variable '{key}' not found.")
+
+         return value
+
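A short sketch of the selector grammar resolve() implements (names and values are illustrative; note that instantiating Context creates artifact_dir as a side effect):

    ctx = Context(params={"threshold": 5})
    ctx.set_result("fetch", {"count": 12})

    ctx.resolve("$ctx.params.threshold")     # -> 5
    ctx.resolve("$node.fetch.output")        # -> {"count": 12}
    ctx.resolve("$node.fetch.output.count")  # -> 12 (nested dict access)
    ctx.resolve("plain string")              # -> returned unchanged (no "$" prefix)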
pyoco/core/engine.py ADDED
@@ -0,0 +1,240 @@
+ import time
+ from typing import Dict, Any, List, Set
+ from .models import Flow, Task
+ from .context import Context
+ from ..trace.backend import TraceBackend
+ from ..trace.console import ConsoleTraceBackend
+
+ class Engine:
+     def __init__(self, trace_backend: TraceBackend = None):
+         self.trace = trace_backend or ConsoleTraceBackend()
+
+     def run(self, flow: Flow, params: Dict[str, Any] = None) -> Context:
+         ctx = Context(params=params or {})
+         self.trace.on_flow_start(flow.name)
+
+         executed: Set[Task] = set()
+         running: Set[Any] = set()  # set of in-flight Futures
+
+         import concurrent.futures
+
+         # Use a ThreadPoolExecutor for parallel execution.
+         # max_workers could be made configurable; default to something reasonable.
+         with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
+             future_to_task = {}
+             task_deadlines: Dict[Task, float] = {}
+
+             failed: Set[Task] = set()
+
+             while len(executed) + len(failed) < len(flow.tasks):
+                 # Identify runnable tasks
+                 runnable = []
+                 for task in flow.tasks:
+                     if task in executed or task in failed or task in [future_to_task[f] for f in running]:
+                         continue
+
+                     # Check dependencies
+                     deps_met = True
+
+                     if task.trigger_policy == "ANY":
+                         # OR-join: runnable as soon as ANY dependency has succeeded.
+                         # If none has succeeded yet we keep waiting; if ALL of them
+                         # have failed, the task can never fire, so mark it failed.
+                         any_success = False
+                         all_failed = True
+
+                         if not task.dependencies:
+                             # No deps = ready
+                             any_success = True
+                             all_failed = False
+                         else:
+                             for dep in task.dependencies:
+                                 if dep in executed:
+                                     any_success = True
+                                     all_failed = False
+                                     break  # Found one success
+                                 if dep not in failed:
+                                     all_failed = False  # At least one is still running/pending
+
+                         if any_success:
+                             deps_met = True
+                         elif all_failed:
+                             # All deps failed, so this task fails/skips too
+                             failed.add(task)
+                             deps_met = False
+                             # Skip to the next task to avoid adding this one to runnable
+                             continue
+                         else:
+                             # Still waiting
+                             deps_met = False
+
+                     else:
+                         # ALL (AND-join) - default
+                         for dep in task.dependencies:
+                             if dep in failed:
+                                 # Dependency failed
+                                 if task.fail_policy == "isolate" or dep.fail_policy == "isolate":
+                                     failed.add(task)
+                                     deps_met = False
+                                     break
+                                 else:
+                                     pass  # fail=stop is raised where the failure occurs
+
+                             if dep not in executed:
+                                 deps_met = False
+                                 break
+
+                     if deps_met and task not in failed:
+                         runnable.append(task)
+
+                 # No runnable and no running tasks: either everything is done
+                 # (possibly with failures) or the graph is stuck.
+                 if not runnable and not running:
+                     if len(executed) + len(failed) == len(flow.tasks):
+                         # All done (some may have failed)
+                         break
+                     raise RuntimeError("Deadlock or cycle detected in workflow")
+
+                 # Submit runnable tasks
+                 for task in runnable:
+                     future = executor.submit(self._execute_task, task, ctx)
+                     running.add(future)
+                     future_to_task[future] = task
+                     # Track deadlines in a dict keyed by task; Task is an
+                     # immutable-ish dataclass and hashes by name.
+                     if task.timeout_sec:
+                         task_deadlines[task] = time.time() + task.timeout_sec
+
+                 # Calculate the wait timeout from the nearest deadline
+                 wait_timeout = None
+                 if task_deadlines:
+                     now = time.time()
+                     min_deadline = min(task_deadlines.values())
+                     wait_timeout = max(0, min_deadline - now)
+
+                 # Wait for at least one task to complete or time out
+                 if running:
+                     done, _ = concurrent.futures.wait(
+                         running,
+                         timeout=wait_timeout,
+                         return_when=concurrent.futures.FIRST_COMPLETED
+                     )
+
+                     # Check for timeouts first
+                     now = time.time()
+                     timed_out_tasks = []
+                     for task, deadline in list(task_deadlines.items()):
+                         if now >= deadline:
+                             # Task timed out: find its future.
+                             # A linear scan is inefficient, but the running set is small.
+                             found_future = None
+                             for f, t in future_to_task.items():
+                                 if t == task and f in running:
+                                     found_future = f
+                                     break
+
+                             if found_future:
+                                 timed_out_tasks.append(found_future)
+                                 # Stop tracking it (the worker thread itself cannot be killed)
+                                 running.remove(found_future)
+                                 del task_deadlines[task]
+
+                                 # Handle the failure
+                                 if task.fail_policy == "isolate":
+                                     failed.add(task)
+                                     self.trace.on_node_error(task.name, TimeoutError(f"Task exceeded timeout of {task.timeout_sec}s"))
+                                 else:
+                                     raise TimeoutError(f"Task '{task.name}' exceeded timeout of {task.timeout_sec}s")
+
+                     for future in done:
+                         if future in running:  # it may have been removed by the timeout check above
+                             running.remove(future)
+                             task = future_to_task[future]
+                             if task in task_deadlines:
+                                 del task_deadlines[task]
+
+                             try:
+                                 future.result()  # Re-raise the task's exception, if any
+                                 executed.add(task)
+                             except Exception as e:
+                                 if task.fail_policy == "isolate":
+                                     failed.add(task)
+                                     self.trace.on_node_error(task.name, e)  # log and continue
+                                 else:
+                                     # fail=stop (default)
+                                     raise e
+
+         self.trace.on_flow_end(flow.name)
+         return ctx
+
+     def _execute_task(self, task: Task, ctx: Context):
+         self.trace.on_node_start(task.name)
+         start_time = time.time()
+         # Retry loop
+         retries_left = task.retries
+         while True:
+             try:
+                 # Resolve inputs from the task configuration
+                 kwargs = {}
+                 for key, value in task.inputs.items():
+                     kwargs[key] = ctx.resolve(value)
+
+                 # Inspect the function signature to see what it expects
+                 import inspect
+                 sig = inspect.signature(task.func)
+
+                 # Inject 'ctx' if requested
+                 if 'ctx' in sig.parameters:
+                     kwargs['ctx'] = ctx
+
+                 # Auto-wiring (legacy/convenience)
+                 for param_name in sig.parameters:
+                     if param_name in kwargs:
+                         continue
+                     if param_name == 'ctx':
+                         continue
+
+                     if param_name in ctx.params:
+                         kwargs[param_name] = ctx.params[param_name]
+                     elif param_name in ctx.results:
+                         kwargs[param_name] = ctx.results[param_name]
+
+                 result = task.func(**kwargs)
+                 ctx.set_result(task.name, result)
+
+                 # Save the result to the configured output paths
+                 for target_path in task.outputs:
+                     parts = target_path.split(".")
+                     root_name = parts[0]
+                     root_obj = None
+                     if root_name == "scratch":
+                         root_obj = ctx.scratch
+                     elif root_name == "results":
+                         root_obj = ctx.results
+                     elif root_name == "params":
+                         root_obj = ctx.params
+
+                     if root_obj is not None:
+                         current = root_obj
+                         for i, part in enumerate(parts[1:-1]):
+                             if part not in current:
+                                 current[part] = {}
+                             current = current[part]
+                             if not isinstance(current, dict):
+                                 break
+                         else:
+                             current[parts[-1]] = result
+
+                 duration = (time.time() - start_time) * 1000
+                 self.trace.on_node_end(task.name, duration)
+                 return  # Success
+
+             except Exception as e:
+                 if retries_left > 0:
+                     retries_left -= 1
+                     # No retry hook on the trace backend yet; just back off and retry
+                     time.sleep(0.1)  # small fixed backoff
+                     continue
+                 else:
+                     self.trace.on_node_error(task.name, e)
+                     raise e
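To illustrate the failure and trigger policies the engine honors, a sketch that wires tasks by hand (the task bodies are hypothetical):

    from pyoco.core.models import Task, Flow
    from pyoco.core.engine import Engine

    def flaky():
        raise RuntimeError("boom")

    def ok():
        return "fine"

    def cleanup():
        return "ran anyway"

    a = Task(func=flaky, name="flaky", fail_policy="isolate", retries=1)
    b = Task(func=ok, name="ok")
    c = Task(func=cleanup, name="cleanup", trigger_policy="ANY")
    for dep in (a, b):
        c.dependencies.add(dep)
        dep.dependents.add(c)

    ctx = Engine().run(Flow(name="demo", tasks={a, b, c}))
    # "flaky" is retried once and then fails; because it is isolated, the run
    # continues, and "cleanup" fires as soon as "ok" succeeds (OR-join).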
pyoco/core/models.py ADDED
@@ -0,0 +1,80 @@
+ from typing import Any, Callable, Dict, List, Optional, Set
+ from dataclasses import dataclass, field
+
+ @dataclass
+ class Task:
+     func: Callable
+     name: str
+     dependencies: Set['Task'] = field(default_factory=set)
+     dependents: Set['Task'] = field(default_factory=set)
+     # Inputs configuration from flow.yaml
+     inputs: Dict[str, Any] = field(default_factory=dict)
+     outputs: List[str] = field(default_factory=list)  # context paths to save the result to
+     # For parallel execution grouping
+     parallel_group: Optional[str] = None
+
+     # Failure handling
+     fail_policy: str = "stop"  # stop, isolate, retry
+     retries: int = 0
+     timeout_sec: Optional[float] = None
+
+     # Trigger policy
+     trigger_policy: str = "ALL"  # ALL (AND-join), ANY (OR-join)
+
+     def __hash__(self):
+         return hash(self.name)
+
+     def __eq__(self, other):
+         if isinstance(other, Task):
+             return self.name == other.name
+         return False
+
+     def __repr__(self):
+         return f"<Task {self.name}>"
+
+ @dataclass
+ class Flow:
+     name: str = "main"
+     tasks: Set[Task] = field(default_factory=set)
+     _tail: Set[Task] = field(default_factory=set)
+
+     def __rshift__(self, other):
+         # Flow >> Task / list / Branch: add the task(s) to the flow and make
+         # them depend on the current tail. Note that `flow >> (A | B)` merely
+         # adds A and B; the OR-join semantics of a Branch only apply when the
+         # Branch itself is chained downstream, i.e. `(A | B) >> C`, which is
+         # handled by Branch.__rshift__ in dsl.syntax.
+         new_tasks = []
+         is_branch = False
+
+         if hasattr(other, 'task'):  # TaskWrapper
+             new_tasks = [other.task]
+         elif isinstance(other, Task):
+             new_tasks = [other]
+         elif isinstance(other, (list, tuple)):
+             # Branch is defined in dsl.syntax; importing it here would be a
+             # circular import, so detect it by class name instead.
+             if type(other).__name__ == "Branch":
+                 is_branch = True
+
+             for item in other:
+                 if hasattr(item, 'task'):
+                     new_tasks.append(item.task)
+                 elif isinstance(item, Task):
+                     new_tasks.append(item)
+
+         # Add tasks and link them from the current tail (AND semantics)
+         for t in new_tasks:
+             self.add_task(t)
+             for tail_task in self._tail:
+                 tail_task.dependents.add(t)
+                 t.dependencies.add(tail_task)
+
+         # Update tail
+         if new_tasks:
+             self._tail = set(new_tasks)
+
+         return self
+
+     def add_task(self, task: Task):
+         self.tasks.add(task)
pyoco/discovery/loader.py ADDED
@@ -0,0 +1,131 @@
+ import importlib
+ import pkgutil
+ import sys
+ from typing import Dict, List, Any, Set
+ from ..core.models import Task
+ from ..dsl.syntax import TaskWrapper
+
+ class TaskLoader:
+     def __init__(self, config: Any, strict: bool = False):
+         self.config = config
+         self.strict = strict
+         self.tasks: Dict[str, Task] = {}
+         self._explicit_tasks: Set[str] = set()
+
+     def load(self):
+         # Load tasks defined explicitly in the config FIRST (higher priority)
+         for task_name, task_conf in self.config.tasks.items():
+             if task_conf.callable:
+                 self._load_explicit_task(task_name, task_conf)
+                 self._explicit_tasks.add(task_name)
+
+         # Load from packages
+         for package in self.config.discovery.packages:
+             self._load_package(package)
+
+         # Load from entry points (simplified)
+         for ep in self.config.discovery.entry_points:
+             self._load_module(ep)
+
+         # Load from glob modules
+         for pattern in self.config.discovery.glob_modules:
+             self._load_glob_modules(pattern)
+
+     def _register_task(self, name: str, task: Task):
+         if name in self.tasks:
+             if name in self._explicit_tasks:
+                 # Explicit wins; ignore the implicit one
+                 return
+
+             # Collision between two implicitly discovered tasks
+             msg = f"Task '{name}' already defined."
+             if self.strict:
+                 raise ValueError(f"{msg} (Strict mode enabled)")
+             else:
+                 print(f"Warning: {msg} Overwriting.")
+
+         # Apply the config overlay, if one exists
+         if name in self.config.tasks:
+             conf = self.config.tasks[name]
+             if not conf.callable:
+                 if conf.inputs:
+                     task.inputs.update(conf.inputs)
+                 if conf.outputs:
+                     task.outputs.extend(conf.outputs)
+
+         self.tasks[name] = task
+
+     def _load_package(self, package_name: str):
+         try:
+             pkg = importlib.import_module(package_name)
+             if hasattr(pkg, '__path__'):
+                 for _, name, _ in pkgutil.iter_modules(pkg.__path__, pkg.__name__ + "."):
+                     self._load_module(name)
+             else:
+                 self._scan_module(pkg)
+         except ImportError as e:
+             print(f"Warning: Could not import package {package_name}: {e}")
+
+     def _load_module(self, module_name: str):
+         try:
+             mod = importlib.import_module(module_name)
+             self._scan_module(mod)
+         except ImportError as e:
+             print(f"Warning: Could not import module {module_name}: {e}")
+
+     def _load_glob_modules(self, pattern: str):
+         import glob
+         import os
+
+         # The pattern is a file-path glob, e.g. "jobs/*.py";
+         # file paths must be converted to module paths.
+         files = glob.glob(pattern, recursive=True)
+         for file_path in files:
+             if not file_path.endswith(".py"):
+                 continue
+
+             # Convert the path to a module name. This is tricky without knowing
+             # the project root; assume we run from the root and the path is
+             # relative to it, e.g. "myproject/tasks/foo.py" -> "myproject.tasks.foo".
+             rel_path = os.path.relpath(file_path)
+             if rel_path.startswith(".."):
+                 # Out of tree; skip
+                 continue
+
+             module_name = rel_path.replace(os.sep, ".")[:-3]  # strip .py
+             self._load_module(module_name)
+
+     def _scan_module(self, module: Any):
+         for name, obj in vars(module).items():
+             if isinstance(obj, TaskWrapper):
+                 self._register_task(name, obj.task)
+             elif isinstance(obj, Task):
+                 self._register_task(name, obj)
+             elif callable(obj) and getattr(obj, '__pyoco_task__', False):
+                 # Convert to Task if not already
+                 pass
+
+     def _load_explicit_task(self, name: str, conf: Any):
+         # Load the callable
+         module_path, func_name = conf.callable.split(':')
+         try:
+             mod = importlib.import_module(module_path)
+             obj = getattr(mod, func_name)
+
+             # Unwrap if it's a TaskWrapper or Task
+             real_func = obj
+             if isinstance(obj, TaskWrapper):
+                 real_func = obj.task.func
+             elif isinstance(obj, Task):
+                 real_func = obj.func
+
+             # Create a Task wrapper
+             t = Task(func=real_func, name=name)
+             t.inputs = conf.inputs
+             t.outputs = conf.outputs
+             self.tasks[name] = t
+         except (ImportError, AttributeError) as e:
+             print(f"Error loading task {name}: {e}")
+
+     def get_task(self, name: str) -> Task:
+         return self.tasks.get(name)
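A sketch of driving the loader programmatically (myproj.tasks is a hypothetical package; the config dataclasses come from pyoco/schemas/config.py):

    from pyoco.discovery.loader import TaskLoader
    from pyoco.schemas.config import PyocoConfig, DiscoveryConfig

    config = PyocoConfig(
        version=1,
        flows={},
        tasks={},
        discovery=DiscoveryConfig(packages=["myproj.tasks"]),
    )
    loader = TaskLoader(config, strict=True)  # strict: implicit name collisions raise
    loader.load()
    print(sorted(loader.tasks))  # names of tasks discovered in myproj.tasks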
pyoco/dsl/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from .syntax import task
+
+ __all__ = ["task"]
pyoco/dsl/syntax.py ADDED
@@ -0,0 +1,95 @@
+ from typing import Callable
+ from ..core.models import Task, Flow
+
+ # Global context (reserved; not used yet)
+ _active_flow: Flow = None
+
+ class Branch(list):
+     """Represents a branch of tasks (OR-split/join logic placeholder)."""
+     def __rshift__(self, other):
+         # (A | B) >> C: C depends on A and B, and C.trigger_policy = "ANY"
+         targets = []
+         if hasattr(other, 'task'):
+             targets = [other.task]
+         elif isinstance(other, (list, tuple)):
+             for item in other:
+                 if hasattr(item, 'task'):
+                     targets.append(item.task)
+
+         for target in targets:
+             target.trigger_policy = "ANY"
+             for source in self:
+                 if hasattr(source, 'task'):
+                     target.dependencies.add(source.task)
+                     source.task.dependents.add(target)
+
+         return other
+
+ class Parallel(list):
+     """Represents a parallel group of tasks (AND-split/join)."""
+     def __rshift__(self, other):
+         # (A & B) >> C: C depends on A AND B
+         targets = []
+         if hasattr(other, 'task'):
+             targets = [other.task]
+         elif isinstance(other, (list, tuple)):
+             for item in other:
+                 if hasattr(item, 'task'):
+                     targets.append(item.task)
+
+         for target in targets:
+             for source in self:
+                 if hasattr(source, 'task'):
+                     target.dependencies.add(source.task)
+                     source.task.dependents.add(target)
+
+         return other
+
+ class TaskWrapper:
+     """
+     Wraps a Task to handle DSL operators and registration.
+     """
+     def __init__(self, task: Task):
+         self.task = task
+
+     def __call__(self, *args, **kwargs):
+         # Calling a task is not strictly necessary for registration, since
+         # tasks are added to a flow explicitly or via >>. Keep the pattern
+         # anyway: calling returns a wrapper that can be chained. (Storing
+         # args/kwargs would be needed to support call-time arguments.)
+         return self
+
+     def __rshift__(self, other):
+         # self >> other
+         if isinstance(other, TaskWrapper):
+             other.task.dependencies.add(self.task)
+             self.task.dependents.add(other.task)
+             return other
+         elif isinstance(other, (list, tuple)):
+             # self >> (A & B) or self >> (A | B): each member depends on self,
+             # as in Airflow's `upstream >> [A, B]`. The AND/OR distinction
+             # only matters when the group connects downstream: (A | B) >> C
+             # goes through Branch.__rshift__, which sets C.trigger_policy to
+             # "ANY". Here we simply fan out.
+             for item in other:
+                 if isinstance(item, TaskWrapper):
+                     item.task.dependencies.add(self.task)
+                     self.task.dependents.add(item.task)
+             return other
+         return other
+
+     def __and__(self, other):
+         # self & other (Parallel)
+         return Parallel([self, other])
+
+     def __or__(self, other):
+         # self | other (Branch)
+         return Branch([self, other])
+
+ # The DSL spec:
+ #     @task
+ #     def A(ctx, x: int) -> int: ...
+ #     flow = Flow() >> A >> (B & C)
+ # so the @task decorator must return something that supports >>, & and |.
+ def task(func: Callable) -> TaskWrapper:
+     t = Task(func=func, name=func.__name__)
+     return TaskWrapper(t)
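A sketch of the operator semantics defined above (A through E are hypothetical tasks):

    @task
    def A(): ...
    @task
    def B(): ...
    @task
    def C(): ...
    @task
    def D(): ...
    @task
    def E(): ...

    A >> (B & C) >> D   # AND-join: D runs only after BOTH B and C
    (B | C) >> E        # OR-join: E runs once EITHER B or C succeeds
                        # (Branch.__rshift__ sets E.trigger_policy = "ANY")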
pyoco/schemas/config.py ADDED
@@ -0,0 +1,57 @@
+ from dataclasses import dataclass, field
+ from typing import List, Dict, Any, Optional
+ import yaml
+
+ @dataclass
+ class TaskConfig:
+     callable: Optional[str] = None
+     inputs: Dict[str, Any] = field(default_factory=dict)
+     outputs: List[str] = field(default_factory=list)
+
+ @dataclass
+ class FlowConfig:
+     graph: str
+     defaults: Dict[str, Any] = field(default_factory=dict)
+
+ @dataclass
+ class DiscoveryConfig:
+     entry_points: List[str] = field(default_factory=list)
+     packages: List[str] = field(default_factory=list)
+     glob_modules: List[str] = field(default_factory=list)
+
+ @dataclass
+ class RuntimeConfig:
+     expose_env: List[str] = field(default_factory=list)
+
+ @dataclass
+ class PyocoConfig:
+     version: int
+     flows: Dict[str, FlowConfig]
+     tasks: Dict[str, TaskConfig]
+     discovery: DiscoveryConfig = field(default_factory=DiscoveryConfig)
+     runtime: RuntimeConfig = field(default_factory=RuntimeConfig)
+
+     @classmethod
+     def from_yaml(cls, path: str) -> 'PyocoConfig':
+         with open(path, 'r') as f:
+             data = yaml.safe_load(f)
+
+         # Simple manual parsing/validation for the MVP;
+         # a real application would use pydantic or similar.
+
+         flows = {k: FlowConfig(**v) for k, v in data.get('flows', {}).items()}
+         tasks = {k: TaskConfig(**v) for k, v in data.get('tasks', {}).items()}
+
+         disc_data = data.get('discovery', {})
+         discovery = DiscoveryConfig(**disc_data)
+
+         run_data = data.get('runtime', {})
+         runtime = RuntimeConfig(**run_data)
+
+         return cls(
+             version=data.get('version', 1),
+             flows=flows,
+             tasks=tasks,
+             discovery=discovery,
+             runtime=runtime
+         )
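For reference, loading a config and inspecting the parsed dataclasses (a sketch; the path is illustrative):

    config = PyocoConfig.from_yaml("flow.yaml")
    print(config.version)                 # 1 unless the file overrides it
    print(config.flows["main"].graph)     # the DSL expression string
    print(config.flows["main"].defaults)  # dict the CLI merges into run params
    print(config.discovery.packages)      # packages scanned by TaskLoader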
pyoco/trace/backend.py ADDED
@@ -0,0 +1,26 @@
+ from abc import ABC, abstractmethod
+ from typing import Any, Optional
+
+ class TraceBackend(ABC):
+     @abstractmethod
+     def on_flow_start(self, flow_name: str):
+         pass
+
+     @abstractmethod
+     def on_flow_end(self, flow_name: str):
+         pass
+
+     @abstractmethod
+     def on_node_start(self, node_name: str):
+         pass
+
+     @abstractmethod
+     def on_node_end(self, node_name: str, duration_ms: float):
+         pass
+
+     @abstractmethod
+     def on_node_error(self, node_name: str, error: Exception):
+         pass
+
+     def on_node_transition(self, source: str, target: str):
+         pass
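Any object implementing these hooks can be passed as Engine(trace_backend=...); a minimal sketch that collects events in memory instead of printing them (a hypothetical class, not part of the wheel):

    from pyoco.trace.backend import TraceBackend

    class ListTraceBackend(TraceBackend):
        # Collects trace events as tuples for later inspection (e.g. in tests)
        def __init__(self):
            self.events = []

        def on_flow_start(self, flow_name: str):
            self.events.append(("flow_start", flow_name))

        def on_flow_end(self, flow_name: str):
            self.events.append(("flow_end", flow_name))

        def on_node_start(self, node_name: str):
            self.events.append(("node_start", node_name))

        def on_node_end(self, node_name: str, duration_ms: float):
            self.events.append(("node_end", node_name, duration_ms))

        def on_node_error(self, node_name: str, error: Exception):
            self.events.append(("node_error", node_name, repr(error)))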
pyoco/trace/console.py ADDED
@@ -0,0 +1,40 @@
+ import time
+ from .backend import TraceBackend
+
+ class ConsoleTraceBackend(TraceBackend):
+     def __init__(self, style: str = "cute"):
+         self.style = style
+
+     def on_flow_start(self, flow_name: str):
+         if self.style == "cute":
+             print(f"🐇 pyoco > start flow={flow_name}")
+         else:
+             print(f"INFO pyoco start flow={flow_name}")
+
+     def on_flow_end(self, flow_name: str):
+         if self.style == "cute":
+             print(f"🥕 done flow={flow_name}")
+         else:
+             print(f"INFO pyoco end flow={flow_name}")
+
+     def on_node_start(self, node_name: str):
+         if self.style == "cute":
+             print(f"🏃 start node={node_name}")
+         else:
+             print(f"INFO pyoco start node={node_name}")
+
+     def on_node_end(self, node_name: str, duration_ms: float):
+         if self.style == "cute":
+             print(f"✅ done node={node_name} ({duration_ms:.2f} ms)")
+         else:
+             print(f"INFO pyoco end node={node_name} dur_ms={duration_ms:.2f}")
+
+     def on_node_error(self, node_name: str, error: Exception):
+         if self.style == "cute":
+             print(f"💥 error node={node_name} {error}")
+         else:
+             print(f"ERROR pyoco error node={node_name} {error}")
+
+     def on_node_transition(self, source: str, target: str):
+         if self.style == "cute":
+             print(f"🐇 {source} -> {target}")
pyoco-0.1.0.dist-info/METADATA ADDED
@@ -0,0 +1,7 @@
+ Metadata-Version: 2.4
+ Name: pyoco
+ Version: 0.1.0
+ Summary: A workflow engine with sugar syntax
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ Requires-Dist: pyyaml>=6.0.3
pyoco-0.1.0.dist-info/RECORD ADDED
@@ -0,0 +1,17 @@
+ pyoco/__init__.py,sha256=E2pgDGvGRSVon7dSqIM4UD55LgVpf4jiZZA-70kOcuw,409
+ pyoco/cli/entry.py,sha256=zPIG0Gx-cFO8Cf1Z3wD3Ifz_2sHaryHZ6mCRri2WEqE,93
+ pyoco/cli/main.py,sha256=uRc6CzUTVRYF4JbehlbrprT7GvWQ-WyBZ8k12NrSxO8,6502
+ pyoco/core/base_task.py,sha256=z7hOFntAPv4yCADapS-fhtLe5eWqaO8k3T1r05YEEUE,2106
+ pyoco/core/context.py,sha256=SnoTz3vRghO1A-FNOrw2NEjbx1HySDqrBnQU5-KWGbk,3696
+ pyoco/core/engine.py,sha256=m5LrEsXcpUAran5DxULtWbvhsMNj5mv17wE6lDFkFmQ,11416
+ pyoco/core/models.py,sha256=zTt5HTSBChwRpOuw3qY2pvjRGZVsq4OQ-ZBHE3ujMWA,4548
+ pyoco/discovery/loader.py,sha256=XzZzOAyFYrdA8K6APuEGWgjSIyp4Bgwlr834MyJc8vk,4950
+ pyoco/dsl/__init__.py,sha256=xWdb60pSRL8lNFk4GHF3EJ4hon0uiWqpv264g6-4gdg,45
+ pyoco/dsl/syntax.py,sha256=AkFcD5gLlbJLFN0KkMIyttpHUV3v21pjz_ZqwreZkdM,4312
+ pyoco/schemas/config.py,sha256=KkGZK3GxTHoIHEGb4f4k8GE2W-aBN4iPzmc_HrwuROU,1735
+ pyoco/trace/backend.py,sha256=h7l1PU8zuCSOo_VA5T1ax4znN_Az3Xuvx-KXibg3e-U,597
+ pyoco/trace/console.py,sha256=Kf2-vma98ojhVQZHFzCUYfD_46Lr1WfAfI56smZkSZM,1397
+ pyoco-0.1.0.dist-info/METADATA,sha256=bA_qJXUkIiC7TIOSo8CEzJ6PXp01pLQ1Q1LoMOrIw_k,187
+ pyoco-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ pyoco-0.1.0.dist-info/top_level.txt,sha256=2JRVocfaWRbX1VJ3zq1c5wQaOK6fMARS6ptVFWyvRF4,6
+ pyoco-0.1.0.dist-info/RECORD,,
pyoco-0.1.0.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (80.9.0)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
pyoco-0.1.0.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
+ pyoco