pyoco 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyoco/core/engine.py CHANGED
@@ -1,180 +1,275 @@
  import time
- from typing import Dict, Any, List, Set
- from .models import Flow, Task
+ from typing import Dict, Any, List, Set, Optional
+ from .models import Flow, Task, RunContext, TaskState, RunStatus
  from .context import Context
  from ..trace.backend import TraceBackend
  from ..trace.console import ConsoleTraceBackend

  class Engine:
+     """
+     The core execution engine for Pyoco flows.
+
+     Responsible for:
+     - Resolving task dependencies
+     - Managing parallel execution (using ThreadPoolExecutor)
+     - Handling input injection and artifact storage
+     - Delegating logging to the TraceBackend
+
+     Intentionally keeps scheduling logic simple (no distributed queue, no external DB).
+     """
      def __init__(self, trace_backend: TraceBackend = None):
          self.trace = trace_backend or ConsoleTraceBackend()
+         # Track active runs: run_id -> RunContext
+         from .models import RunContext
+         self.active_runs: Dict[str, RunContext] = {}

-     def run(self, flow: Flow, params: Dict[str, Any] = None) -> Context:
-         ctx = Context(params=params or {})
-         self.trace.on_flow_start(flow.name)
+     def get_run(self, run_id: str) -> Any:
+         # Return RunContext if active, else None (for now)
+         return self.active_runs.get(run_id)
+
+     def cancel(self, run_id: str):
+         """
+         Cancel an active run.
+         """
+         from .models import RunStatus
+         run_ctx = self.active_runs.get(run_id)
+         if run_ctx:
+             if run_ctx.status == RunStatus.RUNNING:
+                 run_ctx.status = RunStatus.CANCELLING
+                 # We don't force-kill threads here; the scheduling loop handles it.
+
+     def run(self, flow: Flow, params: Dict[str, Any] = None, run_context: Optional[RunContext] = None) -> Context:
+         # Initialize RunContext (v0.2.0)
+         if run_context is None:
+             run_context = RunContext()
+
+         run_ctx = run_context

-         executed: Set[Task] = set()
-         running: Set[Any] = set()  # Set of Futures
+         # Initialize all tasks as PENDING
+         for task in flow.tasks:
+             run_ctx.tasks[task.name] = TaskState.PENDING
+
+         ctx = Context(params=params or {}, run_context=run_ctx)
+         self.trace.on_flow_start(flow.name, run_id=run_ctx.run_id)

-         import concurrent.futures
+         # Register active run
+         self.active_runs[run_ctx.run_id] = run_ctx

-         # Use ThreadPoolExecutor for parallel execution
-         # Max workers could be configurable, default to something reasonable
-         with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
-             future_to_task = {}
-             task_deadlines: Dict[Task, float] = {}
+         try:
+             executed: Set[Task] = set()
+             running: Set[Any] = set()  # Set of Futures

-             failed: Set[Task] = set()
+             import concurrent.futures
+
+             # Use ThreadPoolExecutor for parallel execution
+             # Max workers could be configurable, default to something reasonable
+             with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
+                 future_to_task = {}
+                 task_deadlines: Dict[Task, float] = {}
+
+                 failed: Set[Task] = set()

-             while len(executed) + len(failed) < len(flow.tasks):
-                 # Identify runnable tasks
-                 runnable = []
-                 for task in flow.tasks:
-                     if task in executed or task in failed or task in [future_to_task[f] for f in running]:
-                         continue
-
-                     # Check dependencies
-                     deps_met = True
-
-                     if task.trigger_policy == "ANY":
-                         # OR-join: Run if ANY dependency is executed (and successful)
-                         # If at least one succeeded, we run.
-                         # If none succeeded yet, we wait.
-                         # If all failed, we fail (or skip).
-                         any_success = False
-                         all_failed = True
-
-                         if not task.dependencies:
-                             # No deps = ready
-                             any_success = True
-                             all_failed = False
-                         else:
-                             for dep in task.dependencies:
-                                 if dep in executed:
-                                     any_success = True
-                                     all_failed = False
-                                     break  # Found one success
-                                 if dep not in failed:
-                                     all_failed = False  # At least one is still running/pending
-
-                         if any_success:
-                             deps_met = True
-                         elif all_failed:
-                             # All deps failed, so we fail/skip
-                             failed.add(task)
-                             deps_met = False
-                             # Continue to next task loop to avoid adding to runnable
-                             continue
-                         else:
-                             # Still waiting
-                             deps_met = False
-
-                     else:
-                         # ALL (AND-join) - Default
-                         for dep in task.dependencies:
-                             if dep in failed:
-                                 # Dependency failed
-                                 if task.fail_policy == "isolate" or dep.fail_policy == "isolate":
-                                     failed.add(task)
-                                     deps_met = False
-                                     break
-                                 else:
-                                     pass  # fail=stop handled elsewhere
-
-                             if dep not in executed:
-                                 deps_met = False
-                                 break
-
-                     if deps_met and task not in failed:
-                         runnable.append(task)
+                 while len(executed) + len(failed) < len(flow.tasks):
+                     # Check for cancellation
+                     if run_ctx.status in [RunStatus.CANCELLING, RunStatus.CANCELLED]:
+                         # Stop submitting new tasks; mark all PENDING tasks as CANCELLED
+                         for t_name, t_state in run_ctx.tasks.items():
+                             if t_state == TaskState.PENDING:
+                                 run_ctx.tasks[t_name] = TaskState.CANCELLED
+
+                         # If no tasks are running, we are done
+                         if not running:
+                             run_ctx.status = RunStatus.CANCELLED
+                             break
+                         # Otherwise fall through to the wait logic below so in-flight
+                         # tasks can finish (graceful shutdown); 'runnable' stays empty.
+
+                     # Identify runnable tasks
+                     runnable = []
+                     if run_ctx.status == RunStatus.RUNNING:
+                         for task in flow.tasks:
+                             if task in executed or task in failed or task in [future_to_task[f] for f in running]:
+                                 continue
+
+                             # Check dependencies
+                             deps_met = True
+
+                             if task.trigger_policy == "ANY":
+                                 # OR-join: Run if ANY dependency is executed (and successful)
+                                 # If at least one succeeded, we run.
+                                 # If none succeeded yet, we wait.
+                                 # If all failed, we fail (or skip).
+                                 any_success = False
+                                 all_failed = True
+
+                                 if not task.dependencies:
+                                     # No deps = ready
+                                     any_success = True
+                                     all_failed = False
+                                 else:
+                                     for dep in task.dependencies:
+                                         if dep in executed:
+                                             any_success = True
+                                             all_failed = False
+                                             break  # Found one success
+                                         if dep not in failed:
+                                             all_failed = False  # At least one is still running/pending
+
+                                 if any_success:
+                                     deps_met = True
+                                 elif all_failed:
+                                     # All deps failed, so we fail/skip
+                                     failed.add(task)
+                                     run_ctx.tasks[task.name] = TaskState.FAILED
+                                     deps_met = False
+                                     # Continue to next task loop to avoid adding to runnable
+                                     continue
+                                 else:
+                                     # Still waiting
+                                     deps_met = False
+
+                             else:
+                                 # ALL (AND-join) - Default
+                                 for dep in task.dependencies:
+                                     if dep in failed:
+                                         # Dependency failed
+                                         if task.fail_policy == "isolate" or dep.fail_policy == "isolate":
+                                             failed.add(task)
+                                             run_ctx.tasks[task.name] = TaskState.FAILED  # Mark as FAILED (or SKIPPED if we had it)
+                                             deps_met = False
+                                             break
+                                         else:
+                                             pass  # fail=stop handled elsewhere
+
+                                     if dep not in executed:
+                                         deps_met = False
+                                         break
+
+                             if deps_met and task not in failed:
+                                 runnable.append(task)

-                 # If no runnable tasks and no running tasks, we are stuck
-                 # But if we have failed tasks, maybe that's why?
-                 if not runnable and not running:
-                     if len(executed) + len(failed) == len(flow.tasks):
-                         # All done (some failed)
-                         break
-                     raise RuntimeError("Deadlock or cycle detected in workflow")
-
-                 # Submit runnable tasks
-                 for task in runnable:
-                     future = executor.submit(self._execute_task, task, ctx)
-                     running.add(future)
-                     future_to_task[future] = task
-                     # Record start time for timeout tracking
-                     # We need to track start times or deadlines.
-                     # Let's store deadline in a separate dict or attach to task?
-                     # Task is immutable-ish (dataclass).
-                     # Let's use a dict.
-                     if task.timeout_sec:
-                         task_deadlines[task] = time.time() + task.timeout_sec
+                     # If no runnable tasks and no running tasks, we are stuck
+                     # But if we have failed tasks, maybe that's why?
+                     if not runnable and not running:
+                         if len(executed) + len(failed) == len(flow.tasks):
+                             # All done (some failed)
+                             break
+
+                         run_ctx.status = RunStatus.FAILED
+                         run_ctx.end_time = time.time()
+                         raise RuntimeError("Deadlock or cycle detected in workflow")
+
+                     # Submit runnable tasks
+                     for task in runnable:
+                         future = executor.submit(self._execute_task, task, ctx)
+                         running.add(future)
+                         future_to_task[future] = task
+                         # Record start time for timeout tracking
+                         if task.timeout_sec:
+                             task_deadlines[task] = time.time() + task.timeout_sec

-                 # Calculate wait timeout
-                 wait_timeout = None
-                 if task_deadlines:
-                     now = time.time()
-                     min_deadline = min(task_deadlines.values())
-                     wait_timeout = max(0, min_deadline - now)
-
-                 # Wait for at least one task to complete or time out
-                 if running:
-                     done, _ = concurrent.futures.wait(
-                         running,
-                         timeout=wait_timeout,
-                         return_when=concurrent.futures.FIRST_COMPLETED
-                     )
+                     # Calculate wait timeout
+                     wait_timeout = None
+                     if task_deadlines:
+                         now = time.time()
+                         min_deadline = min(task_deadlines.values())
+                         wait_timeout = max(0, min_deadline - now)

-                 # Check for timeouts first
-                 now = time.time()
-                 timed_out_tasks = []
-                 for task, deadline in list(task_deadlines.items()):
-                     if now >= deadline:
-                         # Task timed out
-                         # Find the future for this task
-                         # This is inefficient, but the running set is small
-                         found_future = None
-                         for f, t in future_to_task.items():
-                             if t == task and f in running:
-                                 found_future = f
-                                 break
-
-                         if found_future:
-                             timed_out_tasks.append(found_future)
-                             # Remove from tracking
-                             running.remove(found_future)
-                             del task_deadlines[task]
+                     # Wait for at least one task to complete or time out
+                     if running:
+                         done, _ = concurrent.futures.wait(
+                             running,
+                             timeout=wait_timeout,
+                             return_when=concurrent.futures.FIRST_COMPLETED
+                         )
+
+                     # Check for timeouts first
+                     now = time.time()
+                     timed_out_tasks = []
+                     for task, deadline in list(task_deadlines.items()):
+                         if now >= deadline:
+                             # Task timed out
+                             # Find the future for this task
+                             found_future = None
+                             for f, t in future_to_task.items():
+                                 if t == task and f in running:
+                                     found_future = f
+                                     break

-                             # Handle failure
-                             if task.fail_policy == "isolate":
-                                 failed.add(task)
-                                 self.trace.on_node_error(task.name, TimeoutError(f"Task exceeded timeout of {task.timeout_sec}s"))
-                             else:
-                                 raise TimeoutError(f"Task '{task.name}' exceeded timeout of {task.timeout_sec}s")
+                             if found_future:
+                                 timed_out_tasks.append(found_future)
+                                 # Remove from tracking
+                                 running.remove(found_future)
+                                 del task_deadlines[task]
+
+                                 # Handle failure
+                                 if task.fail_policy == "isolate":
+                                     failed.add(task)
+                                     run_ctx.tasks[task.name] = TaskState.FAILED
+                                     self.trace.on_node_error(task.name, TimeoutError(f"Task exceeded timeout of {task.timeout_sec}s"))
+                                 else:
+                                     run_ctx.status = RunStatus.FAILED
+                                     run_ctx.end_time = time.time()
+                                     raise TimeoutError(f"Task '{task.name}' exceeded timeout of {task.timeout_sec}s")

-                 for future in done:
-                     if future in running:  # Might have been removed by timeout check above (unlikely if wait returned due to completion, but possible race)
-                         running.remove(future)
-                         task = future_to_task[future]
-                         if task in task_deadlines:
-                             del task_deadlines[task]
-
-                         try:
-                             future.result()  # Re-raise exception if any
-                             executed.add(task)
-                         except Exception as e:
-                             if task.fail_policy == "isolate":
-                                 failed.add(task)
-                                 self.trace.on_node_error(task.name, e)  # Log it
-                             else:
-                                 # fail=stop (default)
-                                 raise e
+                     for future in done:
+                         if future in running:  # Might have been removed by timeout check above
+                             running.remove(future)
+                             task = future_to_task[future]
+                             if task in task_deadlines:
+                                 del task_deadlines[task]
+
+                             try:
+                                 future.result()  # Re-raise exception if any
+                                 executed.add(task)
+                             except Exception as e:
+                                 if task.fail_policy == "isolate":
+                                     failed.add(task)
+                                     # _execute_task also marks FAILED before raising;
+                                     # set it here as well to be safe.
+                                     run_ctx.tasks[task.name] = TaskState.FAILED
+                                     self.trace.on_node_error(task.name, e)  # Log it
+                                 else:
+                                     # fail=stop (default)
+                                     run_ctx.status = RunStatus.FAILED
+                                     run_ctx.end_time = time.time()
+                                     raise e
+
+         finally:
+             # Cleanup active run
+             if run_ctx.run_id in self.active_runs:
+                 del self.active_runs[run_ctx.run_id]

          self.trace.on_flow_end(flow.name)
+
+         # Update final run status
+         if run_ctx.status == RunStatus.RUNNING:
+             if failed:
+                 # Some tasks failed but were isolated; the flow itself finished,
+                 # so the run counts as COMPLETED rather than FAILED.
+                 run_ctx.status = RunStatus.COMPLETED
+             else:
+                 run_ctx.status = RunStatus.COMPLETED
+
+         run_ctx.end_time = time.time()
          return ctx

      def _execute_task(self, task: Task, ctx: Context):
+         # Update state to RUNNING
+         from .models import TaskState
+         if ctx.run_context:
+             ctx.run_context.tasks[task.name] = TaskState.RUNNING
+
          self.trace.on_node_start(task.name)
          start_time = time.time()
          # Retry loop
@@ -234,6 +329,11 @@ class Engine:

          duration = (time.time() - start_time) * 1000
          self.trace.on_node_end(task.name, duration)
+
+         # Update state to SUCCEEDED
+         if ctx.run_context:
+             ctx.run_context.tasks[task.name] = TaskState.SUCCEEDED
+
          return  # Success

      except Exception as e:
@@ -246,4 +346,7 @@ class Engine:
              continue
          else:
              self.trace.on_node_error(task.name, e)
+             # Update state to FAILED
+             if ctx.run_context:
+                 ctx.run_context.tasks[task.name] = TaskState.FAILED
              raise e
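
Taken together, the engine changes wrap `run()` in a queryable, cancellable lifecycle: each run gets a `RunContext`, is registered in `active_runs` while executing, and is cancelled cooperatively. A minimal sketch of driving that lifecycle (assuming trivial task callables and the `Task`/`Flow` constructors shown in `models.py` below; the exact signature Pyoco expects for task functions is outside this diff):

    import threading
    import time

    from pyoco.core.engine import Engine
    from pyoco.core.models import Flow, Task, RunContext

    # Hypothetical no-op task bodies, just to exercise the scheduler
    t1 = Task(func=lambda: time.sleep(1), name="extract")
    t2 = Task(func=lambda: None, name="load", dependencies={t1})
    flow = Flow(name="etl", tasks={t1, t2})

    engine = Engine()
    run_ctx = RunContext()  # created up front so the run_id is known immediately
    worker = threading.Thread(target=engine.run, args=(flow,),
                              kwargs={"run_context": run_ctx})
    worker.start()

    engine.cancel(run_ctx.run_id)  # RUNNING -> CANCELLING; the loop drains in-flight tasks
    worker.join()
    print(run_ctx.status)          # CANCELLED, or COMPLETED if the flow finished first

Note that cancellation is cooperative: already-submitted futures run to completion, and only still-PENDING tasks are marked CANCELLED.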
pyoco/core/models.py CHANGED
@@ -1,8 +1,17 @@
  from typing import Any, Callable, Dict, List, Optional, Set, Union, ForwardRef
  from dataclasses import dataclass, field
+ from enum import Enum
+ import time
+ import uuid

  @dataclass
  class Task:
+     """
+     Represents a single unit of work in the workflow.
+
+     Designed to be lightweight and serializable.
+     Contains metadata about the task, its dependencies, and execution policies.
+     """
      func: Callable
      name: str
      dependencies: Set['Task'] = field(default_factory=set)
@@ -32,8 +41,40 @@ class Task:
      def __repr__(self):
          return f"<Task {self.name}>"

+ class TaskState(Enum):
+     PENDING = "PENDING"
+     RUNNING = "RUNNING"
+     SUCCEEDED = "SUCCEEDED"
+     FAILED = "FAILED"
+     CANCELLED = "CANCELLED"
+
+ class RunStatus(Enum):
+     PENDING = "PENDING"
+     RUNNING = "RUNNING"
+     COMPLETED = "COMPLETED"
+     FAILED = "FAILED"
+     CANCELLING = "CANCELLING"
+     CANCELLED = "CANCELLED"
+
+ @dataclass
+ class RunContext:
+     """
+     Holds the state of a single workflow execution.
+     """
+     run_id: str = field(default_factory=lambda: str(uuid.uuid4()))
+     status: RunStatus = RunStatus.RUNNING
+     tasks: Dict[str, TaskState] = field(default_factory=dict)
+     start_time: float = field(default_factory=time.time)
+     end_time: Optional[float] = None
+
  @dataclass
  class Flow:
+     """
+     Represents a Directed Acyclic Graph (DAG) of tasks.
+
+     Manages the collection of tasks and their dependencies.
+     Optimized for single-machine execution without complex scheduling overhead.
+     """
      name: str = "main"
      tasks: Set[Task] = field(default_factory=set)
      _tail: Set[Task] = field(default_factory=set)
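
These enums and `RunContext` are the state vocabulary shared by the in-process engine above and the server below. A hand-driven sketch of the bookkeeping that `Engine.run` performs:

    from pyoco.core.models import RunContext, RunStatus, TaskState

    run = RunContext()                    # fresh run: UUID run_id, status RUNNING
    run.tasks["extract"] = TaskState.PENDING
    run.tasks["extract"] = TaskState.SUCCEEDED   # engine moves it to a terminal state

    if all(s is TaskState.SUCCEEDED for s in run.tasks.values()):
        run.status = RunStatus.COMPLETED

    print(run.run_id, run.status.value)   # e.g. "9be1... COMPLETED"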
pyoco/discovery/loader.py CHANGED
@@ -45,7 +45,7 @@ class TaskLoader:
              print(f"Warning: {msg} Overwriting.")

          # Apply config overlay if exists
-         if name in self.config.tasks:
+         if self.config and name in self.config.tasks:
              conf = self.config.tasks[name]
              if not conf.callable:
                  if conf.inputs:
@@ -124,7 +124,6 @@ class TaskLoader:
          # Create a Task wrapper
          t = Task(func=real_func, name=name)
          t.inputs = conf.inputs
-         t.inputs = conf.inputs
          t.outputs = conf.outputs
          self.tasks[name] = t
      except (ImportError, AttributeError) as e:
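The added guard fixes a crash when no config overlay is loaded. A sketch of the failing case (`TaskLoader`'s constructor is not shown in this diff, so the `config=None` initialization is an assumption):

    from pyoco.discovery.loader import TaskLoader

    loader = TaskLoader(config=None)  # hypothetical: discovery without a config overlay
    # 0.1.0: `if name in self.config.tasks` raised
    #        AttributeError: 'NoneType' object has no attribute 'tasks'
    # 0.3.0: `if self.config and name in self.config.tasks` simply skips the overlay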
pyoco/server/api.py ADDED
@@ -0,0 +1,71 @@
+ from fastapi import FastAPI, HTTPException
+ from typing import List, Optional
+ from .store import StateStore
+ from .models import (
+     RunSubmitRequest, RunResponse,
+     WorkerPollRequest, WorkerPollResponse,
+     WorkerHeartbeatRequest, WorkerHeartbeatResponse
+ )
+ from ..core.models import RunContext, RunStatus
+
+ app = FastAPI(title="Pyoco Kanban Server")
+ store = StateStore()
+
+ @app.post("/runs", response_model=RunResponse)
+ def submit_run(req: RunSubmitRequest):
+     run_ctx = store.create_run(req.flow_name, req.params)
+     return RunResponse(run_id=run_ctx.run_id, status=run_ctx.status)
+
+ @app.get("/runs", response_model=List[RunContext])
+ def list_runs(status: Optional[RunStatus] = None):
+     runs = store.list_runs()
+     if status:
+         runs = [r for r in runs if r.status == status]
+     return runs
+
+ @app.get("/runs/{run_id}", response_model=RunContext)
+ def get_run(run_id: str):
+     run = store.get_run(run_id)
+     if not run:
+         raise HTTPException(status_code=404, detail="Run not found")
+     return run
+
+ @app.post("/runs/{run_id}/cancel")
+ def cancel_run(run_id: str):
+     run = store.get_run(run_id)
+     if not run:
+         raise HTTPException(status_code=404, detail="Run not found")
+     store.cancel_run(run_id)
+     return {"status": "CANCELLING"}
+
+ @app.post("/workers/poll", response_model=WorkerPollResponse)
+ def poll_work(req: WorkerPollRequest):
+     # In v0.3.0, worker_id and tags are ignored for simplicity
+     run = store.dequeue()
+     if run:
+         # Dequeuing means "assigned": the run leaves the queue, but its status
+         # stays PENDING until the worker reports RUNNING via its first heartbeat.
+         return WorkerPollResponse(
+             run_id=run.run_id,
+             flow_name=run.flow_name,
+             params=run.params
+         )
+     return WorkerPollResponse()
+
+ @app.post("/runs/{run_id}/heartbeat", response_model=WorkerHeartbeatResponse)
+ def heartbeat(run_id: str, req: WorkerHeartbeatRequest):
+     run = store.get_run(run_id)
+     if not run:
+         raise HTTPException(status_code=404, detail="Run not found")
+
+     store.update_run(run_id, status=req.run_status, task_states=req.task_states)
+
+     # Check if cancellation was requested
+     cancel_requested = (run.status == RunStatus.CANCELLING)
+
+     return WorkerHeartbeatResponse(cancel_requested=cancel_requested)
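
The endpoints define a pull-based worker protocol: workers poll for queued runs, execute them locally, and report progress through heartbeats, which double as the cancellation channel. A sketch of the client side (hypothetical worker loop; only the HTTP contract comes from this diff, while the `requests` usage and server address are assumptions):

    import time
    import requests

    BASE = "http://localhost:8000"  # assumed server address

    while True:
        work = requests.post(f"{BASE}/workers/poll",
                             json={"worker_id": "w1", "tags": []}).json()
        if not work.get("run_id"):
            time.sleep(2)   # nothing queued; poll again later
            continue

        # ... execute work["flow_name"] with work["params"] via a local Engine ...

        hb = requests.post(f"{BASE}/runs/{work['run_id']}/heartbeat",
                           json={"task_states": {}, "run_status": "RUNNING"}).json()
        if hb["cancel_requested"]:
            break  # the server saw POST /runs/{id}/cancel; stop the local run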
pyoco/server/models.py ADDED
@@ -0,0 +1,28 @@
+ from typing import Dict, List, Optional, Any
+ from pydantic import BaseModel
+ from ..core.models import RunStatus, TaskState
+
+ class RunSubmitRequest(BaseModel):
+     flow_name: str
+     params: Dict[str, Any] = {}
+     tags: List[str] = []
+
+ class RunResponse(BaseModel):
+     run_id: str
+     status: RunStatus
+
+ class WorkerPollRequest(BaseModel):
+     worker_id: str
+     tags: List[str] = []
+
+ class WorkerPollResponse(BaseModel):
+     run_id: Optional[str] = None
+     flow_name: Optional[str] = None
+     params: Optional[Dict[str, Any]] = None
+
+ class WorkerHeartbeatRequest(BaseModel):
+     task_states: Dict[str, TaskState]
+     run_status: RunStatus
+
+ class WorkerHeartbeatResponse(BaseModel):
+     cancel_requested: bool
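
Because the pydantic models reuse the core enums as field types, task and run states travel over the wire as their string values. A round-trip sketch (pydantic v1-style `.json()` shown; the diff does not pin a pydantic version):

    from pyoco.core.models import RunStatus, TaskState
    from pyoco.server.models import WorkerHeartbeatRequest, WorkerHeartbeatResponse

    req = WorkerHeartbeatRequest(
        task_states={"extract": TaskState.SUCCEEDED, "load": TaskState.RUNNING},
        run_status=RunStatus.RUNNING,
    )
    print(req.json())
    # {"task_states": {"extract": "SUCCEEDED", "load": "RUNNING"}, "run_status": "RUNNING"}

    resp = WorkerHeartbeatResponse(cancel_requested=True)  # server -> worker: stop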