pyoco 0.1.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyoco/cli/main.py +257 -38
- pyoco/client.py +89 -0
- pyoco/core/context.py +100 -4
- pyoco/core/engine.py +432 -150
- pyoco/core/exceptions.py +15 -0
- pyoco/core/models.py +171 -1
- pyoco/discovery/loader.py +29 -3
- pyoco/discovery/plugins.py +92 -0
- pyoco/dsl/expressions.py +160 -0
- pyoco/dsl/nodes.py +56 -0
- pyoco/dsl/syntax.py +241 -95
- pyoco/dsl/validator.py +104 -0
- pyoco/server/__init__.py +0 -0
- pyoco/server/api.py +112 -0
- pyoco/server/metrics.py +113 -0
- pyoco/server/models.py +30 -0
- pyoco/server/store.py +219 -0
- pyoco/server/webhook.py +108 -0
- pyoco/socketless_reset.py +7 -0
- pyoco/trace/backend.py +1 -1
- pyoco/trace/console.py +12 -4
- pyoco/worker/__init__.py +0 -0
- pyoco/worker/client.py +43 -0
- pyoco/worker/runner.py +166 -0
- pyoco-0.5.0.dist-info/METADATA +159 -0
- pyoco-0.5.0.dist-info/RECORD +33 -0
- pyoco-0.1.0.dist-info/METADATA +0 -7
- pyoco-0.1.0.dist-info/RECORD +0 -17
- {pyoco-0.1.0.dist-info → pyoco-0.5.0.dist-info}/WHEEL +0 -0
- {pyoco-0.1.0.dist-info → pyoco-0.5.0.dist-info}/top_level.txt +0 -0
pyoco/server/models.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from typing import Dict, List, Optional, Any
|
|
2
|
+
from pydantic import BaseModel
|
|
3
|
+
from ..core.models import RunStatus, TaskState
|
|
4
|
+
|
|
5
|
+
class RunSubmitRequest(BaseModel):
    """Payload for submitting a new flow run to the server."""

    flow_name: str
    # Pydantic deep-copies mutable defaults per instance, so {} / [] are safe.
    params: Dict[str, Any] = {}
    tags: List[str] = []


class RunResponse(BaseModel):
    """Minimal view of a run returned after submission or lookup."""

    run_id: str
    status: RunStatus


class WorkerPollRequest(BaseModel):
    """Sent by a worker asking the server for a queued run."""

    worker_id: str
    tags: List[str] = []


class WorkerPollResponse(BaseModel):
    """Job assignment for a polling worker; all fields are None when the
    queue has nothing to hand out."""

    run_id: Optional[str] = None
    flow_name: Optional[str] = None
    params: Optional[Dict[str, Any]] = None


class WorkerHeartbeatRequest(BaseModel):
    """Progress report a worker pushes periodically for an in-flight run."""

    task_states: Dict[str, TaskState]
    task_records: Dict[str, Any] = {}
    logs: List[Dict[str, Any]] = []
    run_status: RunStatus


class WorkerHeartbeatResponse(BaseModel):
    """Server reply to a heartbeat; signals cooperative cancellation."""

    cancel_requested: bool
|
pyoco/server/store.py
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
import uuid
|
|
2
|
+
import time
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Dict, List, Optional, Any
|
|
7
|
+
from ..core.models import RunContext, RunStatus, TaskState
|
|
8
|
+
from .metrics import metrics
|
|
9
|
+
from .webhook import webhook_notifier
|
|
10
|
+
|
|
11
|
+
MAX_RUN_HISTORY = int(os.getenv("PYOCO_MAX_RUN_HISTORY", "50"))
|
|
12
|
+
RUN_ARCHIVE_DIR = Path(os.getenv("PYOCO_RUN_ARCHIVE_DIR", "artifacts/runs"))
|
|
13
|
+
MAX_LOG_BYTES_PER_TASK = int(os.getenv("PYOCO_MAX_LOG_BYTES", str(1024 * 1024)))
|
|
14
|
+
|
|
15
|
+
class StateStore:
    """In-memory registry of runs plus a FIFO work queue.

    Runs beyond ``max_runs`` that have reached a terminal state are spilled
    as JSON files under ``archive_dir`` and dropped from memory.  Per-task
    log volume is capped at ``log_limit_bytes``.  Access is not
    synchronized; callers must serialize concurrent use.
    """

    def __init__(self):
        self.runs: Dict[str, RunContext] = {}   # run_id -> live run state
        self.queue: List[str] = []              # FIFO of run_ids awaiting a worker
        self.history: List[str] = []            # creation order; drives retention
        self.max_runs = MAX_RUN_HISTORY
        self.archive_dir = RUN_ARCHIVE_DIR
        self.log_limit_bytes = MAX_LOG_BYTES_PER_TASK
        self.metrics = metrics
        self.webhook = webhook_notifier

    def create_run(self, flow_name: str, params: Dict) -> RunContext:
        """Register a new PENDING run and enqueue it for workers."""
        run_id = str(uuid.uuid4())
        run_ctx = RunContext(
            run_id=run_id,
            flow_name=flow_name,
            params=params or {},
            status=RunStatus.PENDING,
            start_time=time.time(),
        )
        self.runs[run_id] = run_ctx
        self.queue.append(run_id)
        self.history.append(run_id)
        self._enforce_retention()
        self.metrics.record_status_transition(None, run_ctx.status)
        return run_ctx

    def get_run(self, run_id: str) -> Optional[RunContext]:
        """Return the live run for ``run_id`` or None (archived runs excluded)."""
        return self.runs.get(run_id)

    def list_runs(
        self,
        status: Optional[RunStatus] = None,
        flow: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> List[RunContext]:
        """Return live runs, newest first, optionally filtered by status/flow.

        ``limit`` of 0/None means "no limit".
        """
        runs = list(self.runs.values())
        if status:
            runs = [r for r in runs if r.status == status]
        if flow:
            runs = [r for r in runs if r.flow_name == flow]
        runs.sort(key=lambda r: r.start_time or 0, reverse=True)
        if limit:
            runs = runs[:limit]
        return runs

    def dequeue(self, tags: List[str] = None) -> Optional[RunContext]:
        """Pop the oldest PENDING run off the queue.

        Tags are currently ignored (simple FIFO).  Queue entries may point
        at runs that were cancelled or archived meanwhile, so we scan for
        the first id whose run is still PENDING.
        """
        for i, run_id in enumerate(self.queue):
            run = self.runs.get(run_id)
            if run and run.status == RunStatus.PENDING:
                self.queue.pop(i)
                return run
        return None

    def update_run(self, run_id: str, status: RunStatus = None, task_states: Dict = None, task_records: Dict = None, logs: List[Dict[str, Any]] = None):
        """Merge a worker heartbeat (status, task states/records, logs) into a run."""
        run = self.runs.get(run_id)
        if not run:
            return
        previous_status = run.status

        if status:
            # A server-side CANCELLING verdict outranks a stale RUNNING
            # report from the worker; every other transition is accepted.
            if not (run.status == RunStatus.CANCELLING and status == RunStatus.RUNNING):
                run.status = status
            if status in [RunStatus.COMPLETED, RunStatus.FAILED, RunStatus.CANCELLED]:
                if not run.end_time:
                    run.end_time = time.time()

        if run.status != previous_status:
            self.metrics.record_status_transition(previous_status, run.status)

        if task_states:
            for name, state in task_states.items():
                run.tasks[name] = TaskState(state) if isinstance(state, str) else state
        if task_records:
            for name, record in task_records.items():
                self._merge_task_record(run, name, record)
        if logs:
            for entry in logs:
                self._append_log(run, entry)

        if status in [RunStatus.COMPLETED, RunStatus.FAILED, RunStatus.CANCELLED]:
            self._enforce_retention()
            if run.end_time and not run.metrics_run_observed:
                self.metrics.record_run_duration(run.flow_name, run.start_time, run.end_time)
                run.metrics_run_observed = True
        if run.status in [RunStatus.COMPLETED, RunStatus.FAILED, RunStatus.CANCELLED]:
            # Notify the webhook once per terminal status; if delivery fails
            # we leave the marker unset so a later heartbeat can retry.
            if run.webhook_notified_status != run.status.value:
                if self.webhook.notify_run(run):
                    run.webhook_notified_status = run.status.value

    def _merge_task_record(self, run: RunContext, name: str, record: Dict[str, Any]) -> None:
        """Fold one worker-reported task record into the stored record.

        Keys absent from ``record`` keep the previously stored value.
        """
        info = run.ensure_task_record(name)
        state_val = record.get("state")
        if state_val:
            info.state = TaskState(state_val) if isinstance(state_val, str) else state_val
        info.started_at = record.get("started_at", info.started_at)
        info.ended_at = record.get("ended_at", info.ended_at)
        info.duration_ms = record.get("duration_ms", info.duration_ms)
        info.error = record.get("error", info.error)
        info.traceback = record.get("traceback", info.traceback)
        info.inputs = record.get("inputs", info.inputs)
        info.output = record.get("output", info.output)
        info.artifacts = record.get("artifacts", info.artifacts)
        self._record_task_metrics(run, name, info)

    def _append_log(self, run: RunContext, entry: Dict[str, Any]) -> None:
        """Append one log entry, enforcing the per-task byte budget."""
        task_name = entry.get("task") or "unknown"
        text = entry.get("text", "")
        encoded = text.encode("utf-8")
        current = run.log_bytes.get(task_name, 0)
        if current >= self.log_limit_bytes:
            return  # budget already exhausted: drop silently
        if current + len(encoded) > self.log_limit_bytes:
            allowed = max(self.log_limit_bytes - current, 0)
            # BUGFIX: the budget is in *bytes*, so truncate the encoded
            # form (the original sliced the str by characters, overshooting
            # for multibyte text); errors="ignore" drops a partial trailing
            # code point.
            truncated = encoded[:allowed].decode("utf-8", errors="ignore")
            entry = dict(entry)
            entry["text"] = truncated + "\n[log truncated]"
            run.log_bytes[task_name] = self.log_limit_bytes
        else:
            run.log_bytes[task_name] = current + len(encoded)
        run.logs.append(entry)

    def cancel_run(self, run_id: str):
        """Request cancellation; workers observe CANCELLING via heartbeats."""
        run = self.runs.get(run_id)
        if not run:
            return
        previous = run.status
        if run.status in [RunStatus.PENDING, RunStatus.RUNNING]:
            run.status = RunStatus.CANCELLING
        if run.status != previous:
            self.metrics.record_status_transition(previous, run.status)

    def export_run(self, run: RunContext) -> Dict[str, Any]:
        """Serialize a run to a JSON-friendly dict (also the archive format)."""
        return {
            "run_id": run.run_id,
            "flow_name": run.flow_name,
            "params": run.params,
            "status": run.status.value if hasattr(run.status, "value") else run.status,
            "start_time": run.start_time,
            "end_time": run.end_time,
            "tasks": {name: state.value if hasattr(state, "value") else state for name, state in run.tasks.items()},
            "task_records": run.serialize_task_records(),
            "logs": run.logs,
            "metadata": run.metadata,
            "run_duration_ms": self._run_duration_ms(run),
            "task_summary": self._build_task_summary(run),
        }

    def _enforce_retention(self):
        """Spill the oldest finished runs to disk until the live count fits."""
        candidates = [rid for rid in self.history if rid in self.runs]
        while len(self.runs) > self.max_runs and candidates:
            run_id = candidates.pop(0)
            run = self.runs.get(run_id)
            if not run:
                continue
            if run.status not in [RunStatus.COMPLETED, RunStatus.FAILED, RunStatus.CANCELLED]:
                # Still active: skip it.  Its original history slot is kept by
                # the filter below (BUGFIX: the previous code re-appended the
                # id here, accumulating duplicates in self.history).
                continue
            self._spill_run(run)
            self.runs.pop(run_id, None)
            if run_id in self.queue:
                self.queue.remove(run_id)
        self.history = [rid for rid in self.history if rid in self.runs]

    def _spill_run(self, run: RunContext):
        """Best-effort archive of a finished run to <archive_dir>/<run_id>.json."""
        try:
            self.archive_dir.mkdir(parents=True, exist_ok=True)
            path = self.archive_dir / f"{run.run_id}.json"
            with path.open("w", encoding="utf-8") as fp:
                json.dump(self.export_run(run), fp, indent=2)
        except Exception:
            # Archiving must never break the server; losing an archive file
            # is acceptable.
            pass

    def _record_task_metrics(self, run: RunContext, task_name: str, record):
        """Record a task's duration metric exactly once, when it has ended."""
        if task_name in run.metrics_recorded_tasks:
            return
        if record.duration_ms is None or record.ended_at is None:
            return
        self.metrics.record_task_duration(task_name, record.duration_ms)
        run.metrics_recorded_tasks.add(task_name)

    def _run_duration_ms(self, run: RunContext) -> Optional[float]:
        """Wall-clock duration in milliseconds, or None while still running."""
        if run.start_time and run.end_time:
            return (run.end_time - run.start_time) * 1000.0
        return None

    def _build_task_summary(self, run: RunContext) -> Dict[str, Any]:
        """Compact per-task view (state/duration/end) for the export payload."""
        summary: Dict[str, Any] = {}
        for name, record in run.task_records.items():
            summary[name] = {
                "state": record.state.value if hasattr(record.state, "value") else record.state,
                "duration_ms": record.duration_ms,
                "ended_at": record.ended_at,
            }
        return summary
|
pyoco/server/webhook.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import time
|
|
6
|
+
from typing import Any, Callable, Dict, Optional
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
|
|
10
|
+
from ..core.models import RunContext, RunStatus
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class WebhookNotifier:
    """Posts run lifecycle events to an optional HTTP webhook.

    Settings come from the PYOCO_WEBHOOK_* environment variables and can be
    overridden via :meth:`configure`.  Delivery is best-effort: failures are
    recorded in ``last_error`` and never raised to the caller.
    """

    def __init__(self) -> None:
        self.url: Optional[str] = None
        self.timeout: float = 3.0
        self.retries: int = 1
        self.secret: Optional[str] = None
        self.extra_headers: Dict[str, str] = {}
        # Injectable transport (url, payload, headers, timeout) -> None;
        # defaults to an httpx POST.  Used by tests.
        self._sender: Optional[Callable[[str, Dict[str, Any], Dict[str, str], float], None]] = None
        self.last_error: Optional[str] = None
        self.load_from_env()

    def load_from_env(self) -> None:
        """Reset all settings from the PYOCO_WEBHOOK_* environment variables."""
        self.url = os.getenv("PYOCO_WEBHOOK_URL") or None
        self.timeout = float(os.getenv("PYOCO_WEBHOOK_TIMEOUT", "3.0"))
        # BUGFIX: clamp to >= 1 exactly like configure() does, so setting
        # PYOCO_WEBHOOK_RETRIES=0 cannot silently disable delivery
        # (range(0) would never attempt a send).
        self.retries = max(1, int(os.getenv("PYOCO_WEBHOOK_RETRIES", "1")))
        self.secret = os.getenv("PYOCO_WEBHOOK_SECRET") or None
        self.extra_headers = {}
        self.last_error = None
        self._sender = None

    def configure(
        self,
        *,
        url: Optional[str] = None,
        timeout: Optional[float] = None,
        retries: Optional[int] = None,
        secret: Optional[str] = None,
        headers: Optional[Dict[str, str]] = None,
        sender: Optional[Callable[[str, Dict[str, Any], Dict[str, str], float], None]] = None,
    ) -> None:
        """Override individual settings; arguments left as None are unchanged."""
        if url is not None:
            self.url = url
        if timeout is not None:
            self.timeout = timeout
        if retries is not None:
            self.retries = max(1, retries)
        if secret is not None:
            self.secret = secret
        if headers is not None:
            self.extra_headers = dict(headers)
        if sender is not None:
            self._sender = sender
        self.last_error = None

    def notify_run(self, run: RunContext) -> bool:
        """Deliver a run event to the webhook.

        Returns True on success, False when no URL is configured or all
        attempts failed (the last failure is kept in ``last_error``).
        """
        if not self.url:
            return False
        payload = self._build_payload(run)
        sender = self._sender or self._http_sender
        headers = {"Content-Type": "application/json", **self.extra_headers}
        if self.secret:
            headers.setdefault("X-Pyoco-Token", self.secret)

        last_exc: Optional[Exception] = None
        for attempt in range(self.retries):
            try:
                sender(self.url, payload, headers, self.timeout)
                self.last_error = None
                return True
            except Exception as exc:  # pragma: no cover - retries captured via tests
                last_exc = exc
                # FIX: linear backoff capped at 0.5s, but only *between*
                # attempts — the original also slept after the final failure.
                if attempt + 1 < self.retries:
                    time.sleep(min(0.5, 0.1 * (attempt + 1)))
        if last_exc:
            self.last_error = str(last_exc)
        return False

    def _http_sender(
        self,
        url: str,
        payload: Dict[str, Any],
        headers: Dict[str, str],
        timeout: float,
    ) -> None:
        """Default transport: a blocking httpx POST of the JSON payload."""
        httpx.post(url, json=payload, headers=headers, timeout=timeout)

    def _build_payload(self, run: RunContext) -> Dict[str, Any]:
        """Build the JSON event body for a run."""
        duration_ms = None
        if run.start_time and run.end_time:
            duration_ms = (run.end_time - run.start_time) * 1000.0
        return {
            "event": f"run.{run.status.value.lower()}",
            "run_id": run.run_id,
            "flow_name": run.flow_name,
            "params": run.params,
            "status": run.status.value if isinstance(run.status, RunStatus) else run.status,
            "started_at": run.start_time,
            "ended_at": run.end_time,
            "duration_ms": duration_ms,
            "tasks": run.serialize_task_records(),
            "metadata": run.metadata,
        }

    def reset(self) -> None:
        """Drop all overrides and re-read configuration from the environment."""
        self.load_from_env()


# Module-level singleton shared by the server components.
webhook_notifier = WebhookNotifier()
|
pyoco/trace/backend.py
CHANGED
pyoco/trace/console.py
CHANGED
|
@@ -2,14 +2,22 @@ import time
|
|
|
2
2
|
from .backend import TraceBackend
|
|
3
3
|
|
|
4
4
|
class ConsoleTraceBackend(TraceBackend):
    """Trace backend that prints flow/node events to stdout.

    ``style`` is either "cute" (emoji) or "plain"; when omitted it is derived
    from the PYOCO_CUTE environment variable (truthy by default).
    """

    def __init__(self, style: str = None):
        if style is None:
            import os
            cute_flag = os.environ.get("PYOCO_CUTE", "true").lower()
            style = "plain" if cute_flag in ["0", "false", "no", "off"] else "cute"
        self.style = style

    def on_flow_start(self, flow_name: str, run_id: str = None):
        suffix = f" run_id={run_id}" if run_id else ""
        if self.style == "cute":
            print(f"🐇 pyoco > start flow={flow_name}{suffix}")
        else:
            print(f"INFO pyoco start flow={flow_name}{suffix}")
13
21
|
|
|
14
22
|
def on_flow_end(self, flow_name: str):
|
|
15
23
|
if self.style == "cute":
|
pyoco/worker/__init__.py
ADDED
|
File without changes
|
pyoco/worker/client.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import httpx
|
|
2
|
+
from typing import Dict, List, Optional, Any
|
|
3
|
+
from ..core.models import RunStatus, TaskState
|
|
4
|
+
|
|
5
|
+
class WorkerClient:
    """Thin HTTP client a worker uses to talk to the pyoco server."""

    def __init__(self, server_url: str, worker_id: str):
        self.server_url = server_url.rstrip("/")
        self.worker_id = worker_id
        self.client = httpx.Client(base_url=self.server_url)

    def poll(self, tags: Optional[List[str]] = None) -> Optional[Dict[str, Any]]:
        """Ask the server for a queued run.

        Returns the job payload dict when a run was assigned, else None
        (also on any transport error — the worker loop simply retries).
        """
        try:
            resp = self.client.post("/workers/poll", json={
                "worker_id": self.worker_id,
                # BUGFIX: default was a shared mutable list (tags=[]).
                "tags": tags or [],
            })
            resp.raise_for_status()
            data = resp.json()
            if data.get("run_id"):
                return data
            return None
        except Exception as e:  # best-effort: caller polls again later
            print(f"Poll failed: {e}")
            return False if False else None

    def heartbeat(self, run_id: str, task_states: Dict[str, TaskState], run_status: RunStatus) -> bool:
        """Send a heartbeat for a run.

        Returns True if the server requested cancellation; False otherwise
        (including on transport errors, which are treated as "no news").
        """
        try:
            # Enums are not JSON-serializable; send their values.
            states_json = {k: v.value for k, v in task_states.items()}
            resp = self.client.post(f"/runs/{run_id}/heartbeat", json={
                "task_states": states_json,
                "run_status": run_status.value,
            })
            resp.raise_for_status()
            return resp.json().get("cancel_requested", False)
        except Exception as e:
            print(f"Heartbeat failed: {e}")
            return False
|
pyoco/worker/runner.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
import time
|
|
2
|
+
import uuid
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
from ..core.engine import Engine
|
|
5
|
+
from ..core.models import RunContext, RunStatus, Flow
|
|
6
|
+
from ..trace.backend import TraceBackend
|
|
7
|
+
from ..discovery.loader import TaskLoader
|
|
8
|
+
from ..schemas.config import PyocoConfig
|
|
9
|
+
from ..client import Client
|
|
10
|
+
|
|
11
|
+
from ..trace.console import ConsoleTraceBackend
|
|
12
|
+
|
|
13
|
+
class RemoteTraceBackend(TraceBackend):
    """Trace backend that mirrors every event to the console and pushes
    rate-limited heartbeats to the server, picking up cancellation requests
    on the way back."""

    def __init__(self, client: Client, run_ctx: RunContext, cute: bool = True):
        self.client = client
        self.run_ctx = run_ctx
        self.last_heartbeat = 0
        self.heartbeat_interval = 1.0  # seconds between non-forced heartbeats
        self.console = ConsoleTraceBackend(style="cute" if cute else "plain")

    def _send_heartbeat(self, force=False):
        # Throttle: skip unless forced or the interval has elapsed.
        now = time.time()
        if not force and now - self.last_heartbeat <= self.heartbeat_interval:
            return
        cancel = self.client.heartbeat(self.run_ctx)
        if cancel and self.run_ctx.status not in [RunStatus.CANCELLING, RunStatus.CANCELLED]:
            print(f"🛑 Cancellation requested from server for run {self.run_ctx.run_id}")
            self.run_ctx.status = RunStatus.CANCELLING
        self.last_heartbeat = now

    def on_flow_start(self, name: str, run_id: Optional[str] = None):
        self.console.on_flow_start(name, run_id)
        self._send_heartbeat(force=True)

    def on_flow_end(self, name: str):
        self.console.on_flow_end(name)
        self._send_heartbeat(force=True)

    def on_node_start(self, node_name: str):
        self.console.on_node_start(node_name)
        self._send_heartbeat()

    def on_node_end(self, node_name: str, duration: float):
        self.console.on_node_end(node_name, duration)
        self._send_heartbeat(force=True)

    def on_node_error(self, node_name: str, error: Exception):
        self.console.on_node_error(node_name, error)
        self._send_heartbeat(force=True)
+
|
|
51
|
+
class Worker:
    """Long-running process that polls the server for jobs and executes them
    with a local Engine, streaming progress back via RemoteTraceBackend."""

    def __init__(self, server_url: str, config: PyocoConfig, tags: Optional[List[str]] = None):
        self.server_url = server_url
        self.config = config
        # BUGFIX: default was a shared mutable list (tags=[]); copy the
        # caller's list so later mutations don't leak either way.
        self.tags = list(tags) if tags else []
        self.worker_id = f"w-{uuid.uuid4().hex[:8]}"
        self.client = Client(server_url, self.worker_id)
        self.loader = TaskLoader(config)
        self.loader.load()  # Load tasks/flows once at startup.

    def start(self):
        """Poll forever, executing jobs as they arrive; Ctrl-C stops cleanly."""
        print(f"🐜 Worker {self.worker_id} started. Connected to {self.server_url}")
        try:
            while True:
                job = self.client.poll(self.tags)
                if job:
                    self._execute_job(job)
                else:
                    time.sleep(2.0)  # idle back-off between empty polls
        except KeyboardInterrupt:
            print("\n🛑 Worker stopping...")

    def _execute_job(self, job):
        """Build the requested flow from local config and run it.

        The server assigns the run_id; we pre-create the RunContext with it
        so the trace backend can report against the right run, and pass the
        same context into Engine.run.  Terminal state (success or failure)
        is pushed with a final heartbeat.
        """
        run_id = job["run_id"]
        flow_name = job["flow_name"]
        params = job["params"] or {}

        print(f"🚀 Received job: {run_id} (Flow: {flow_name})")

        flow_def = self.config.flows.get(flow_name)
        if not flow_def:
            print(f"❌ Flow '{flow_name}' not found in local config.")
            return

        # Build the Flow object the same way the CLI does: register every
        # loaded task, then evaluate the DSL graph expression.
        from ..core.models import Flow as FlowModel
        from ..dsl.syntax import TaskWrapper

        eval_context = {name: TaskWrapper(task) for name, task in self.loader.tasks.items()}

        try:
            flow = FlowModel(name=flow_name)
            for task in self.loader.tasks.values():
                flow.add_task(task)
            # NOTE(security): exec() runs arbitrary code.  The graph text
            # comes from the worker's own local pyoco config (trusted
            # project files), never from the server or the network.
            exec(flow_def.graph, {}, eval_context)
        except Exception as e:
            print(f"❌ Error building flow: {e}")
            return

        engine = Engine()
        run_ctx = RunContext(run_id=run_id, status=RunStatus.RUNNING)
        backend = RemoteTraceBackend(self.client, run_ctx)
        engine.trace = backend  # replace the default console-only trace

        try:
            engine.run(flow, params=params, run_context=run_ctx)
            print(f"✅ Job {run_id} completed: {run_ctx.status}")
            self.client.heartbeat(run_ctx)  # push final state
        except Exception as e:
            print(f"💥 Job {run_id} failed: {e}")
            run_ctx.status = RunStatus.FAILED
            self.client.heartbeat(run_ctx)  # report failure before giving up
|