pyoco 0.3.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

pyoco/server/api.py CHANGED
@@ -1,37 +1,43 @@
-from fastapi import FastAPI, HTTPException
-from typing import List, Optional
+from fastapi import FastAPI, HTTPException, Query
+from fastapi.responses import PlainTextResponse
+from typing import List, Optional, Dict, Any
 
 from .store import StateStore
 from .models import (
-    RunSubmitRequest, RunResponse,
+    RunSubmitRequest, RunResponse,
     WorkerPollRequest, WorkerPollResponse,
     WorkerHeartbeatRequest, WorkerHeartbeatResponse
 )
-from ..core.models import RunContext, RunStatus
+from ..core.models import RunStatus
+from .metrics import metrics, metrics_content_type
 
 app = FastAPI(title="Pyoco Kanban Server")
 store = StateStore()
 
 @app.post("/runs", response_model=RunResponse)
-def submit_run(req: RunSubmitRequest):
+async def submit_run(req: RunSubmitRequest):
     run_ctx = store.create_run(req.flow_name, req.params)
     return RunResponse(run_id=run_ctx.run_id, status=run_ctx.status)
 
-@app.get("/runs", response_model=List[RunContext])
-def list_runs(status: Optional[RunStatus] = None):
-    runs = store.list_runs()
-    if status:
-        runs = [r for r in runs if r.status == status]
-    return runs
+@app.get("/runs")
+async def list_runs(
+    status: Optional[str] = None,
+    flow: Optional[str] = None,
+    limit: Optional[int] = Query(default=None, ge=1, le=200),
+):
+    status_enum = _parse_status(status)
+    limit_value = limit if isinstance(limit, int) else None
+    runs = store.list_runs(status=status_enum, flow=flow, limit=limit_value)
+    return [store.export_run(r) for r in runs]
 
-@app.get("/runs/{run_id}", response_model=RunContext)
-def get_run(run_id: str):
+@app.get("/runs/{run_id}")
+async def get_run(run_id: str):
     run = store.get_run(run_id)
     if not run:
         raise HTTPException(status_code=404, detail="Run not found")
-    return run
+    return store.export_run(run)
 
 @app.post("/runs/{run_id}/cancel")
-def cancel_run(run_id: str):
+async def cancel_run(run_id: str):
     run = store.get_run(run_id)
     if not run:
         raise HTTPException(status_code=404, detail="Run not found")
@@ -39,7 +45,7 @@ def cancel_run(run_id: str):
     return {"status": "CANCELLING"}
 
 @app.post("/workers/poll", response_model=WorkerPollResponse)
-def poll_work(req: WorkerPollRequest):
+async def poll_work(req: WorkerPollRequest):
     # In v0.3.0, we ignore worker_id and tags for simplicity
     run = store.dequeue()
     if run:
@@ -58,14 +64,49 @@ def poll_work(req: WorkerPollRequest):
     return WorkerPollResponse()
 
 @app.post("/runs/{run_id}/heartbeat", response_model=WorkerHeartbeatResponse)
-def heartbeat(run_id: str, req: WorkerHeartbeatRequest):
+async def heartbeat(run_id: str, req: WorkerHeartbeatRequest):
     run = store.get_run(run_id)
     if not run:
         raise HTTPException(status_code=404, detail="Run not found")
 
-    store.update_run(run_id, status=req.run_status, task_states=req.task_states)
+    store.update_run(
+        run_id,
+        status=req.run_status,
+        task_states=req.task_states,
+        task_records=req.task_records,
+        logs=req.logs
+    )
 
     # Check if cancellation was requested
    cancel_requested = (run.status == RunStatus.CANCELLING)
 
     return WorkerHeartbeatResponse(cancel_requested=cancel_requested)
+
+@app.get("/runs/{run_id}/logs")
+async def get_logs(run_id: str, task: Optional[str] = None, tail: Optional[int] = None):
+    run = store.get_run(run_id)
+    if not run:
+        raise HTTPException(status_code=404, detail="Run not found")
+    logs = run.logs
+    if task:
+        logs = [entry for entry in logs if entry["task"] == task]
+    if tail:
+        logs = logs[-tail:]
+    return {"run_status": run.status, "logs": logs}
+
+
+@app.get("/metrics")
+async def prometheus_metrics():
+    payload = metrics.render_latest()
+    return PlainTextResponse(payload, media_type=metrics_content_type())
+
+
+def _parse_status(value: Optional[str]) -> Optional[RunStatus]:
+    if not value:
+        return None
+    if isinstance(value, RunStatus):
+        return value
+    try:
+        return RunStatus(value)
+    except ValueError:
+        raise HTTPException(status_code=400, detail=f"Invalid status '{value}'")
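The new read surface in api.py can be exercised from any HTTP client. A minimal sketch, assuming a server on localhost:8000; the flow and task names are invented for illustration, and the status string assumes the upper-case RunStatus names used throughout this diff:

```python
import httpx

BASE = "http://localhost:8000"  # assumed local dev server

# /runs now takes status/flow/limit query params (limit validated server-side, 1..200).
runs = httpx.get(f"{BASE}/runs", params={"status": "RUNNING", "flow": "train_flow", "limit": 20}).json()

for run in runs:
    # Tail the last 100 log entries for a run, optionally filtered by ?task=<name>.
    logs = httpx.get(f"{BASE}/runs/{run['run_id']}/logs", params={"tail": 100}).json()
    print(run["flow_name"], run["status"], len(logs["logs"]))

# /metrics serves the Prometheus text exposition format.
print(httpx.get(f"{BASE}/metrics").text[:200])
```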
pyoco/server/metrics.py ADDED
@@ -0,0 +1,113 @@
+from __future__ import annotations
+
+from typing import Optional
+
+from prometheus_client import (
+    CollectorRegistry,
+    CONTENT_TYPE_LATEST,
+    Counter,
+    Gauge,
+    Histogram,
+    generate_latest,
+)
+
+from ..core.models import RunStatus
+
+
+_DEFAULT_BUCKETS = (
+    0.05,
+    0.1,
+    0.25,
+    0.5,
+    1.0,
+    2.5,
+    5.0,
+    10.0,
+    30.0,
+    60.0,
+)
+
+
+class MetricsSink:
+    """
+    Small wrapper that owns a CollectorRegistry so tests can reset easily.
+    """
+
+    def __init__(self) -> None:
+        self.registry = CollectorRegistry()
+        self._init_metrics()
+
+    def _init_metrics(self) -> None:
+        self.runs_total = Counter(
+            "pyoco_runs_total",
+            "Total runs observed by status transitions.",
+            ["status"],
+            registry=self.registry,
+        )
+        self.runs_in_progress = Gauge(
+            "pyoco_runs_in_progress",
+            "Number of runs currently executing (RUNNING).",
+            registry=self.registry,
+        )
+        self.task_duration = Histogram(
+            "pyoco_task_duration_seconds",
+            "Observed task durations.",
+            ["task"],
+            buckets=_DEFAULT_BUCKETS,
+            registry=self.registry,
+        )
+        self.run_duration = Histogram(
+            "pyoco_run_duration_seconds",
+            "Observed end-to-end run durations.",
+            ["flow"],
+            buckets=_DEFAULT_BUCKETS,
+            registry=self.registry,
+        )
+
+    def reset(self) -> None:
+        self.__init__()
+
+    def record_status_transition(
+        self,
+        previous: Optional[RunStatus],
+        new_status: RunStatus,
+    ) -> None:
+        status_value = new_status.value if hasattr(new_status, "value") else str(new_status)
+        self.runs_total.labels(status=status_value).inc()
+
+        prev_value = previous.value if hasattr(previous, "value") else previous
+        if status_value == RunStatus.RUNNING.value:
+            if prev_value != RunStatus.RUNNING.value:
+                self.runs_in_progress.inc()
+        elif prev_value == RunStatus.RUNNING.value:
+            self.runs_in_progress.dec()
+
+    def record_task_duration(self, task_name: str, duration_ms: Optional[float]) -> None:
+        if duration_ms is None:
+            return
+        if duration_ms < 0:
+            return
+        self.task_duration.labels(task=task_name).observe(duration_ms / 1000.0)
+
+    def record_run_duration(
+        self,
+        flow_name: str,
+        start_time: Optional[float],
+        end_time: Optional[float],
+    ) -> None:
+        if start_time is None or end_time is None:
+            return
+        duration = end_time - start_time
+        if duration < 0:
+            return
+        self.run_duration.labels(flow=flow_name).observe(duration)
+
+    def render_latest(self) -> bytes:
+        return generate_latest(self.registry)
+
+
+metrics = MetricsSink()
+
+
+def metrics_content_type() -> str:
+    return CONTENT_TYPE_LATEST
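Since MetricsSink owns its own CollectorRegistry, it can be exercised in isolation (for example in a test). A sketch using the enum members and method signatures shown above; the task and flow labels are invented:

```python
from pyoco.server.metrics import MetricsSink
from pyoco.core.models import RunStatus

sink = MetricsSink()

# A PENDING -> RUNNING transition bumps the in-progress gauge; RUNNING -> COMPLETED drops it.
sink.record_status_transition(None, RunStatus.PENDING)
sink.record_status_transition(RunStatus.PENDING, RunStatus.RUNNING)
sink.record_status_transition(RunStatus.RUNNING, RunStatus.COMPLETED)

sink.record_task_duration("preprocess", 420.0)       # milliseconds in, seconds observed
sink.record_run_duration("demo_flow", 100.0, 103.5)  # epoch-second timestamps

print(sink.render_latest().decode())  # Prometheus text exposition
sink.reset()                          # fresh registry, counters back to zero
```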
pyoco/server/models.py CHANGED
@@ -22,6 +22,8 @@ class WorkerPollResponse(BaseModel):
 
 class WorkerHeartbeatRequest(BaseModel):
     task_states: Dict[str, TaskState]
+    task_records: Dict[str, Any] = {}
+    logs: List[Dict[str, Any]] = []
     run_status: RunStatus
 
 class WorkerHeartbeatResponse(BaseModel):
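For context, an illustrative heartbeat body matching the widened request model. The keys in each task record mirror what store.update_run reads below; the concrete values and the TaskState strings are assumptions:

```python
# Hypothetical JSON body for POST /runs/{run_id}/heartbeat
heartbeat = {
    "run_status": "RUNNING",
    "task_states": {"extract": "COMPLETED", "train": "RUNNING"},
    "task_records": {
        "extract": {
            "state": "COMPLETED",
            "started_at": 1700000000.0,
            "ended_at": 1700000004.2,
            "duration_ms": 4200.0,
            "error": None,
            "output": "rows=1024",
        },
    },
    "logs": [
        {"task": "train", "text": "epoch 1/10 loss=0.42\n"},
    ],
}
```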
pyoco/server/store.py CHANGED
@@ -1,38 +1,63 @@
 import uuid
 import time
-from typing import Dict, List, Optional
-from ..core.models import RunContext, RunStatus
+import json
+import os
+from pathlib import Path
+from typing import Dict, List, Optional, Any
+from ..core.models import RunContext, RunStatus, TaskState
+from .metrics import metrics
+from .webhook import webhook_notifier
+
+MAX_RUN_HISTORY = int(os.getenv("PYOCO_MAX_RUN_HISTORY", "50"))
+RUN_ARCHIVE_DIR = Path(os.getenv("PYOCO_RUN_ARCHIVE_DIR", "artifacts/runs"))
+MAX_LOG_BYTES_PER_TASK = int(os.getenv("PYOCO_MAX_LOG_BYTES", str(1024 * 1024)))
 
 class StateStore:
     def __init__(self):
         self.runs: Dict[str, RunContext] = {}
         self.queue: List[str] = []
+        self.history: List[str] = []
+        self.max_runs = MAX_RUN_HISTORY
+        self.archive_dir = RUN_ARCHIVE_DIR
+        self.log_limit_bytes = MAX_LOG_BYTES_PER_TASK
+        self.metrics = metrics
+        self.webhook = webhook_notifier
 
     def create_run(self, flow_name: str, params: Dict) -> RunContext:
         run_id = str(uuid.uuid4())
         run_ctx = RunContext(
             run_id=run_id,
+            flow_name=flow_name,
+            params=params or {},
             status=RunStatus.PENDING,
             start_time=time.time()
         )
-        # Store extra metadata if needed (flow_name, params)
-        # For now, RunContext doesn't have flow_name/params fields in core.models.
-        # We might need to extend RunContext or store them separately.
-        # Let's attach them dynamically for now or assume the worker knows.
-        # Actually, the worker needs flow_name and params to run.
-        # We should store them in the store alongside the context.
-        run_ctx.flow_name = flow_name
-        run_ctx.params = params
 
         self.runs[run_id] = run_ctx
         self.queue.append(run_id)
+        self.history.append(run_id)
+        self._enforce_retention()
+        self.metrics.record_status_transition(None, run_ctx.status)
         return run_ctx
 
     def get_run(self, run_id: str) -> Optional[RunContext]:
         return self.runs.get(run_id)
 
-    def list_runs(self) -> List[RunContext]:
-        return list(self.runs.values())
+    def list_runs(
+        self,
+        status: Optional[RunStatus] = None,
+        flow: Optional[str] = None,
+        limit: Optional[int] = None,
+    ) -> List[RunContext]:
+        runs = list(self.runs.values())
+        if status:
+            runs = [r for r in runs if r.status == status]
+        if flow:
+            runs = [r for r in runs if r.flow_name == flow]
+        runs.sort(key=lambda r: r.start_time or 0, reverse=True)
+        if limit:
+            runs = runs[:limit]
+        return runs
 
     def dequeue(self, tags: List[str] = None) -> Optional[RunContext]:
         # Simple FIFO queue for now. Tags ignored in v0.3.0 MVP.
@@ -53,11 +78,12 @@ class StateStore:
 
         return None
 
-    def update_run(self, run_id: str, status: RunStatus = None, task_states: Dict = None):
+    def update_run(self, run_id: str, status: RunStatus = None, task_states: Dict = None, task_records: Dict = None, logs: List[Dict[str, Any]] = None):
         run = self.runs.get(run_id)
         if not run:
             return
-
+        previous_status = run.status
+
         if status:
             # State transition check
             # If server has CANCELLING, ignore RUNNING from worker
@@ -70,13 +96,124 @@ class StateStore:
             if not run.end_time:
                 run.end_time = time.time()
 
+        if run.status != previous_status:
+            self.metrics.record_status_transition(previous_status, run.status)
+
         if task_states:
-            run.tasks.update(task_states)
+            for name, state in task_states.items():
+                run.tasks[name] = TaskState(state) if isinstance(state, str) else state
+        if task_records:
+            for name, record in task_records.items():
+                info = run.ensure_task_record(name)
+                state_val = record.get("state")
+                if state_val:
+                    info.state = TaskState(state_val) if isinstance(state_val, str) else state_val
+                info.started_at = record.get("started_at", info.started_at)
+                info.ended_at = record.get("ended_at", info.ended_at)
+                info.duration_ms = record.get("duration_ms", info.duration_ms)
+                info.error = record.get("error", info.error)
+                info.traceback = record.get("traceback", info.traceback)
+                info.inputs = record.get("inputs", info.inputs)
+                info.output = record.get("output", info.output)
+                info.artifacts = record.get("artifacts", info.artifacts)
+                self._record_task_metrics(run, name, info)
+        if logs:
+            for entry in logs:
+                task_name = entry.get("task") or "unknown"
+                text = entry.get("text", "")
+                encoded_len = len(text.encode("utf-8"))
+                current = run.log_bytes.get(task_name, 0)
+                if current >= self.log_limit_bytes:
+                    continue
+                if current + encoded_len > self.log_limit_bytes:
+                    allowed = max(self.log_limit_bytes - current, 0)
+                    truncated_text = text[:allowed] + "\n[log truncated]"
+                    entry = dict(entry)
+                    entry["text"] = truncated_text
+                    run.log_bytes[task_name] = self.log_limit_bytes
+                else:
+                    run.log_bytes[task_name] = current + encoded_len
+                run.logs.append(entry)
+        if status in [RunStatus.COMPLETED, RunStatus.FAILED, RunStatus.CANCELLED]:
+            self._enforce_retention()
+            if run.end_time and not run.metrics_run_observed:
+                self.metrics.record_run_duration(run.flow_name, run.start_time, run.end_time)
+                run.metrics_run_observed = True
+        if run.status in [RunStatus.COMPLETED, RunStatus.FAILED, RunStatus.CANCELLED]:
+            if run.webhook_notified_status != run.status.value:
+                if self.webhook.notify_run(run):
+                    run.webhook_notified_status = run.status.value
 
     def cancel_run(self, run_id: str):
         run = self.runs.get(run_id)
         if not run:
             return
-
+        previous = run.status
         if run.status in [RunStatus.PENDING, RunStatus.RUNNING]:
             run.status = RunStatus.CANCELLING
+        if run.status != previous:
+            self.metrics.record_status_transition(previous, run.status)
+
+    def export_run(self, run: RunContext) -> Dict[str, Any]:
+        return {
+            "run_id": run.run_id,
+            "flow_name": run.flow_name,
+            "params": run.params,
+            "status": run.status.value if hasattr(run.status, "value") else run.status,
+            "start_time": run.start_time,
+            "end_time": run.end_time,
+            "tasks": {name: state.value if hasattr(state, "value") else state for name, state in run.tasks.items()},
+            "task_records": run.serialize_task_records(),
+            "logs": run.logs,
+            "metadata": run.metadata,
+            "run_duration_ms": self._run_duration_ms(run),
+            "task_summary": self._build_task_summary(run),
+        }
+
+    def _enforce_retention(self):
+        removable_ids = [rid for rid in self.history if rid in self.runs]
+        while len(self.runs) > self.max_runs and removable_ids:
+            run_id = removable_ids.pop(0)
+            run = self.runs.get(run_id)
+            if not run:
+                continue
+            if run.status not in [RunStatus.COMPLETED, RunStatus.FAILED, RunStatus.CANCELLED]:
+                self.history.append(run_id)
+                continue
+            self._spill_run(run)
+            self.runs.pop(run_id, None)
+            if run_id in self.queue:
+                self.queue.remove(run_id)
+        self.history = [rid for rid in self.history if rid in self.runs]
+
+    def _spill_run(self, run: RunContext):
+        try:
+            self.archive_dir.mkdir(parents=True, exist_ok=True)
+            path = self.archive_dir / f"{run.run_id}.json"
+            with path.open("w", encoding="utf-8") as fp:
+                json.dump(self.export_run(run), fp, indent=2)
+        except Exception:
+            pass
+
+    def _record_task_metrics(self, run: RunContext, task_name: str, record):
+        if task_name in run.metrics_recorded_tasks:
+            return
+        if record.duration_ms is None or record.ended_at is None:
+            return
+        self.metrics.record_task_duration(task_name, record.duration_ms)
+        run.metrics_recorded_tasks.add(task_name)
+
+    def _run_duration_ms(self, run: RunContext) -> Optional[float]:
+        if run.start_time and run.end_time:
+            return (run.end_time - run.start_time) * 1000.0
+        return None
+
+    def _build_task_summary(self, run: RunContext) -> Dict[str, Any]:
+        summary: Dict[str, Any] = {}
+        for name, record in run.task_records.items():
+            summary[name] = {
+                "state": record.state.value if hasattr(record.state, "value") else record.state,
+                "duration_ms": record.duration_ms,
+                "ended_at": record.ended_at,
+            }
+        return summary
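The retention and log-cap knobs are module-level constants read from the environment, so they have to be set before pyoco.server.store is first imported. A sketch:

```python
import os

# Must run before `pyoco.server.store` (or anything that imports it) loads.
os.environ["PYOCO_MAX_RUN_HISTORY"] = "10"           # keep at most 10 runs in memory
os.environ["PYOCO_RUN_ARCHIVE_DIR"] = "/tmp/pyoco"   # terminal runs spill here as JSON
os.environ["PYOCO_MAX_LOG_BYTES"] = str(256 * 1024)  # per-task log cap before truncation

from pyoco.server.store import StateStore

store = StateStore()
# Beyond 10 runs, _enforce_retention() evicts the oldest *terminal* runs,
# writing each to /tmp/pyoco/<run_id>.json via _spill_run(); PENDING and
# RUNNING runs are never evicted, regardless of the cap.
```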
pyoco/server/webhook.py ADDED
@@ -0,0 +1,108 @@
+from __future__ import annotations
+
+import json
+import os
+import time
+from typing import Any, Callable, Dict, Optional
+
+import httpx
+
+from ..core.models import RunContext, RunStatus
+
+
+class WebhookNotifier:
+    def __init__(self) -> None:
+        self.url: Optional[str] = None
+        self.timeout: float = 3.0
+        self.retries: int = 1
+        self.secret: Optional[str] = None
+        self.extra_headers: Dict[str, str] = {}
+        self._sender: Optional[Callable[[str, Dict[str, Any], Dict[str, str], float], None]] = None
+        self.last_error: Optional[str] = None
+        self.load_from_env()
+
+    def load_from_env(self) -> None:
+        self.url = os.getenv("PYOCO_WEBHOOK_URL") or None
+        self.timeout = float(os.getenv("PYOCO_WEBHOOK_TIMEOUT", "3.0"))
+        self.retries = int(os.getenv("PYOCO_WEBHOOK_RETRIES", "1"))
+        self.secret = os.getenv("PYOCO_WEBHOOK_SECRET") or None
+        self.extra_headers = {}
+        self.last_error = None
+        self._sender = None
+
+    def configure(
+        self,
+        *,
+        url: Optional[str] = None,
+        timeout: Optional[float] = None,
+        retries: Optional[int] = None,
+        secret: Optional[str] = None,
+        headers: Optional[Dict[str, str]] = None,
+        sender: Optional[Callable[[str, Dict[str, Any], Dict[str, str], float], None]] = None,
+    ) -> None:
+        if url is not None:
+            self.url = url
+        if timeout is not None:
+            self.timeout = timeout
+        if retries is not None:
+            self.retries = max(1, retries)
+        if secret is not None:
+            self.secret = secret
+        if headers is not None:
+            self.extra_headers = dict(headers)
+        if sender is not None:
+            self._sender = sender
+        self.last_error = None
+
+    def notify_run(self, run: RunContext) -> bool:
+        if not self.url:
+            return False
+        payload = self._build_payload(run)
+        sender = self._sender or self._http_sender
+        headers = {"Content-Type": "application/json", **self.extra_headers}
+        if self.secret:
+            headers.setdefault("X-Pyoco-Token", self.secret)
+
+        last_exc: Optional[Exception] = None
+        for attempt in range(self.retries):
+            try:
+                sender(self.url, payload, headers, self.timeout)
+                self.last_error = None
+                return True
+            except Exception as exc:  # pragma: no cover - retries captured via tests
+                last_exc = exc
+                time.sleep(min(0.5, 0.1 * (attempt + 1)))
+        if last_exc:
+            self.last_error = str(last_exc)
+        return False
+
+    def _http_sender(
+        self,
+        url: str,
+        payload: Dict[str, Any],
+        headers: Dict[str, str],
+        timeout: float,
+    ) -> None:
+        httpx.post(url, json=payload, headers=headers, timeout=timeout)
+
+    def _build_payload(self, run: RunContext) -> Dict[str, Any]:
+        duration_ms = None
+        if run.start_time and run.end_time:
+            duration_ms = (run.end_time - run.start_time) * 1000.0
+        return {
+            "event": f"run.{run.status.value.lower()}",
+            "run_id": run.run_id,
+            "flow_name": run.flow_name,
+            "status": run.status.value if isinstance(run.status, RunStatus) else run.status,
+            "started_at": run.start_time,
+            "ended_at": run.end_time,
+            "duration_ms": duration_ms,
+            "tasks": run.serialize_task_records(),
+            "metadata": run.metadata,
+        }
+
+    def reset(self) -> None:
+        self.load_from_env()
+
+
+webhook_notifier = WebhookNotifier()
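The injectable sender in configure() lets tests capture deliveries without any network. A sketch, with a placeholder URL:

```python
from pyoco.server.webhook import webhook_notifier

captured = []

def fake_sender(url, payload, headers, timeout):
    # Stands in for the httpx.post call inside _http_sender.
    captured.append((url, payload["event"], headers.get("X-Pyoco-Token")))

webhook_notifier.configure(
    url="https://example.invalid/hooks/pyoco",  # placeholder URL
    secret="s3cret",
    retries=2,
    sender=fake_sender,
)

# webhook_notifier.notify_run(run) now appends to `captured` and returns True;
# if the sender keeps raising, it returns False and stores the error in last_error.
webhook_notifier.reset()  # back to PYOCO_WEBHOOK_* environment settings
```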
pyoco/socketless_reset.py ADDED
@@ -0,0 +1,7 @@
+from pyoco.server.api import store
+
+
+def reset_store():
+    store.runs.clear()
+    store.queue.clear()
+    store.history.clear()
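In a test suite this pairs naturally with the other reset hooks. A pytest-fixture sketch, assuming the module lands at pyoco/socketless_reset.py as the RECORD entry suggests:

```python
import pytest

from pyoco.socketless_reset import reset_store
from pyoco.server.metrics import metrics
from pyoco.server.webhook import webhook_notifier

@pytest.fixture(autouse=True)
def clean_server_state():
    # Fresh in-memory store, metrics registry, and webhook config per test.
    reset_store()
    metrics.reset()
    webhook_notifier.reset()
    yield
```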
pyoco/worker/runner.py CHANGED
@@ -21,11 +21,7 @@ class RemoteTraceBackend(TraceBackend):
     def _send_heartbeat(self, force=False):
         now = time.time()
         if force or (now - self.last_heartbeat > self.heartbeat_interval):
-            cancel = self.client.heartbeat(
-                self.run_ctx.run_id,
-                self.run_ctx.tasks,
-                self.run_ctx.status
-            )
+            cancel = self.client.heartbeat(self.run_ctx)
             if cancel and self.run_ctx.status not in [RunStatus.CANCELLING, RunStatus.CANCELLED]:
                 print(f"🛑 Cancellation requested from server for run {self.run_ctx.run_id}")
                 self.run_ctx.status = RunStatus.CANCELLING
@@ -162,10 +158,9 @@ class Worker:
             engine.run(flow, params=params, run_context=run_ctx)
             print(f"✅ Job {run_id} completed: {run_ctx.status}")
             # Send final heartbeat
-            self.client.heartbeat(run_id, run_ctx.tasks, run_ctx.status)
+            self.client.heartbeat(run_ctx)
         except Exception as e:
             print(f"💥 Job {run_id} failed: {e}")
             # Heartbeat one last time
             run_ctx.status = RunStatus.FAILED
-            self.client.heartbeat(run_id, run_ctx.tasks, run_ctx.status)
-
+            self.client.heartbeat(run_ctx)
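The worker now hands the whole RunContext to client.heartbeat. pyoco/worker/client.py is not part of this diff, so the following is purely an illustrative shape inferred from the server's WorkerHeartbeatRequest/Response contract, not the shipped client:

```python
import httpx

class IllustrativeClient:
    """Hypothetical stand-in for the real pyoco.worker.client."""

    def __init__(self, base_url: str):
        self.base_url = base_url

    def heartbeat(self, run_ctx) -> bool:
        # Serialize the fields WorkerHeartbeatRequest expects.
        payload = {
            "run_status": run_ctx.status.value,
            "task_states": {name: s.value for name, s in run_ctx.tasks.items()},
            "task_records": run_ctx.serialize_task_records(),
            "logs": run_ctx.logs,
        }
        resp = httpx.post(f"{self.base_url}/runs/{run_ctx.run_id}/heartbeat", json=payload)
        resp.raise_for_status()
        # Server replies with WorkerHeartbeatResponse(cancel_requested=...).
        return resp.json().get("cancel_requested", False)
```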
pyoco-0.5.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pyoco
-Version: 0.3.0
+Version: 0.5.1
 Summary: A workflow engine with sugar syntax
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -8,6 +8,7 @@ Requires-Dist: pyyaml>=6.0.3
 Requires-Dist: fastapi>=0.100.0
 Requires-Dist: uvicorn>=0.20.0
 Requires-Dist: httpx>=0.24.0
+Requires-Dist: prometheus-client>=0.20.0
 
 # 🐇 Pyoco
 
@@ -132,6 +133,20 @@ Or via CLI flag:
 pyoco run --non-cute ...
 ```
 
+## 🔭 Observability Bridge (v0.5)
+
+- `/metrics` exposes Prometheus counters (`pyoco_runs_total`, `pyoco_runs_in_progress`) and histograms (`pyoco_task_duration_seconds`, `pyoco_run_duration_seconds`). Point Grafana/Prometheus at it to watch pipelines without opening sockets.
+- `/runs` now accepts `status`, `flow`, and `limit` query params; `/runs/{id}/logs?tail=100` fetches only the latest snippets for dashboards.
+- Webhook notifications fire when runs COMPLETE/FAIL; configure via `PYOCO_WEBHOOK_*` env vars and forward to Slack or your alerting stack.
+- Import `docs/grafana_pyoco_cute.json` for a lavender/orange starter dashboard (3 panels: in-progress count, completion trend, per-flow latency).
+- See [docs/observability.md](docs/observability.md) for detailed instructions.
+
+## 🧩 Plug-ins
+
+Need to share domain-specific tasks? Publish an entry point under `pyoco.tasks` and pyoco will auto-load it. In v0.5.1 we recommend **Task subclasses first** (callables still work, with warnings). See [docs/plugins.md](docs/plugins.md) for examples, a quickstart, and `pyoco plugins list` / `pyoco plugins lint`.
+
+**Big data note:** pass handles, not copies. For large tensors/images, stash paths or handles in `ctx.artifacts`/`ctx.scratch` and let downstream tasks materialize only when needed. For lazy pipelines (e.g., DataPipe), log the pipeline when you actually iterate (typically the training task) instead of materializing upstream.
+
 ## 📚 Documentation
 
 - [Tutorials](docs/tutorial/index.md)
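To make the webhook bullet concrete, a minimal receiver sketch: the payload fields mirror WebhookNotifier._build_payload above, the event strings assume upper-case RunStatus values, and the Slack forwarding is left as a stub.

```python
from fastapi import FastAPI, Request

app = FastAPI()

@app.post("/hooks/pyoco")
async def pyoco_hook(request: Request):
    event = await request.json()
    # Fields per _build_payload: event, run_id, flow_name, status,
    # started_at, ended_at, duration_ms, tasks, metadata.
    if event["event"] in ("run.completed", "run.failed"):
        print(f"{event['flow_name']} -> {event['status']} ({event['duration_ms']} ms)")
        # TODO: forward to Slack / your alerting stack here.
    return {"ok": True}
```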
pyoco-0.5.1.dist-info/RECORD ADDED
@@ -0,0 +1,33 @@
+pyoco/__init__.py,sha256=E2pgDGvGRSVon7dSqIM4UD55LgVpf4jiZZA-70kOcuw,409
+pyoco/client.py,sha256=Y95NmMsOKTJ9AZJEg_OzHamC_w32YWmSVS653mpqHVQ,3141
+pyoco/socketless_reset.py,sha256=KsAF4I23_Kbhy9fIWFARzV5QaIOQqbl0U0yPb8a34sM,129
+pyoco/cli/entry.py,sha256=zPIG0Gx-cFO8Cf1Z3wD3Ifz_2sHaryHZ6mCRri2WEqE,93
+pyoco/cli/main.py,sha256=LbhgTgRw9Tr_04hiYLqLP64jdnE1RA8B9Rasetgc_MM,18557
+pyoco/core/base_task.py,sha256=z7hOFntAPv4yCADapS-fhtLe5eWqaO8k3T1r05YEEUE,2106
+pyoco/core/context.py,sha256=TeCUriOmg7qZB3nMRu8HPdPshMW6pMVx48xZLY6a-A4,6524
+pyoco/core/engine.py,sha256=iX2Id8ryFt-xeZgraqnF3uqkI6ubiZt5NBNYWX6Qv1s,24166
+pyoco/core/exceptions.py,sha256=G82KY8PCnAhp3IDDIG8--Uh3EfVa192zei3l6ihfShI,565
+pyoco/core/models.py,sha256=8faYURF43-7IebqzTIorHxpCeC4TZfoXWjGyPNaWhyI,10501
+pyoco/discovery/loader.py,sha256=L9Wb2i-d1Hv3EiTFUvuR2mrv7Fc9vt5Bv9ZRuRqAzSg,6132
+pyoco/discovery/plugins.py,sha256=r1KY-OwWXSSe6arVOdfK72pGaI3tpumucg9cXEXA-Z0,4873
+pyoco/dsl/__init__.py,sha256=xWdb60pSRL8lNFk4GHF3EJ4hon0uiWqpv264g6-4gdg,45
+pyoco/dsl/expressions.py,sha256=BtEIxPSf3BU-wPNEicIqX_TVZ4fAnlWGrzrrfc6pU1g,4875
+pyoco/dsl/nodes.py,sha256=qDiIEsAJHnD8dpuOd-Rpy6OORCW6KDW_BdYiA2BKu18,1041
+pyoco/dsl/syntax.py,sha256=kYP5uGbwxmkSd_zeSksax8iWm_7UlRW5JxE9_DoSqbk,8638
+pyoco/dsl/validator.py,sha256=HXjcc-GzjH72YByaNxAg_7YOZsVsFDFnUaenVwd5PbY,3576
+pyoco/schemas/config.py,sha256=KkGZK3GxTHoIHEGb4f4k8GE2W-aBN4iPzmc_HrwuROU,1735
+pyoco/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+pyoco/server/api.py,sha256=vu2ieDZgHbi8cysO2rS-lcxqWiSQprIcqRn6GkwTtKo,3890
+pyoco/server/metrics.py,sha256=92sHZKka_yBNBGlHZgRIteywx97aoTa-MnXh3UJ0HJY,2952
+pyoco/server/models.py,sha256=ir5AuvyXQigmaynA7bS_0RNJcJo2VtpJl0GjRZrj2rU,786
+pyoco/server/store.py,sha256=ITYAV1QlPWDnceywqjjJZW9E0CyocFlPmqqfjcoM-wA,9133
+pyoco/server/webhook.py,sha256=fBSLWTDN7sIWSK0AUVuiCSdVVBFV_AyP-XEKOcdMXmQ,3643
+pyoco/trace/backend.py,sha256=a1css94_lhO4SGSPHZ1f59HJqFQtZ5Sjx09Kw7v5bsk,617
+pyoco/trace/console.py,sha256=I-BcF405OGLWoacJWeke8vTT9M5JxSBpJL-NazVyxb4,1742
+pyoco/worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+pyoco/worker/client.py,sha256=862KccXRtfG7zd9ZSLqrpVSV6ev8zeuEHHdtAfLghiM,1557
+pyoco/worker/runner.py,sha256=hyKn5NbuIuF-109CnQbYc8laKbWmwe9ChaLrNUtsVIg,6367
+pyoco-0.5.1.dist-info/METADATA,sha256=JLUsGfujXl71AvCSuKDc52v2FjSxlWcIocGyCCzHnrU,5642
+pyoco-0.5.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+pyoco-0.5.1.dist-info/top_level.txt,sha256=2JRVocfaWRbX1VJ3zq1c5wQaOK6fMARS6ptVFWyvRF4,6
+pyoco-0.5.1.dist-info/RECORD,,