AbstractRuntime 0.0.0-py3-none-any.whl → 0.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractruntime/__init__.py +104 -2
- abstractruntime/core/__init__.py +26 -0
- abstractruntime/core/config.py +101 -0
- abstractruntime/core/models.py +282 -0
- abstractruntime/core/policy.py +166 -0
- abstractruntime/core/runtime.py +736 -0
- abstractruntime/core/spec.py +53 -0
- abstractruntime/core/vars.py +94 -0
- abstractruntime/identity/__init__.py +7 -0
- abstractruntime/identity/fingerprint.py +57 -0
- abstractruntime/integrations/__init__.py +11 -0
- abstractruntime/integrations/abstractcore/__init__.py +47 -0
- abstractruntime/integrations/abstractcore/effect_handlers.py +119 -0
- abstractruntime/integrations/abstractcore/factory.py +187 -0
- abstractruntime/integrations/abstractcore/llm_client.py +397 -0
- abstractruntime/integrations/abstractcore/logging.py +27 -0
- abstractruntime/integrations/abstractcore/tool_executor.py +168 -0
- abstractruntime/scheduler/__init__.py +13 -0
- abstractruntime/scheduler/convenience.py +324 -0
- abstractruntime/scheduler/registry.py +101 -0
- abstractruntime/scheduler/scheduler.py +431 -0
- abstractruntime/storage/__init__.py +25 -0
- abstractruntime/storage/artifacts.py +519 -0
- abstractruntime/storage/base.py +107 -0
- abstractruntime/storage/in_memory.py +119 -0
- abstractruntime/storage/json_files.py +208 -0
- abstractruntime/storage/ledger_chain.py +153 -0
- abstractruntime/storage/snapshots.py +217 -0
- abstractruntime-0.2.0.dist-info/METADATA +163 -0
- abstractruntime-0.2.0.dist-info/RECORD +32 -0
- {abstractruntime-0.0.0.dist-info → abstractruntime-0.2.0.dist-info}/licenses/LICENSE +3 -1
- abstractruntime-0.0.0.dist-info/METADATA +0 -89
- abstractruntime-0.0.0.dist-info/RECORD +0 -5
- {abstractruntime-0.0.0.dist-info → abstractruntime-0.2.0.dist-info}/WHEEL +0 -0
abstractruntime/core/runtime.py
@@ -0,0 +1,736 @@
"""abstractruntime.core.runtime

Minimal durable graph runner (v0.1).

Key semantics:
- `tick()` progresses a run until it blocks (WAITING) or completes.
- Blocking is represented by a persisted WaitState in RunState.
- `resume()` injects an external payload to unblock a waiting run.

Durability note:
This MVP persists checkpoints + a ledger, but does NOT attempt to implement
full Temporal-like replay/determinism guarantees.

We keep the design explicitly modular:
- stores: RunStore + LedgerStore
- effect handlers: pluggable registry
"""

from __future__ import annotations

from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any, Callable, Dict, Optional
import inspect

from .config import RuntimeConfig
from .models import (
    Effect,
    EffectType,
    LimitWarning,
    RunState,
    RunStatus,
    StepPlan,
    StepRecord,
    StepStatus,
    WaitReason,
    WaitState,
)
from .spec import WorkflowSpec
from .policy import DefaultEffectPolicy, EffectPolicy
from ..storage.base import LedgerStore, RunStore


def utc_now_iso() -> str:
    return datetime.now(timezone.utc).isoformat()


@dataclass
class DefaultRunContext:
    def now_iso(self) -> str:
        return utc_now_iso()


# NOTE:
# Effect handlers are given the node's `next_node` as `default_next_node` so that
# waiting effects (ask_user / wait_until / tool passthrough) can safely resume
# into the next node without forcing every node to duplicate `resume_to_node`
# into the effect payload.
EffectHandler = Callable[[RunState, Effect, Optional[str]], "EffectOutcome"]


@dataclass(frozen=True)
class EffectOutcome:
    """Result of executing an effect."""

    status: str  # "completed" | "waiting" | "failed"
    result: Optional[Dict[str, Any]] = None
    wait: Optional[WaitState] = None
    error: Optional[str] = None

    @classmethod
    def completed(cls, result: Optional[Dict[str, Any]] = None) -> "EffectOutcome":
        return cls(status="completed", result=result)

    @classmethod
    def waiting(cls, wait: WaitState) -> "EffectOutcome":
        return cls(status="waiting", wait=wait)

    @classmethod
    def failed(cls, error: str) -> "EffectOutcome":
        return cls(status="failed", error=error)

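EffectOutcome and the three-argument EffectHandler alias are the extension seam for integrations (the abstractcore effect_handlers module in the file list plugs in here). A minimal sketch of a custom handler, using only names defined in this file; registering it under EffectType.ASK_USER overrides the builtin, since user-supplied handlers are applied after _register_builtin_handlers():

# Illustrative sketch, not part of the wheel: auto-answer ask_user effects
# instead of parking the run in WAITING.
def auto_answer(run: RunState, effect: Effect, default_next_node: Optional[str]) -> EffectOutcome:
    prompt = effect.payload.get("prompt", "")
    return EffectOutcome.completed({"answer": f"auto-reply to: {prompt}"})

# runtime = Runtime(run_store=..., ledger_store=...,
#                   effect_handlers={EffectType.ASK_USER: auto_answer})
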
class Runtime:
    """Durable graph runner."""

    def __init__(
        self,
        *,
        run_store: RunStore,
        ledger_store: LedgerStore,
        effect_handlers: Optional[Dict[EffectType, EffectHandler]] = None,
        context: Optional[Any] = None,
        workflow_registry: Optional[Any] = None,
        artifact_store: Optional[Any] = None,
        effect_policy: Optional[EffectPolicy] = None,
        config: Optional[RuntimeConfig] = None,
    ):
        self._run_store = run_store
        self._ledger_store = ledger_store
        self._ctx = context or DefaultRunContext()
        self._workflow_registry = workflow_registry
        self._artifact_store = artifact_store
        self._effect_policy: EffectPolicy = effect_policy or DefaultEffectPolicy()
        self._config: RuntimeConfig = config or RuntimeConfig()

        self._handlers: Dict[EffectType, EffectHandler] = {}
        self._register_builtin_handlers()
        if effect_handlers:
            self._handlers.update(effect_handlers)

    # ---------------------------------------------------------------------
    # Public API
    # ---------------------------------------------------------------------

    @property
    def run_store(self) -> RunStore:
        """Access the run store."""
        return self._run_store

    @property
    def ledger_store(self) -> LedgerStore:
        """Access the ledger store."""
        return self._ledger_store

    @property
    def workflow_registry(self) -> Optional[Any]:
        """Access the workflow registry (if set)."""
        return self._workflow_registry

    def set_workflow_registry(self, registry: Any) -> None:
        """Set the workflow registry for subworkflow support."""
        self._workflow_registry = registry

    @property
    def artifact_store(self) -> Optional[Any]:
        """Access the artifact store (if set)."""
        return self._artifact_store

    def set_artifact_store(self, store: Any) -> None:
        """Set the artifact store for large payload support."""
        self._artifact_store = store

    @property
    def effect_policy(self) -> EffectPolicy:
        """Access the effect policy."""
        return self._effect_policy

    def set_effect_policy(self, policy: EffectPolicy) -> None:
        """Set the effect policy for retry and idempotency."""
        self._effect_policy = policy

    @property
    def config(self) -> RuntimeConfig:
        """Access the runtime configuration."""
        return self._config

    def start(
        self,
        *,
        workflow: WorkflowSpec,
        vars: Optional[Dict[str, Any]] = None,
        actor_id: Optional[str] = None,
        session_id: Optional[str] = None,
        parent_run_id: Optional[str] = None,
    ) -> str:
        # Initialize vars with _limits from config if not already set
        vars = dict(vars or {})
        if "_limits" not in vars:
            vars["_limits"] = self._config.to_limits_dict()

        run = RunState.new(
            workflow_id=workflow.workflow_id,
            entry_node=workflow.entry_node,
            vars=vars,
            actor_id=actor_id,
            session_id=session_id,
            parent_run_id=parent_run_id,
        )
        self._run_store.save(run)
        return run.run_id

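start() only persists the initial RunState; nothing executes until tick() is called. A hedged wiring sketch: the in-memory store class names below are assumptions (the diff shows abstractruntime/storage/in_memory.py exists, not what it exports), and my_workflow stands for a WorkflowSpec built elsewhere:

# Hypothetical names: InMemoryRunStore / InMemoryLedgerStore are not confirmed
# by this diff, only the in_memory module itself is.
from abstractruntime.storage.in_memory import InMemoryRunStore, InMemoryLedgerStore

runtime = Runtime(run_store=InMemoryRunStore(), ledger_store=InMemoryLedgerStore())
run_id = runtime.start(workflow=my_workflow, vars={"question": "2 + 2?"})
state = runtime.tick(workflow=my_workflow, run_id=run_id)  # drive until WAITING or COMPLETED
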
    def cancel_run(self, run_id: str, *, reason: Optional[str] = None) -> RunState:
        """Cancel a run.

        Sets the run status to CANCELLED. Only RUNNING or WAITING runs can be cancelled.
        COMPLETED, FAILED, or already CANCELLED runs are returned unchanged.

        Args:
            run_id: The run to cancel.
            reason: Optional cancellation reason (stored in error field).

        Returns:
            The updated RunState.

        Raises:
            KeyError: If run_id not found.
        """
        run = self.get_state(run_id)

        # Terminal states cannot be cancelled
        if run.status in (RunStatus.COMPLETED, RunStatus.FAILED, RunStatus.CANCELLED):
            return run

        run.status = RunStatus.CANCELLED
        run.error = reason or "Cancelled"
        run.waiting = None
        run.updated_at = utc_now_iso()
        self._run_store.save(run)
        return run

    def get_state(self, run_id: str) -> RunState:
        run = self._run_store.load(run_id)
        if run is None:
            raise KeyError(f"Unknown run_id: {run_id}")
        return run

    def get_ledger(self, run_id: str) -> list[dict[str, Any]]:
        return self._ledger_store.list(run_id)

    # ---------------------------------------------------------------------
    # Limit Management
    # ---------------------------------------------------------------------

    def get_limit_status(self, run_id: str) -> Dict[str, Any]:
        """Get current limit status for a run.

        Returns a structured dict with information about iterations, tokens,
        and history limits, including whether warning thresholds are reached.

        Args:
            run_id: The run to check

        Returns:
            Dict with "iterations", "tokens", and "history" status info

        Raises:
            KeyError: If run_id not found
        """
        run = self.get_state(run_id)
        limits = run.vars.get("_limits", {})

        def pct(current: int, maximum: int) -> float:
            return round(current / maximum * 100, 1) if maximum > 0 else 0

        current_iter = int(limits.get("current_iteration", 0) or 0)
        max_iter = int(limits.get("max_iterations", 25) or 25)
        tokens_used = int(limits.get("estimated_tokens_used", 0) or 0)
        max_tokens = int(limits.get("max_tokens", 32768) or 32768)

        return {
            "iterations": {
                "current": current_iter,
                "max": max_iter,
                "pct": pct(current_iter, max_iter),
                "warning": pct(current_iter, max_iter) >= limits.get("warn_iterations_pct", 80),
            },
            "tokens": {
                "estimated_used": tokens_used,
                "max": max_tokens,
                "pct": pct(tokens_used, max_tokens),
                "warning": pct(tokens_used, max_tokens) >= limits.get("warn_tokens_pct", 80),
            },
            "history": {
                "max_messages": limits.get("max_history_messages", -1),
            },
        }

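The returned mapping is plain data meant for direct rendering. For example, a run 20 iterations into a 25-iteration budget with the default 80% thresholds reports:

# get_limit_status(run_id) ->
# {
#     "iterations": {"current": 20, "max": 25, "pct": 80.0, "warning": True},
#     "tokens": {"estimated_used": 1200, "max": 32768, "pct": 3.7, "warning": False},
#     "history": {"max_messages": -1},
# }
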
    def check_limits(self, run: RunState) -> list[LimitWarning]:
        """Check if any limits are approaching or exceeded.

        This is the hybrid enforcement model: the runtime provides warnings,
        workflow nodes are responsible for enforcement decisions.

        Args:
            run: The RunState to check

        Returns:
            List of LimitWarning objects for any limits at warning threshold or exceeded
        """
        warnings: list[LimitWarning] = []
        limits = run.vars.get("_limits", {})

        # Check iterations
        current = int(limits.get("current_iteration", 0) or 0)
        max_iter = int(limits.get("max_iterations", 25) or 25)
        warn_pct = int(limits.get("warn_iterations_pct", 80) or 80)

        if max_iter > 0:
            if current >= max_iter:
                warnings.append(LimitWarning("iterations", "exceeded", current, max_iter))
            elif (current / max_iter * 100) >= warn_pct:
                warnings.append(LimitWarning("iterations", "warning", current, max_iter))

        # Check tokens
        tokens_used = int(limits.get("estimated_tokens_used", 0) or 0)
        max_tokens = int(limits.get("max_tokens", 32768) or 32768)
        warn_tokens_pct = int(limits.get("warn_tokens_pct", 80) or 80)

        if max_tokens > 0 and tokens_used > 0:
            if tokens_used >= max_tokens:
                warnings.append(LimitWarning("tokens", "exceeded", tokens_used, max_tokens))
            elif (tokens_used / max_tokens * 100) >= warn_tokens_pct:
                warnings.append(LimitWarning("tokens", "warning", tokens_used, max_tokens))

        return warnings

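Because enforcement is delegated, a node that wants hard stops must act on these warnings itself. A sketch of that pattern; the StepPlan keyword names are inferred from their use in tick() below, and the closed-over runtime is illustrative:

# Editor's sketch: a node that enforces limits itself (the runtime only warns).
def agent_step(run: RunState, ctx: Any) -> StepPlan:
    if runtime.check_limits(run):  # non-empty list: at or over a threshold
        return StepPlan(node_id="agent_step",
                        complete_output={"stopped": "limit threshold reached"})
    return StepPlan(node_id="agent_step", next_node="call_llm")
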
    def update_limits(self, run_id: str, updates: Dict[str, Any]) -> None:
        """Update limits for a running workflow.

        This allows mid-session updates (e.g., from /max-tokens command).
        Only allowed limit keys are updated; unknown keys are ignored.

        Args:
            run_id: The run to update
            updates: Dict of limit updates (e.g., {"max_tokens": 65536})

        Raises:
            KeyError: If run_id not found
        """
        run = self.get_state(run_id)
        limits = run.vars.setdefault("_limits", {})

        allowed_keys = {
            "max_iterations",
            "max_tokens",
            "max_output_tokens",
            "max_history_messages",
            "warn_iterations_pct",
            "warn_tokens_pct",
            "estimated_tokens_used",
            "current_iteration",
        }

        for key, value in updates.items():
            if key in allowed_keys:
                limits[key] = value

        self._run_store.save(run)

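A worked example of a mid-session update, matching the docstring's /max-tokens case; note the silent-ignore behaviour for unknown keys:

runtime.update_limits(run_id, {"max_tokens": 65536, "not_a_limit": 1})
limits = runtime.get_state(run_id).vars["_limits"]
assert limits["max_tokens"] == 65536 and "not_a_limit" not in limits
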
    def tick(self, *, workflow: WorkflowSpec, run_id: str, max_steps: int = 100) -> RunState:
        run = self.get_state(run_id)
        # Terminal states (including CANCELLED) are returned unchanged.
        if run.status in (RunStatus.COMPLETED, RunStatus.FAILED, RunStatus.CANCELLED):
            return run
        if run.status == RunStatus.WAITING:
            # For WAIT_UNTIL we can auto-unblock if time passed
            if run.waiting and run.waiting.reason == WaitReason.UNTIL and run.waiting.until:
                if utc_now_iso() >= run.waiting.until:
                    self._apply_resume_payload(run, payload={}, override_node=run.waiting.resume_to_node)
                else:
                    return run
            else:
                return run

        steps = 0
        while steps < max_steps:
            steps += 1

            handler = workflow.get_node(run.current_node)
            plan = handler(run, self._ctx)

            # Completion
            if plan.complete_output is not None:
                run.status = RunStatus.COMPLETED
                run.output = plan.complete_output
                run.updated_at = utc_now_iso()
                self._run_store.save(run)
                # ledger: completion record (no effect)
                rec = StepRecord.start(run=run, node_id=plan.node_id, effect=None)
                rec.status = StepStatus.COMPLETED
                rec.result = {"completed": True}
                rec.ended_at = utc_now_iso()
                self._ledger_store.append(rec)
                return run

            # Pure transition
            if plan.effect is None:
                if not plan.next_node:
                    raise ValueError(f"Node '{plan.node_id}' returned no effect and no next_node")
                run.current_node = plan.next_node
                run.updated_at = utc_now_iso()
                self._run_store.save(run)
                continue

            # Effectful step - check for prior completed result (idempotency)
            idempotency_key = self._effect_policy.idempotency_key(
                run=run, node_id=plan.node_id, effect=plan.effect
            )
            prior_result = self._find_prior_completed_result(run.run_id, idempotency_key)

            if prior_result is not None:
                # Reuse prior result - skip re-execution
                outcome = EffectOutcome.completed(prior_result)
            else:
                # Execute with retry logic
                outcome = self._execute_effect_with_retry(
                    run=run,
                    node_id=plan.node_id,
                    effect=plan.effect,
                    idempotency_key=idempotency_key,
                    default_next_node=plan.next_node,
                )

            if outcome.status == "failed":
                run.status = RunStatus.FAILED
                run.error = outcome.error or "unknown error"
                run.updated_at = utc_now_iso()
                self._run_store.save(run)
                return run

            if outcome.status == "waiting":
                assert outcome.wait is not None
                run.status = RunStatus.WAITING
                run.waiting = outcome.wait
                run.updated_at = utc_now_iso()
                self._run_store.save(run)
                return run

            # completed
            if plan.effect.result_key and outcome.result is not None:
                _set_nested(run.vars, plan.effect.result_key, outcome.result)

            if not plan.next_node:
                raise ValueError(f"Node '{plan.node_id}' executed effect but did not specify next_node")
            run.current_node = plan.next_node
            run.updated_at = utc_now_iso()
            self._run_store.save(run)

        return run

    def resume(self, *, workflow: WorkflowSpec, run_id: str, wait_key: Optional[str], payload: Dict[str, Any]) -> RunState:
        run = self.get_state(run_id)
        if run.status != RunStatus.WAITING or run.waiting is None:
            raise ValueError("Run is not waiting")

        # Validate wait_key if provided
        if wait_key is not None and run.waiting.wait_key is not None and wait_key != run.waiting.wait_key:
            raise ValueError(f"wait_key mismatch: expected '{run.waiting.wait_key}', got '{wait_key}'")

        resume_to = run.waiting.resume_to_node
        result_key = run.waiting.result_key

        if result_key:
            _set_nested(run.vars, result_key, payload)

        self._apply_resume_payload(run, payload=payload, override_node=resume_to)
        run.updated_at = utc_now_iso()
        self._run_store.save(run)

        return self.tick(workflow=workflow, run_id=run_id)

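Together, tick() and resume() form the blocking lifecycle promised in the module docstring: a run parks as WAITING on an ask_user (or wait_event) effect, and resume() both stores the payload under the wait's result_key and re-enters tick(). A condensed sketch, with wf standing for a WorkflowSpec:

run_id = runtime.start(workflow=wf, vars={})
state = runtime.tick(workflow=wf, run_id=run_id)
if state.status == RunStatus.WAITING:
    print(state.waiting.prompt)  # e.g. set by the ask_user handler
    state = runtime.resume(workflow=wf, run_id=run_id,
                           wait_key=state.waiting.wait_key,
                           payload={"answer": "yes"})
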
    # ---------------------------------------------------------------------
    # Internals
    # ---------------------------------------------------------------------

    def _register_builtin_handlers(self) -> None:
        self._handlers[EffectType.WAIT_EVENT] = self._handle_wait_event
        self._handlers[EffectType.WAIT_UNTIL] = self._handle_wait_until
        self._handlers[EffectType.ASK_USER] = self._handle_ask_user
        self._handlers[EffectType.START_SUBWORKFLOW] = self._handle_start_subworkflow

    def _find_prior_completed_result(
        self, run_id: str, idempotency_key: str
    ) -> Optional[Dict[str, Any]]:
        """Find a prior completed result for an idempotency key.

        Scans the ledger for a completed step with the same idempotency key.
        Returns the result if found, None otherwise.
        """
        records = self._ledger_store.list(run_id)
        for record in records:
            if record.get("idempotency_key") == idempotency_key:
                if record.get("status") == StepStatus.COMPLETED.value:
                    return record.get("result")
        return None

    def _execute_effect_with_retry(
        self,
        *,
        run: RunState,
        node_id: str,
        effect: Effect,
        idempotency_key: str,
        default_next_node: Optional[str],
    ) -> EffectOutcome:
        """Execute an effect with retry logic.

        Retries according to the effect policy. Records each attempt
        in the ledger with attempt number and idempotency key.
        """
        import time

        max_attempts = self._effect_policy.max_attempts(effect)
        last_error: Optional[str] = None

        for attempt in range(1, max_attempts + 1):
            # Record attempt start
            rec = StepRecord.start(
                run=run,
                node_id=node_id,
                effect=effect,
                attempt=attempt,
                idempotency_key=idempotency_key,
            )
            self._ledger_store.append(rec)

            # Execute the effect (catch exceptions as failures)
            try:
                outcome = self._execute_effect(run, effect, default_next_node)
            except Exception as e:
                outcome = EffectOutcome.failed(f"Effect handler raised exception: {e}")

            if outcome.status == "completed":
                rec.finish_success(outcome.result)
                self._ledger_store.append(rec)
                return outcome

            if outcome.status == "waiting":
                rec.finish_waiting(outcome.wait)
                self._ledger_store.append(rec)
                return outcome

            # Failed - record and maybe retry
            last_error = outcome.error or "unknown error"
            rec.finish_failure(last_error)
            self._ledger_store.append(rec)

            if attempt < max_attempts:
                # Wait before retry
                backoff = self._effect_policy.backoff_seconds(
                    effect=effect, attempt=attempt
                )
                if backoff > 0:
                    time.sleep(backoff)

        # All attempts exhausted
        return EffectOutcome.failed(
            f"Effect failed after {max_attempts} attempts: {last_error}"
        )

    def _execute_effect(self, run: RunState, effect: Effect, default_next_node: Optional[str]) -> EffectOutcome:
        if effect.type not in self._handlers:
            return EffectOutcome.failed(f"No effect handler registered for {effect.type.value}")
        handler = self._handlers[effect.type]

        # Backward compatibility: allow older handlers with signature (run, effect).
        # New handlers can accept (run, effect, default_next_node) to implement
        # correct resume semantics for waiting effects without duplicating payload fields.
        try:
            sig = inspect.signature(handler)
        except (TypeError, ValueError):
            sig = None

        if sig is not None:
            params = list(sig.parameters.values())
            has_varargs = any(p.kind == inspect.Parameter.VAR_POSITIONAL for p in params)
            if has_varargs or len(params) >= 3:
                return handler(run, effect, default_next_node)
            return handler(run, effect)

        # If signature inspection fails, fall back to attempting the new call form,
        # then the legacy form (only for arity-mismatch TypeError).
        try:
            return handler(run, effect, default_next_node)
        except TypeError as e:
            msg = str(e)
            if "positional" in msg and "argument" in msg and ("given" in msg or "required" in msg):
                return handler(run, effect)
            raise

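The dispatch above means both handler arities keep working; only the three-argument form sees the node's next_node. A minimal illustration:

def legacy_handler(run: RunState, effect: Effect) -> EffectOutcome:
    return EffectOutcome.completed({"ok": True})  # old (run, effect) contract

def current_handler(run: RunState, effect: Effect,
                    default_next_node: Optional[str]) -> EffectOutcome:
    return EffectOutcome.completed({"resume_into": default_next_node})
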
    def _apply_resume_payload(self, run: RunState, *, payload: Dict[str, Any], override_node: Optional[str]) -> None:
        run.status = RunStatus.RUNNING
        run.waiting = None
        if override_node:
            run.current_node = override_node

    # Built-in wait handlers ------------------------------------------------

    def _handle_wait_event(self, run: RunState, effect: Effect, default_next_node: Optional[str]) -> EffectOutcome:
        wait_key = effect.payload.get("wait_key")
        if not wait_key:
            return EffectOutcome.failed("wait_event requires payload.wait_key")
        resume_to = effect.payload.get("resume_to_node") or default_next_node
        wait = WaitState(
            reason=WaitReason.EVENT,
            wait_key=str(wait_key),
            resume_to_node=resume_to,
            result_key=effect.result_key,
        )
        return EffectOutcome.waiting(wait)

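A node surfaces this through its StepPlan; a sketch, with Effect and StepPlan keyword names inferred from their use in this file:

plan = StepPlan(
    node_id="await_payment",
    effect=Effect(type=EffectType.WAIT_EVENT,
                  payload={"wait_key": "order:1234:paid"},
                  result_key="payment"),  # resume payload lands in run.vars["payment"]
    next_node="after_payment",            # used as the default resume target
)
# Later, an external caller unblocks the run:
# runtime.resume(workflow=wf, run_id=run_id, wait_key="order:1234:paid", payload={...})
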
    def _handle_wait_until(self, run: RunState, effect: Effect, default_next_node: Optional[str]) -> EffectOutcome:
        until = effect.payload.get("until")
        if not until:
            return EffectOutcome.failed("wait_until requires payload.until (ISO timestamp)")

        resume_to = effect.payload.get("resume_to_node") or default_next_node
        # Lexicographic comparison: assumes both sides are UTC ISO-8601 strings.
        if utc_now_iso() >= str(until):
            # immediate
            return EffectOutcome.completed({"until": str(until), "ready": True})

        wait = WaitState(
            reason=WaitReason.UNTIL,
            until=str(until),
            resume_to_node=resume_to,
            result_key=effect.result_key,
        )
        return EffectOutcome.waiting(wait)

    def _handle_ask_user(self, run: RunState, effect: Effect, default_next_node: Optional[str]) -> EffectOutcome:
        prompt = effect.payload.get("prompt")
        if not prompt:
            return EffectOutcome.failed("ask_user requires payload.prompt")

        resume_to = effect.payload.get("resume_to_node") or default_next_node
        wait_key = effect.payload.get("wait_key") or f"user:{run.run_id}:{run.current_node}"
        choices = effect.payload.get("choices")
        allow_free_text = bool(effect.payload.get("allow_free_text", True))

        wait = WaitState(
            reason=WaitReason.USER,
            wait_key=str(wait_key),
            resume_to_node=resume_to,
            result_key=effect.result_key,
            prompt=str(prompt),
            choices=list(choices) if isinstance(choices, list) else None,
            allow_free_text=allow_free_text,
        )
        return EffectOutcome.waiting(wait)

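An example payload for this handler (Effect keyword names inferred from this file); if wait_key is omitted the handler derives one as user:<run_id>:<node>:

effect = Effect(
    type=EffectType.ASK_USER,
    payload={"prompt": "Deploy to production?",
             "choices": ["yes", "no"],
             "allow_free_text": False},
    result_key="approval",  # resume() stores the user's payload here
)
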
    def _handle_start_subworkflow(
        self, run: RunState, effect: Effect, default_next_node: Optional[str]
    ) -> EffectOutcome:
        """Handle START_SUBWORKFLOW effect.

        Payload:
            workflow_id: str - ID of the subworkflow to start (required)
            vars: dict - Initial variables for the subworkflow (optional)
            async: bool - If True, don't wait for completion (optional, default False)

        Sync mode (async=False):
            - Starts the subworkflow and runs it until completion or waiting
            - If subworkflow completes: returns its output
            - If subworkflow waits: parent also waits (WaitReason.SUBWORKFLOW)

        Async mode (async=True):
            - Starts the subworkflow and returns immediately
            - Returns {"sub_run_id": "..."} so parent can track it
        """
        workflow_id = effect.payload.get("workflow_id")
        if not workflow_id:
            return EffectOutcome.failed("start_subworkflow requires payload.workflow_id")

        if self._workflow_registry is None:
            return EffectOutcome.failed(
                "start_subworkflow requires a workflow_registry. "
                "Set it via Runtime(workflow_registry=...) or runtime.set_workflow_registry(...)"
            )

        # Look up the subworkflow
        sub_workflow = self._workflow_registry.get(workflow_id)
        if sub_workflow is None:
            return EffectOutcome.failed(f"Workflow '{workflow_id}' not found in registry")

        sub_vars = effect.payload.get("vars") or {}
        is_async = bool(effect.payload.get("async", False))
        resume_to = effect.payload.get("resume_to_node") or default_next_node

        # Start the subworkflow with parent tracking
        sub_run_id = self.start(
            workflow=sub_workflow,
            vars=sub_vars,
            actor_id=run.actor_id,  # Inherit actor from parent
            session_id=getattr(run, "session_id", None),  # Inherit session from parent
            parent_run_id=run.run_id,  # Track parent for hierarchy
        )

        if is_async:
            # Async mode: return immediately with sub_run_id
            # The child is started but not ticked - caller is responsible for driving it
            return EffectOutcome.completed({"sub_run_id": sub_run_id, "async": True})

        # Sync mode: run the subworkflow until completion or waiting
        try:
            sub_state = self.tick(workflow=sub_workflow, run_id=sub_run_id)
        except Exception as e:
            # Child raised an exception - propagate as failure
            return EffectOutcome.failed(f"Subworkflow '{workflow_id}' failed: {e}")

        if sub_state.status == RunStatus.COMPLETED:
            # Subworkflow completed - return its output
            return EffectOutcome.completed({
                "sub_run_id": sub_run_id,
                "output": sub_state.output,
            })

        if sub_state.status == RunStatus.FAILED:
            # Subworkflow failed - propagate error
            return EffectOutcome.failed(
                f"Subworkflow '{workflow_id}' failed: {sub_state.error}"
            )

        if sub_state.status == RunStatus.WAITING:
            # Subworkflow is waiting - parent must also wait
            wait = WaitState(
                reason=WaitReason.SUBWORKFLOW,
                wait_key=f"subworkflow:{sub_run_id}",
                resume_to_node=resume_to,
                result_key=effect.result_key,
                details={
                    "sub_run_id": sub_run_id,
                    "sub_workflow_id": workflow_id,
                    "sub_waiting": {
                        "reason": sub_state.waiting.reason.value if sub_state.waiting else None,
                        "wait_key": sub_state.waiting.wait_key if sub_state.waiting else None,
                    },
                },
            )
            return EffectOutcome.waiting(wait)

        # Unexpected status
        return EffectOutcome.failed(f"Unexpected subworkflow status: {sub_state.status.value}")

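The sync/async split documented above, as two example effects ("summarize" is a placeholder workflow_id; keyword names inferred from this file):

sync_child = Effect(type=EffectType.START_SUBWORKFLOW,
                    payload={"workflow_id": "summarize", "vars": {"doc_id": "d1"}},
                    result_key="summary")  # parent blocks; gets {"sub_run_id", "output"}
async_child = Effect(type=EffectType.START_SUBWORKFLOW,
                     payload={"workflow_id": "summarize", "async": True},
                     result_key="child")   # returns {"sub_run_id": ..., "async": True} at once
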
def _set_nested(target: Dict[str, Any], dotted_key: str, value: Any) -> None:
    """Set nested dict value using dot notation."""

    parts = dotted_key.split(".")
    cur: Dict[str, Any] = target
    for p in parts[:-1]:
        nxt = cur.get(p)
        if not isinstance(nxt, dict):
            nxt = {}
            cur[p] = nxt
        cur = nxt
    cur[parts[-1]] = value