agent-runtime-core 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_runtime_core/__init__.py +19 -1
- agent_runtime_core/interfaces.py +8 -0
- agent_runtime_core/persistence/__init__.py +44 -8
- agent_runtime_core/persistence/base.py +417 -12
- agent_runtime_core/persistence/manager.py +120 -12
- agent_runtime_core/steps.py +373 -0
- agent_runtime_core-0.5.0.dist-info/METADATA +863 -0
- {agent_runtime_core-0.3.0.dist-info → agent_runtime_core-0.5.0.dist-info}/RECORD +10 -9
- agent_runtime_core-0.3.0.dist-info/METADATA +0 -461
- {agent_runtime_core-0.3.0.dist-info → agent_runtime_core-0.5.0.dist-info}/WHEEL +0 -0
- {agent_runtime_core-0.3.0.dist-info → agent_runtime_core-0.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -8,6 +8,16 @@ For Django integration, you can either:
|
|
|
8
8
|
1. Pass pre-instantiated store instances
|
|
9
9
|
2. Pass store classes with appropriate kwargs
|
|
10
10
|
3. Use factory functions for request-scoped stores
|
|
11
|
+
|
|
12
|
+
Core stores (always available):
|
|
13
|
+
- MemoryStore: Key-value storage
|
|
14
|
+
- ConversationStore: Conversation history
|
|
15
|
+
- TaskStore: Task lists and progress
|
|
16
|
+
- PreferencesStore: User/agent configuration
|
|
17
|
+
|
|
18
|
+
Optional stores (must be explicitly configured):
|
|
19
|
+
- KnowledgeStore: Facts, summaries, embeddings
|
|
20
|
+
- AuditStore: Logs, errors, metrics
|
|
11
21
|
"""
|
|
12
22
|
|
|
13
23
|
from dataclasses import dataclass, field
|
|
@@ -19,6 +29,8 @@ from agent_runtime_core.persistence.base import (
|
|
|
19
29
|
ConversationStore,
|
|
20
30
|
TaskStore,
|
|
21
31
|
PreferencesStore,
|
|
32
|
+
KnowledgeStore,
|
|
33
|
+
AuditStore,
|
|
22
34
|
Scope,
|
|
23
35
|
)
|
|
24
36
|
from agent_runtime_core.persistence.file import (
|
|
@@ -34,6 +46,8 @@ MemoryStoreFactory = Callable[[], MemoryStore]
|
|
|
34
46
|
ConversationStoreFactory = Callable[[], ConversationStore]
|
|
35
47
|
TaskStoreFactory = Callable[[], TaskStore]
|
|
36
48
|
PreferencesStoreFactory = Callable[[], PreferencesStore]
|
|
49
|
+
KnowledgeStoreFactory = Callable[[], KnowledgeStore]
|
|
50
|
+
AuditStoreFactory = Callable[[], AuditStore]
|
|
37
51
|
|
|
38
52
|
|
|
39
53
|
@dataclass
|
|
@@ -46,6 +60,9 @@ class PersistenceConfig:
|
|
|
46
60
|
- A pre-instantiated store instance
|
|
47
61
|
- A factory function that returns a store instance
|
|
48
62
|
|
|
63
|
+
Core stores (memory, conversations, tasks, preferences) have file-based
|
|
64
|
+
defaults. Optional stores (knowledge, audit) must be explicitly configured.
|
|
65
|
+
|
|
49
66
|
Example for Django:
|
|
50
67
|
from myapp.stores import DjangoMemoryStore, DjangoConversationStore
|
|
51
68
|
|
|
@@ -65,31 +82,47 @@ class PersistenceConfig:
|
|
|
65
82
|
config = PersistenceConfig(
|
|
66
83
|
memory_store_factory=lambda: DjangoMemoryStore(user=get_current_user()),
|
|
67
84
|
)
|
|
85
|
+
|
|
86
|
+
# Option 4: Enable optional stores
|
|
87
|
+
config = PersistenceConfig(
|
|
88
|
+
knowledge_store=DjangoKnowledgeStore(user=request.user),
|
|
89
|
+
audit_store=DjangoAuditStore(user=request.user),
|
|
90
|
+
)
|
|
68
91
|
"""
|
|
69
92
|
|
|
70
|
-
# Backend classes (can be swapped for custom implementations)
|
|
93
|
+
# Backend classes for core stores (can be swapped for custom implementations)
|
|
71
94
|
memory_store_class: Type[MemoryStore] = FileMemoryStore
|
|
72
95
|
conversation_store_class: Type[ConversationStore] = FileConversationStore
|
|
73
96
|
task_store_class: Type[TaskStore] = FileTaskStore
|
|
74
97
|
preferences_store_class: Type[PreferencesStore] = FilePreferencesStore
|
|
75
98
|
|
|
99
|
+
# Backend classes for optional stores (no defaults - must be explicitly set)
|
|
100
|
+
knowledge_store_class: Optional[Type[KnowledgeStore]] = None
|
|
101
|
+
audit_store_class: Optional[Type[AuditStore]] = None
|
|
102
|
+
|
|
76
103
|
# Pre-instantiated store instances (takes precedence over classes)
|
|
77
104
|
memory_store: Optional[MemoryStore] = None
|
|
78
105
|
conversation_store: Optional[ConversationStore] = None
|
|
79
106
|
task_store: Optional[TaskStore] = None
|
|
80
107
|
preferences_store: Optional[PreferencesStore] = None
|
|
108
|
+
knowledge_store: Optional[KnowledgeStore] = None
|
|
109
|
+
audit_store: Optional[AuditStore] = None
|
|
81
110
|
|
|
82
111
|
# Factory functions (takes precedence over classes, but not instances)
|
|
83
112
|
memory_store_factory: Optional[MemoryStoreFactory] = None
|
|
84
113
|
conversation_store_factory: Optional[ConversationStoreFactory] = None
|
|
85
114
|
task_store_factory: Optional[TaskStoreFactory] = None
|
|
86
115
|
preferences_store_factory: Optional[PreferencesStoreFactory] = None
|
|
116
|
+
knowledge_store_factory: Optional[KnowledgeStoreFactory] = None
|
|
117
|
+
audit_store_factory: Optional[AuditStoreFactory] = None
|
|
87
118
|
|
|
88
119
|
# Kwargs passed to store class constructors (only used with classes)
|
|
89
120
|
memory_store_kwargs: dict = field(default_factory=dict)
|
|
90
121
|
conversation_store_kwargs: dict = field(default_factory=dict)
|
|
91
122
|
task_store_kwargs: dict = field(default_factory=dict)
|
|
92
123
|
preferences_store_kwargs: dict = field(default_factory=dict)
|
|
124
|
+
knowledge_store_kwargs: dict = field(default_factory=dict)
|
|
125
|
+
audit_store_kwargs: dict = field(default_factory=dict)
|
|
93
126
|
|
|
94
127
|
# Project directory (convenience for file-based stores)
|
|
95
128
|
# Only used if store_kwargs doesn't already have project_dir
|
|
@@ -100,8 +133,11 @@ class PersistenceManager:
|
|
|
100
133
|
"""
|
|
101
134
|
Unified manager for all persistence stores.
|
|
102
135
|
|
|
103
|
-
Provides access to memory, conversations, tasks,
|
|
104
|
-
with pluggable backends.
|
|
136
|
+
Provides access to core stores (memory, conversations, tasks, preferences)
|
|
137
|
+
and optional stores (knowledge, audit) with pluggable backends.
|
|
138
|
+
|
|
139
|
+
Core stores have file-based defaults. Optional stores return None
|
|
140
|
+
unless explicitly configured.
|
|
105
141
|
|
|
106
142
|
Example:
|
|
107
143
|
# Use default file-based storage
|
|
@@ -120,8 +156,15 @@ class PersistenceManager:
|
|
|
120
156
|
config = PersistenceConfig(
|
|
121
157
|
memory_store=DjangoMemoryStore(user=request.user),
|
|
122
158
|
conversation_store=DjangoConversationStore(user=request.user),
|
|
159
|
+
# Enable optional stores
|
|
160
|
+
knowledge_store=DjangoKnowledgeStore(user=request.user),
|
|
161
|
+
audit_store=DjangoAuditStore(user=request.user),
|
|
123
162
|
)
|
|
124
163
|
manager = PersistenceManager(config)
|
|
164
|
+
|
|
165
|
+
# Check if optional stores are available
|
|
166
|
+
if manager.knowledge:
|
|
167
|
+
await manager.knowledge.save_fact(fact)
|
|
125
168
|
"""
|
|
126
169
|
|
|
127
170
|
def __init__(self, config: Optional[PersistenceConfig] = None):
|
|
@@ -130,6 +173,11 @@ class PersistenceManager:
|
|
|
130
173
|
self._conversations: Optional[ConversationStore] = None
|
|
131
174
|
self._tasks: Optional[TaskStore] = None
|
|
132
175
|
self._preferences: Optional[PreferencesStore] = None
|
|
176
|
+
self._knowledge: Optional[KnowledgeStore] = None
|
|
177
|
+
self._audit: Optional[AuditStore] = None
|
|
178
|
+
# Track if optional stores have been initialized
|
|
179
|
+
self._knowledge_initialized = False
|
|
180
|
+
self._audit_initialized = False
|
|
133
181
|
|
|
134
182
|
def _build_kwargs(self, store_kwargs: dict) -> dict:
|
|
135
183
|
"""Build kwargs for store instantiation."""
|
|
@@ -193,6 +241,54 @@ class PersistenceManager:
|
|
|
193
241
|
self._preferences = self._config.preferences_store_class(**kwargs)
|
|
194
242
|
return self._preferences
|
|
195
243
|
|
|
244
|
+
@property
def knowledge(self) -> Optional[KnowledgeStore]:
    """
    Get the knowledge store (optional).

    The store is resolved on first access and cached. Resolution order:
    a pre-instantiated ``knowledge_store``, then ``knowledge_store_factory``,
    then ``knowledge_store_class`` built with kwargs from ``_build_kwargs``.

    Returns None if not configured. Check before using:
        if manager.knowledge:
            await manager.knowledge.save_fact(fact)

    If the factory or class constructor raises, the exception propagates
    and the store is NOT marked as initialized, so the next access retries
    instead of silently reporting the store as unconfigured.
    """
    if not self._knowledge_initialized:
        config = self._config
        if config.knowledge_store is not None:
            store = config.knowledge_store
        elif config.knowledge_store_factory is not None:
            store = config.knowledge_store_factory()
        elif config.knowledge_store_class is not None:
            kwargs = self._build_kwargs(config.knowledge_store_kwargs)
            store = config.knowledge_store_class(**kwargs)
        else:
            store = None
        # Only flip the flag once resolution has succeeded; setting it
        # earlier would turn a one-off construction error into a permanent None.
        self._knowledge = store
        self._knowledge_initialized = True
    return self._knowledge
|
|
263
|
+
|
|
264
|
+
@property
def audit(self) -> Optional[AuditStore]:
    """
    Get the audit store (optional).

    The store is resolved on first access and cached. Resolution order:
    a pre-instantiated ``audit_store``, then ``audit_store_factory``,
    then ``audit_store_class`` built with kwargs from ``_build_kwargs``.

    Returns None if not configured. Check before using:
        if manager.audit:
            await manager.audit.log_event(entry)

    If the factory or class constructor raises, the exception propagates
    and the store is NOT marked as initialized, so the next access retries
    instead of silently reporting the store as unconfigured.
    """
    if not self._audit_initialized:
        config = self._config
        if config.audit_store is not None:
            store = config.audit_store
        elif config.audit_store_factory is not None:
            store = config.audit_store_factory()
        elif config.audit_store_class is not None:
            kwargs = self._build_kwargs(config.audit_store_kwargs)
            store = config.audit_store_class(**kwargs)
        else:
            store = None
        # Only flip the flag once resolution has succeeded; setting it
        # earlier would turn a one-off construction error into a permanent None.
        self._audit = store
        self._audit_initialized = True
    return self._audit
|
|
283
|
+
|
|
284
|
+
def has_knowledge(self) -> bool:
    """Report whether the ``knowledge`` property yields a store (not None)."""
    store = self.knowledge
    return store is not None
|
|
287
|
+
|
|
288
|
+
def has_audit(self) -> bool:
    """Report whether the ``audit`` property yields a store (not None)."""
    store = self.audit
    return store is not None
|
|
291
|
+
|
|
196
292
|
async def close(self) -> None:
|
|
197
293
|
"""Close all stores."""
|
|
198
294
|
if self._memory:
|
|
@@ -203,6 +299,10 @@ class PersistenceManager:
|
|
|
203
299
|
await self._tasks.close()
|
|
204
300
|
if self._preferences:
|
|
205
301
|
await self._preferences.close()
|
|
302
|
+
if self._knowledge:
|
|
303
|
+
await self._knowledge.close()
|
|
304
|
+
if self._audit:
|
|
305
|
+
await self._audit.close()
|
|
206
306
|
|
|
207
307
|
|
|
208
308
|
# Global manager instance
|
|
@@ -215,27 +315,31 @@ def configure_persistence(
|
|
|
215
315
|
conversation_store_class: Optional[Type[ConversationStore]] = None,
|
|
216
316
|
task_store_class: Optional[Type[TaskStore]] = None,
|
|
217
317
|
preferences_store_class: Optional[Type[PreferencesStore]] = None,
|
|
318
|
+
knowledge_store_class: Optional[Type[KnowledgeStore]] = None,
|
|
319
|
+
audit_store_class: Optional[Type[AuditStore]] = None,
|
|
218
320
|
project_dir: Optional[Path] = None,
|
|
219
321
|
**kwargs,
|
|
220
322
|
) -> PersistenceConfig:
|
|
221
323
|
"""
|
|
222
324
|
Configure the global persistence manager.
|
|
223
|
-
|
|
325
|
+
|
|
224
326
|
Args:
|
|
225
327
|
memory_store_class: Custom memory store implementation
|
|
226
328
|
conversation_store_class: Custom conversation store implementation
|
|
227
329
|
task_store_class: Custom task store implementation
|
|
228
330
|
preferences_store_class: Custom preferences store implementation
|
|
331
|
+
knowledge_store_class: Custom knowledge store implementation (optional)
|
|
332
|
+
audit_store_class: Custom audit store implementation (optional)
|
|
229
333
|
project_dir: Project directory for PROJECT scope
|
|
230
334
|
**kwargs: Additional store-specific configuration
|
|
231
|
-
|
|
335
|
+
|
|
232
336
|
Returns:
|
|
233
337
|
The configured PersistenceConfig
|
|
234
338
|
"""
|
|
235
339
|
global _config, _manager
|
|
236
|
-
|
|
340
|
+
|
|
237
341
|
config = PersistenceConfig(project_dir=project_dir)
|
|
238
|
-
|
|
342
|
+
|
|
239
343
|
if memory_store_class:
|
|
240
344
|
config.memory_store_class = memory_store_class
|
|
241
345
|
if conversation_store_class:
|
|
@@ -244,23 +348,27 @@ def configure_persistence(
|
|
|
244
348
|
config.task_store_class = task_store_class
|
|
245
349
|
if preferences_store_class:
|
|
246
350
|
config.preferences_store_class = preferences_store_class
|
|
247
|
-
|
|
351
|
+
if knowledge_store_class:
|
|
352
|
+
config.knowledge_store_class = knowledge_store_class
|
|
353
|
+
if audit_store_class:
|
|
354
|
+
config.audit_store_class = audit_store_class
|
|
355
|
+
|
|
248
356
|
_config = config
|
|
249
357
|
_manager = None # Reset manager to use new config
|
|
250
|
-
|
|
358
|
+
|
|
251
359
|
return config
|
|
252
360
|
|
|
253
361
|
|
|
254
362
|
def get_persistence_manager() -> PersistenceManager:
    """
    Return the module-wide persistence manager.

    The manager is built on first use from the current module-level
    ``_config`` and then reused on every later call.
    """
    global _manager

    manager = _manager
    if manager is None:
        # First call since the manager was last reset: build and cache it.
        manager = PersistenceManager(_config)
        _manager = manager

    return manager
|
|
266
374
|
|
|
@@ -0,0 +1,373 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Step executor for long-running multi-step agent operations.
|
|
3
|
+
|
|
4
|
+
This module provides a structured way to execute multi-step operations
|
|
5
|
+
with automatic checkpointing, resume capability, retries, and progress
|
|
6
|
+
reporting.
|
|
7
|
+
|
|
8
|
+
Example usage:
|
|
9
|
+
from agent_runtime_core.steps import StepExecutor, Step
|
|
10
|
+
|
|
11
|
+
class MyAgent(AgentRuntime):
|
|
12
|
+
async def run(self, ctx: RunContext) -> RunResult:
|
|
13
|
+
executor = StepExecutor(ctx)
|
|
14
|
+
|
|
15
|
+
result = await executor.run([
|
|
16
|
+
Step("fetch", self.fetch_data),
|
|
17
|
+
Step("process", self.process_data, retries=3),
|
|
18
|
+
Step("validate", self.validate),
|
|
19
|
+
])
|
|
20
|
+
|
|
21
|
+
return RunResult(final_output=result)
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
import asyncio
|
|
25
|
+
import traceback
|
|
26
|
+
from dataclasses import dataclass, field
|
|
27
|
+
from datetime import datetime
|
|
28
|
+
from enum import Enum
|
|
29
|
+
from typing import Any, Awaitable, Callable, Optional, TypeVar, Union
|
|
30
|
+
from uuid import UUID, uuid4
|
|
31
|
+
|
|
32
|
+
from agent_runtime_core.interfaces import EventType, RunContext
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class StepStatus(str, Enum):
    """
    Lifecycle states a step can be in.

    Members also subclass ``str``, so each one compares equal to its
    plain string value (e.g. ``StepStatus.FAILED == "failed"``).
    """

    # Not started yet.
    PENDING = "pending"
    # Currently executing.
    RUNNING = "running"
    # Finished successfully.
    COMPLETED = "completed"
    # Finished unsuccessfully.
    FAILED = "failed"
    # Not executed.
    SKIPPED = "skipped"
    # Execution was cancelled.
    CANCELLED = "cancelled"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# Type for step functions: async def step_fn(ctx, state) -> result
|
|
47
|
+
StepFunction = Callable[[RunContext, dict], Awaitable[Any]]
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
class Step:
    """
    One unit of work in a multi-step operation.

    Attributes:
        name: Unique identifier for this step.
        fn: Async callable invoked as ``fn(ctx, state)``; its return value
            is the step's result.
        retries: How many additional attempts are allowed after a failure
            (default: 0).
        retry_delay: Seconds to wait between attempts (default: 1.0).
        timeout: Optional per-step timeout in seconds.
        description: Human-readable text for progress reporting.
        checkpoint: Whether to checkpoint after this step (default: True).
    """

    # Required fields (positional order matters to callers).
    name: str
    fn: StepFunction

    # Retry policy.
    retries: int = 0
    retry_delay: float = 1.0

    # Optional per-step limits and reporting metadata.
    timeout: Optional[float] = None
    description: Optional[str] = None

    # Persistence behaviour after the step completes.
    checkpoint: bool = True
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dataclass
class StepResult:
    """
    Outcome record for one executed step.

    Attributes:
        name: Name of the step this record belongs to.
        status: Status of the step execution.
        result: Value produced by the step, if any.
        error: Error description, if any.
        attempts: Number of attempts made (default: 1).
        started_at: When the recorded attempt started.
        completed_at: When the recorded attempt finished.
        duration_ms: Duration of the recorded attempt in milliseconds.
    """

    # Identity and outcome.
    name: str
    status: StepStatus
    result: Any = None
    error: Optional[str] = None
    attempts: int = 1

    # Timing information.
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    duration_ms: Optional[float] = None
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@dataclass
class ExecutionState:
    """
    State of a multi-step execution.

    This is what gets checkpointed and can be used to resume.

    Attributes:
        execution_id: Identifier of this execution (random UUID by default).
        current_step_index: Index of the step currently being processed.
        completed_steps: Names of steps that have finished.
        step_results: Mapping of step name to that step's result.
        started_at: Creation time of the state (naive, from ``utcnow``).
        custom_state: Free-form dictionary for caller-defined data.
    """

    execution_id: UUID = field(default_factory=uuid4)
    current_step_index: int = 0
    completed_steps: list[str] = field(default_factory=list)
    step_results: dict[str, Any] = field(default_factory=dict)
    # NOTE(review): naive UTC timestamp, kept for compatibility with
    # checkpoints already written in this format.
    started_at: datetime = field(default_factory=datetime.utcnow)
    custom_state: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """
        Convert to dictionary for checkpointing.

        The containers are shallow-copied so the snapshot does not change
        when this state is mutated afterwards (which matters if the
        checkpoint is held in memory rather than serialized immediately).
        """
        return {
            "execution_id": str(self.execution_id),
            "current_step_index": self.current_step_index,
            "completed_steps": list(self.completed_steps),
            "step_results": dict(self.step_results),
            "started_at": self.started_at.isoformat(),
            "custom_state": dict(self.custom_state),
        }

    @classmethod
    def from_dict(cls, data: dict) -> "ExecutionState":
        """
        Restore from checkpointed dictionary.

        Containers are shallow-copied so mutating the restored state does
        not write through into the checkpoint data it was loaded from.

        Raises:
            KeyError: If a required key is missing from ``data``.
            ValueError: If the UUID or timestamp string is malformed.
        """
        return cls(
            execution_id=UUID(data["execution_id"]),
            current_step_index=data["current_step_index"],
            completed_steps=list(data["completed_steps"]),
            step_results=dict(data["step_results"]),
            started_at=datetime.fromisoformat(data["started_at"]),
            # "custom_state" is optional in the checkpoint.
            custom_state=dict(data.get("custom_state") or {}),
        )
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class StepExecutionError(Exception):
    """
    Raised when step execution fails after all retries.

    Attributes:
        step_name: Name of the step that failed.
        attempts: Number of attempts that were made.
    """

    def __init__(self, step_name: str, message: str, attempts: int):
        text = f"Step '{step_name}' failed after {attempts} attempts: {message}"
        super().__init__(text)
        self.step_name = step_name
        self.attempts = attempts
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class StepCancelledError(Exception):
    """
    Raised when execution is cancelled.

    Attributes:
        step_name: Name of the step during which cancellation was seen.
    """

    def __init__(self, step_name: str):
        text = f"Execution cancelled during step '{step_name}'"
        super().__init__(text)
        self.step_name = step_name
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class StepExecutor:
    """
    Executes a sequence of steps with checkpointing and resume capability.

    Features:
    - Automatic checkpointing after each step
    - Resume from last checkpoint on restart
    - Per-step retries with configurable delay
    - Progress reporting via events
    - Cancellation support
    - Step-level timeouts

    Example:
        executor = StepExecutor(ctx)

        result = await executor.run([
            Step("fetch", fetch_data),
            Step("process", process_data, retries=3),
            Step("save", save_results),
        ])
    """

    def __init__(
        self,
        ctx: RunContext,
        *,
        checkpoint_key: str = "_step_executor_state",
        cancel_check_interval: float = 0.5,
    ):
        """
        Initialize the step executor.

        Args:
            ctx: The run context from the agent runtime
            checkpoint_key: Key used for storing execution state
            cancel_check_interval: How often to check for cancellation (seconds).
                Stored on the instance; the methods of this class do not
                currently read it.
        """
        self.ctx = ctx
        self.checkpoint_key = checkpoint_key
        self.cancel_check_interval = cancel_check_interval
        self._state: Optional[ExecutionState] = None

    async def run(
        self,
        steps: list[Step],
        *,
        initial_state: Optional[dict] = None,
        resume: bool = True,
    ) -> dict[str, Any]:
        """
        Execute a sequence of steps.

        Args:
            steps: List of steps to execute
            initial_state: Optional initial custom state. Only used when a
                fresh execution state is created (i.e. nothing is resumed).
            resume: Whether to resume from checkpoint if available. When
                False, execution always starts from a fresh state, even if
                this executor instance has already run before.

        Returns:
            Dictionary mapping step names to their results

        Raises:
            StepExecutionError: If a step fails after all retries
            StepCancelledError: If execution is cancelled
        """
        # With resume=False any state left over from an earlier run() on this
        # instance must be discarded, otherwise its completed steps would be
        # skipped as if they had been resumed.
        self._state = await self._load_state() if resume else None

        if self._state is None:
            self._state = ExecutionState(
                custom_state=initial_state or {}
            )

        total_steps = len(steps)

        for i, step in enumerate(steps):
            # Skip already completed steps
            if step.name in self._state.completed_steps:
                await self.ctx.emit(EventType.STEP_SKIPPED, {
                    "step_name": step.name,
                    "step_index": i,
                    "total_steps": total_steps,
                    "reason": "already_completed",
                })
                continue

            # Check for cancellation
            if self.ctx.cancelled():
                raise StepCancelledError(step.name)

            # Update state
            self._state.current_step_index = i

            # Execute the step
            result = await self._execute_step(step, i, total_steps)

            # Record completion
            self._state.completed_steps.append(step.name)
            self._state.step_results[step.name] = result.result

            # Checkpoint if enabled
            if step.checkpoint:
                await self._save_state()

        return self._state.step_results

    async def _execute_step(
        self,
        step: Step,
        index: int,
        total: int,
    ) -> StepResult:
        """
        Execute a single step with retries.

        Emits STEP_STARTED and PROGRESS_UPDATE before each attempt,
        STEP_COMPLETED on success, STEP_RETRYING before each retry delay and
        STEP_FAILED once all attempts are used up.

        Raises:
            StepCancelledError: If the attempt is interrupted by
                ``asyncio.CancelledError``.
            StepExecutionError: If every attempt fails; the last underlying
                exception is attached as ``__cause__``.
        """
        # A step always gets at least one attempt, even if ``retries`` was
        # (mistakenly) given as a negative number.
        max_attempts = max(1, step.retries + 1)
        attempts = 0
        last_error: Optional[str] = None
        last_exception: Optional[BaseException] = None

        while attempts < max_attempts:
            attempts += 1

            # Emit started event
            await self.ctx.emit(EventType.STEP_STARTED, {
                "step_name": step.name,
                "step_index": index,
                "total_steps": total,
                "attempt": attempts,
                "max_attempts": max_attempts,
                "description": step.description,
            })

            # Emit progress
            await self.ctx.emit(EventType.PROGRESS_UPDATE, {
                "step_name": step.name,
                "step_index": index,
                "total_steps": total,
                "progress_percent": (index / total) * 100,
                "description": step.description or f"Executing {step.name}",
            })

            started_at = datetime.utcnow()

            try:
                # Execute with optional timeout
                if step.timeout:
                    result = await asyncio.wait_for(
                        step.fn(self.ctx, self._state.custom_state),
                        timeout=step.timeout,
                    )
                else:
                    result = await step.fn(self.ctx, self._state.custom_state)

                completed_at = datetime.utcnow()
                duration_ms = (completed_at - started_at).total_seconds() * 1000

                # Emit completed event
                await self.ctx.emit(EventType.STEP_COMPLETED, {
                    "step_name": step.name,
                    "step_index": index,
                    "total_steps": total,
                    "attempt": attempts,
                    "duration_ms": duration_ms,
                })

                return StepResult(
                    name=step.name,
                    status=StepStatus.COMPLETED,
                    result=result,
                    attempts=attempts,
                    started_at=started_at,
                    completed_at=completed_at,
                    duration_ms=duration_ms,
                )

            except asyncio.CancelledError:
                raise StepCancelledError(step.name)

            except asyncio.TimeoutError as exc:
                last_error = f"Step timed out after {step.timeout}s"
                last_exception = exc

            except Exception as exc:
                last_error = f"{type(exc).__name__}: {str(exc)}"
                last_exception = exc

            # Check if we should retry
            if attempts < max_attempts:
                await self.ctx.emit(EventType.STEP_RETRYING, {
                    "step_name": step.name,
                    "step_index": index,
                    "attempt": attempts,
                    "max_attempts": max_attempts,
                    "error": last_error,
                    "retry_delay": step.retry_delay,
                })
                await asyncio.sleep(step.retry_delay)

        # All retries exhausted
        await self.ctx.emit(EventType.STEP_FAILED, {
            "step_name": step.name,
            "step_index": index,
            "total_steps": total,
            "attempts": attempts,
            "error": last_error,
        })

        # Chain the original exception so its traceback is not lost.
        raise StepExecutionError(
            step.name, last_error or "Unknown error", attempts
        ) from last_exception

    async def _load_state(self) -> Optional[ExecutionState]:
        """
        Load execution state from checkpoint.

        Returns None when there is no checkpoint, the checkpoint has no
        entry under ``checkpoint_key``, or the entry cannot be parsed.
        """
        checkpoint = await self.ctx.get_state()
        if not checkpoint or self.checkpoint_key not in checkpoint:
            return None
        try:
            return ExecutionState.from_dict(checkpoint[self.checkpoint_key])
        except (KeyError, ValueError, TypeError, AttributeError):
            # Malformed entries (missing keys, bad strings, or values of the
            # wrong type) are treated as "no usable checkpoint".
            return None

    async def _save_state(self) -> None:
        """Save execution state to checkpoint."""
        # Work on a copy so the mapping returned by the context is not
        # modified in place before checkpoint() is called.
        checkpoint = dict(await self.ctx.get_state() or {})
        checkpoint[self.checkpoint_key] = self._state.to_dict()
        await self.ctx.checkpoint(checkpoint)

    @property
    def state(self) -> Optional[ExecutionState]:
        """Get the current execution state (None before the first run)."""
        return self._state

    def update_custom_state(self, updates: dict) -> None:
        """
        Update custom state (will be checkpointed with next step).

        Does nothing if no execution state exists yet.
        """
        if self._state:
            self._state.custom_state.update(updates)
|
|
373
|
+
|