aethergraph 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aethergraph/__init__.py +49 -0
- aethergraph/config/__init__.py +0 -0
- aethergraph/config/config.py +121 -0
- aethergraph/config/context.py +16 -0
- aethergraph/config/llm.py +26 -0
- aethergraph/config/loader.py +60 -0
- aethergraph/config/runtime.py +9 -0
- aethergraph/contracts/errors/errors.py +44 -0
- aethergraph/contracts/services/artifacts.py +142 -0
- aethergraph/contracts/services/channel.py +72 -0
- aethergraph/contracts/services/continuations.py +23 -0
- aethergraph/contracts/services/eventbus.py +12 -0
- aethergraph/contracts/services/kv.py +24 -0
- aethergraph/contracts/services/llm.py +17 -0
- aethergraph/contracts/services/mcp.py +22 -0
- aethergraph/contracts/services/memory.py +108 -0
- aethergraph/contracts/services/resume.py +28 -0
- aethergraph/contracts/services/state_stores.py +33 -0
- aethergraph/contracts/services/wakeup.py +28 -0
- aethergraph/core/execution/base_scheduler.py +77 -0
- aethergraph/core/execution/forward_scheduler.py +777 -0
- aethergraph/core/execution/global_scheduler.py +634 -0
- aethergraph/core/execution/retry_policy.py +22 -0
- aethergraph/core/execution/step_forward.py +411 -0
- aethergraph/core/execution/step_result.py +18 -0
- aethergraph/core/execution/wait_types.py +72 -0
- aethergraph/core/graph/graph_builder.py +192 -0
- aethergraph/core/graph/graph_fn.py +219 -0
- aethergraph/core/graph/graph_io.py +67 -0
- aethergraph/core/graph/graph_refs.py +154 -0
- aethergraph/core/graph/graph_spec.py +115 -0
- aethergraph/core/graph/graph_state.py +59 -0
- aethergraph/core/graph/graphify.py +128 -0
- aethergraph/core/graph/interpreter.py +145 -0
- aethergraph/core/graph/node_handle.py +33 -0
- aethergraph/core/graph/node_spec.py +46 -0
- aethergraph/core/graph/node_state.py +63 -0
- aethergraph/core/graph/task_graph.py +747 -0
- aethergraph/core/graph/task_node.py +82 -0
- aethergraph/core/graph/utils.py +37 -0
- aethergraph/core/graph/visualize.py +239 -0
- aethergraph/core/runtime/ad_hoc_context.py +61 -0
- aethergraph/core/runtime/base_service.py +153 -0
- aethergraph/core/runtime/bind_adapter.py +42 -0
- aethergraph/core/runtime/bound_memory.py +69 -0
- aethergraph/core/runtime/execution_context.py +220 -0
- aethergraph/core/runtime/graph_runner.py +349 -0
- aethergraph/core/runtime/lifecycle.py +26 -0
- aethergraph/core/runtime/node_context.py +203 -0
- aethergraph/core/runtime/node_services.py +30 -0
- aethergraph/core/runtime/recovery.py +159 -0
- aethergraph/core/runtime/run_registration.py +33 -0
- aethergraph/core/runtime/runtime_env.py +157 -0
- aethergraph/core/runtime/runtime_registry.py +32 -0
- aethergraph/core/runtime/runtime_services.py +224 -0
- aethergraph/core/runtime/wakeup_watcher.py +40 -0
- aethergraph/core/tools/__init__.py +10 -0
- aethergraph/core/tools/builtins/channel_tools.py +194 -0
- aethergraph/core/tools/builtins/toolset.py +134 -0
- aethergraph/core/tools/toolkit.py +510 -0
- aethergraph/core/tools/waitable.py +109 -0
- aethergraph/plugins/channel/__init__.py +0 -0
- aethergraph/plugins/channel/adapters/__init__.py +0 -0
- aethergraph/plugins/channel/adapters/console.py +106 -0
- aethergraph/plugins/channel/adapters/file.py +102 -0
- aethergraph/plugins/channel/adapters/slack.py +285 -0
- aethergraph/plugins/channel/adapters/telegram.py +302 -0
- aethergraph/plugins/channel/adapters/webhook.py +104 -0
- aethergraph/plugins/channel/adapters/webui.py +134 -0
- aethergraph/plugins/channel/routes/__init__.py +0 -0
- aethergraph/plugins/channel/routes/console_routes.py +86 -0
- aethergraph/plugins/channel/routes/slack_routes.py +49 -0
- aethergraph/plugins/channel/routes/telegram_routes.py +26 -0
- aethergraph/plugins/channel/routes/webui_routes.py +136 -0
- aethergraph/plugins/channel/utils/__init__.py +0 -0
- aethergraph/plugins/channel/utils/slack_utils.py +278 -0
- aethergraph/plugins/channel/utils/telegram_utils.py +324 -0
- aethergraph/plugins/channel/websockets/slack_ws.py +68 -0
- aethergraph/plugins/channel/websockets/telegram_polling.py +151 -0
- aethergraph/plugins/mcp/fs_server.py +128 -0
- aethergraph/plugins/mcp/http_server.py +101 -0
- aethergraph/plugins/mcp/ws_server.py +180 -0
- aethergraph/plugins/net/http.py +10 -0
- aethergraph/plugins/utils/data_io.py +359 -0
- aethergraph/runner/__init__.py +5 -0
- aethergraph/runtime/__init__.py +62 -0
- aethergraph/server/__init__.py +3 -0
- aethergraph/server/app_factory.py +84 -0
- aethergraph/server/start.py +122 -0
- aethergraph/services/__init__.py +10 -0
- aethergraph/services/artifacts/facade.py +284 -0
- aethergraph/services/artifacts/factory.py +35 -0
- aethergraph/services/artifacts/fs_store.py +656 -0
- aethergraph/services/artifacts/jsonl_index.py +123 -0
- aethergraph/services/artifacts/paths.py +23 -0
- aethergraph/services/artifacts/sqlite_index.py +209 -0
- aethergraph/services/artifacts/utils.py +124 -0
- aethergraph/services/auth/dev.py +16 -0
- aethergraph/services/channel/channel_bus.py +293 -0
- aethergraph/services/channel/factory.py +44 -0
- aethergraph/services/channel/session.py +511 -0
- aethergraph/services/channel/wait_helpers.py +57 -0
- aethergraph/services/clock/clock.py +9 -0
- aethergraph/services/container/default_container.py +320 -0
- aethergraph/services/continuations/continuation.py +56 -0
- aethergraph/services/continuations/factory.py +34 -0
- aethergraph/services/continuations/stores/fs_store.py +264 -0
- aethergraph/services/continuations/stores/inmem_store.py +95 -0
- aethergraph/services/eventbus/inmem.py +21 -0
- aethergraph/services/features/static.py +10 -0
- aethergraph/services/kv/ephemeral.py +90 -0
- aethergraph/services/kv/factory.py +27 -0
- aethergraph/services/kv/layered.py +41 -0
- aethergraph/services/kv/sqlite_kv.py +128 -0
- aethergraph/services/llm/factory.py +157 -0
- aethergraph/services/llm/generic_client.py +542 -0
- aethergraph/services/llm/providers.py +3 -0
- aethergraph/services/llm/service.py +105 -0
- aethergraph/services/logger/base.py +36 -0
- aethergraph/services/logger/compat.py +50 -0
- aethergraph/services/logger/formatters.py +106 -0
- aethergraph/services/logger/std.py +203 -0
- aethergraph/services/mcp/helpers.py +23 -0
- aethergraph/services/mcp/http_client.py +70 -0
- aethergraph/services/mcp/mcp_tools.py +21 -0
- aethergraph/services/mcp/registry.py +14 -0
- aethergraph/services/mcp/service.py +100 -0
- aethergraph/services/mcp/stdio_client.py +70 -0
- aethergraph/services/mcp/ws_client.py +115 -0
- aethergraph/services/memory/bound.py +106 -0
- aethergraph/services/memory/distillers/episode.py +116 -0
- aethergraph/services/memory/distillers/rolling.py +74 -0
- aethergraph/services/memory/facade.py +633 -0
- aethergraph/services/memory/factory.py +78 -0
- aethergraph/services/memory/hotlog_kv.py +27 -0
- aethergraph/services/memory/indices.py +74 -0
- aethergraph/services/memory/io_helpers.py +72 -0
- aethergraph/services/memory/persist_fs.py +40 -0
- aethergraph/services/memory/resolver.py +152 -0
- aethergraph/services/metering/noop.py +4 -0
- aethergraph/services/prompts/file_store.py +41 -0
- aethergraph/services/rag/chunker.py +29 -0
- aethergraph/services/rag/facade.py +593 -0
- aethergraph/services/rag/index/base.py +27 -0
- aethergraph/services/rag/index/faiss_index.py +121 -0
- aethergraph/services/rag/index/sqlite_index.py +134 -0
- aethergraph/services/rag/index_factory.py +52 -0
- aethergraph/services/rag/parsers/md.py +7 -0
- aethergraph/services/rag/parsers/pdf.py +14 -0
- aethergraph/services/rag/parsers/txt.py +7 -0
- aethergraph/services/rag/utils/hybrid.py +39 -0
- aethergraph/services/rag/utils/make_fs_key.py +62 -0
- aethergraph/services/redactor/simple.py +16 -0
- aethergraph/services/registry/key_parsing.py +44 -0
- aethergraph/services/registry/registry_key.py +19 -0
- aethergraph/services/registry/unified_registry.py +185 -0
- aethergraph/services/resume/multi_scheduler_resume_bus.py +65 -0
- aethergraph/services/resume/router.py +73 -0
- aethergraph/services/schedulers/registry.py +41 -0
- aethergraph/services/secrets/base.py +7 -0
- aethergraph/services/secrets/env.py +8 -0
- aethergraph/services/state_stores/externalize.py +135 -0
- aethergraph/services/state_stores/graph_observer.py +131 -0
- aethergraph/services/state_stores/json_store.py +67 -0
- aethergraph/services/state_stores/resume_policy.py +119 -0
- aethergraph/services/state_stores/serialize.py +249 -0
- aethergraph/services/state_stores/utils.py +91 -0
- aethergraph/services/state_stores/validate.py +78 -0
- aethergraph/services/tracing/noop.py +18 -0
- aethergraph/services/waits/wait_registry.py +91 -0
- aethergraph/services/wakeup/memory_queue.py +57 -0
- aethergraph/services/wakeup/scanner_producer.py +56 -0
- aethergraph/services/wakeup/worker.py +31 -0
- aethergraph/tools/__init__.py +25 -0
- aethergraph/utils/optdeps.py +8 -0
- aethergraph-0.1.0a1.dist-info/METADATA +410 -0
- aethergraph-0.1.0a1.dist-info/RECORD +182 -0
- aethergraph-0.1.0a1.dist-info/WHEEL +5 -0
- aethergraph-0.1.0a1.dist-info/entry_points.txt +2 -0
- aethergraph-0.1.0a1.dist-info/licenses/LICENSE +176 -0
- aethergraph-0.1.0a1.dist-info/licenses/NOTICE +31 -0
- aethergraph-0.1.0a1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from datetime import timedelta
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from aethergraph.contracts.services.llm import LLMClientProtocol
|
|
6
|
+
from aethergraph.core.runtime.runtime_services import get_ext_context_service
|
|
7
|
+
from aethergraph.services.channel.session import ChannelSession
|
|
8
|
+
from aethergraph.services.continuations.continuation import Continuation
|
|
9
|
+
from aethergraph.services.llm.providers import Provider
|
|
10
|
+
from aethergraph.services.memory.facade import MemoryFacade
|
|
11
|
+
|
|
12
|
+
from .base_service import _ServiceHandle
|
|
13
|
+
from .bound_memory import BoundMemoryAdapter
|
|
14
|
+
from .node_services import NodeServices
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class NodeContext:
    """Identifiers of one node execution plus accessors for its bound services.

    Most accessors return the matching attribute of ``services``. Accessors for
    optional services raise ``RuntimeError`` when that service is ``None``.
    Attribute names that are not found normally are resolved as external
    context services through ``__getattr__``.
    """

    run_id: str
    graph_id: str
    node_id: str
    services: NodeServices
    resume_payload: dict[str, Any] | None = None
    bound_memory: BoundMemoryAdapter | None = None  # back-compat

    # --- accessors (compatible names) ---
    def runtime(self) -> NodeServices:
        """Return the ``NodeServices`` bundle held by this context."""
        return self.services

    def logger(self):
        """Return ``services.logger.for_node_ctx(...)`` for this run/node/graph.

        Raises:
            RuntimeError: if no logger service is bound.
        """
        factory = self.services.logger
        if factory is None:
            raise RuntimeError("Logger service not available")
        return factory.for_node_ctx(
            run_id=self.run_id, node_id=self.node_id, graph_id=self.graph_id
        )

    def channel(self, channel_key: str | None = None):
        """Return a new ``ChannelSession`` built from this context and ``channel_key``."""
        return ChannelSession(self, channel_key)

    # New way: prefer memory_facade directly
    def memory(self) -> MemoryFacade:
        """Return the bound ``MemoryFacade``.

        Raises:
            RuntimeError: if ``services.memory_facade`` is ``None``.
        """
        # Explicit None check: a bound service that happens to be falsy is still bound.
        if self.services.memory_facade is None:
            raise RuntimeError("MemoryFacade not bound")
        return self.services.memory_facade

    # Back-compat: old ctx.mem() -> To be deprecated
    def mem(self) -> BoundMemoryAdapter:
        """Return the back-compat ``BoundMemoryAdapter``.

        Raises:
            RuntimeError: if ``bound_memory`` is ``None``.
        """
        if self.bound_memory is None:
            raise RuntimeError("BoundMemory adapter not available")
        return self.bound_memory

    # Artifacts / index
    def artifacts(self):
        """Return ``services.artifact_store``."""
        return self.services.artifact_store

    def kv(self):
        """Return the KV service.

        Raises:
            RuntimeError: if ``services.kv`` is ``None``.
        """
        if self.services.kv is None:
            raise RuntimeError("KV not available")
        return self.services.kv

    def llm(
        self,
        profile: str = "default",
        *,
        provider: Provider | None = None,
        model: str | None = None,
        base_url: str | None = None,
        api_key: str | None = None,
        azure_deployment: str | None = None,
        timeout: float | None = None,
    ) -> LLMClientProtocol:
        """
        Get an LLM client by profile.
        - If no overrides are provided, just return existing profile.
        - If overrides are provided, create/update that profile at runtime.

        Raises:
            RuntimeError: if ``services.llm`` is ``None``.
        """
        svc = self.services.llm
        if svc is None:
            raise RuntimeError("LLMService not available")

        overrides = (provider, model, base_url, api_key, azure_deployment, timeout)
        if all(value is None for value in overrides):
            return svc.get(profile)

        return svc.configure_profile(
            profile=profile,
            provider=provider,
            model=model,
            base_url=base_url,
            api_key=api_key,
            azure_deployment=azure_deployment,
            timeout=timeout,
        )

    def llm_set_key(self, provider: str, model: str, api_key: str, profile: str = "default"):
        """
        Quickly configure or override the provider/key for a profile.

        Raises:
            RuntimeError: if ``services.llm`` is ``None``.
        """
        svc = self.services.llm
        if svc is None:
            raise RuntimeError("LLMService not available")
        svc.set_key(provider=provider, model=model, api_key=api_key, profile=profile)

    def rag(self):
        """Return the RAG service.

        Raises:
            RuntimeError: if ``services.rag`` is ``None``.
        """
        if self.services.rag is None:
            raise RuntimeError("RAGService not available")
        return self.services.rag

    def mcp(self, name):
        """Return ``services.mcp.get(name)``.

        Raises:
            RuntimeError: if ``services.mcp`` is ``None``.
        """
        if self.services.mcp is None:
            raise RuntimeError("MCPService not available")
        return self.services.mcp.get(name)

    def continuations(self):
        """Return ``services.continuation_store``."""
        return self.services.continuation_store

    def prepare_wait_for_resume(self, token: str):
        """Register ``token`` in the wait registry and return the result without awaiting it.

        Raises:
            RuntimeError: if ``services.wait_registry`` is ``None``.
        """
        waits = self.services.wait_registry
        if waits is None:
            raise RuntimeError("WaitRegistry missing on context/runtime")
        # creates and registers a Future for this token without awaiting
        return waits.register(token)

    def clock(self):
        """Return the clock service.

        Raises:
            RuntimeError: if ``services.clock`` is ``None``.
        """
        if self.services.clock is None:
            raise RuntimeError("Clock service not available")
        return self.services.clock

    def svc(self, name: str) -> Any:
        """Look up the external context service ``name``.

        If the registered object has a callable ``bind`` attribute, the result
        of ``bind(context=self)`` is returned; otherwise the object itself.

        Raises:
            KeyError: if no service is registered under ``name``.
        """
        # generic accessor for external context services
        raw = get_ext_context_service(name)
        if raw is None:
            raise KeyError(f"Service '{name}' not registered")
        # bind the service to the context
        bind = getattr(raw, "bind", None)
        if callable(bind):
            return raw.bind(context=self)
        return raw

    def __getattr__(self, name: str) -> Any:
        """Resolve unknown attributes as external context services.

        Only called when normal attribute lookup fails. Dunder names are
        rejected up front so protocol probes such as
        ``hasattr(ctx, "__setstate__")`` never reach the service registry.

        Raises:
            AttributeError: for dunder names and for unregistered services.
        """
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(f"NodeContext has no attribute '{name}'")
        # Try to resolve as an external context service
        try:
            bound = self.svc(name)
        except KeyError:
            # Fall back to normal attribute error for anything else
            raise AttributeError(f"NodeContext has no attribute '{name}'") from None
        # Return a callable handle that behaves like the bound service
        return _ServiceHandle(name, bound)

    def _now(self):
        """Return ``services.clock.now()``, or the current naive UTC time without a clock."""
        clock = self.services.clock
        if clock is not None:
            return clock.now()

        from datetime import datetime, timezone

        # Same naive-UTC value datetime.utcnow() produced, without the
        # deprecated call (deprecated since Python 3.12).
        return datetime.now(timezone.utc).replace(tzinfo=None)

    # ---- continuation helpers ----
    async def create_continuation(
        self,
        *,
        kind: str,
        payload: dict | None,
        channel: str | None,
        deadline_s: int | None = None,
        poll: dict | None = None,
        attempts: int = 0,
    ) -> Continuation:
        """Create and store a continuation for this node in the continuation store.

        A token is minted for ``(run_id, node_id)``. ``prompt`` and
        ``resume_schema`` are taken from ``payload`` when it is non-empty.
        A falsy ``deadline_s`` (``None`` or ``0``) means no deadline; otherwise
        the deadline, which is also used as ``next_wakeup_at``, is
        ``now + deadline_s`` seconds.

        Returns:
            The saved ``Continuation``.
        """
        token = await self.services.continuation_store.mint_token(
            self.run_id, self.node_id, attempts=attempts
        )
        deadline = None
        if deadline_s:
            deadline = self._now() + timedelta(seconds=deadline_s)

        continuation = Continuation(
            run_id=self.run_id,
            node_id=self.node_id,
            kind=kind,
            token=token,
            prompt=payload.get("prompt") if payload else None,
            resume_schema=payload.get("resume_schema") if payload else None,
            channel=channel,
            deadline=deadline,
            poll=poll,
            next_wakeup_at=deadline,
            created_at=self._now(),
            attempts=attempts,
            payload=payload,
        )
        await self.services.continuation_store.save(continuation)
        return continuation

    async def wait_for_resume(self, token: str) -> dict:
        """Wait for a continuation to be resumed, and return the payload.
        This will register the wait in the wait registry, and suspend until resumed.
        Useful for nodes that need to pause and wait for short-term external events.
        For long-term waits, use DualStage Tools instead.

        Raises:
            RuntimeError: if ``services.wait_registry`` is ``None``.
        """
        waits = self.services.wait_registry
        if waits is None:
            raise RuntimeError("WaitRegistry missing on context/runtime")
        fut = waits.register(token)
        payload = await fut
        return payload
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from aethergraph.contracts.services.llm import LLMClientProtocol
|
|
5
|
+
from aethergraph.services.channel.channel_bus import ChannelBus
|
|
6
|
+
from aethergraph.services.clock.clock import SystemClock
|
|
7
|
+
from aethergraph.services.continuations.stores.fs_store import FSContinuationStore
|
|
8
|
+
from aethergraph.services.logger.std import StdLoggerService
|
|
9
|
+
from aethergraph.services.mcp.service import MCPService
|
|
10
|
+
from aethergraph.services.memory.facade import MemoryFacade
|
|
11
|
+
from aethergraph.services.rag.facade import RAGFacade
|
|
12
|
+
from aethergraph.services.waits.wait_registry import WaitRegistry
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class NodeServices:
    """Bundle of service handles passed around as a single object.

    ``channels``, ``continuation_store`` and ``artifact_store`` are required;
    every other field defaults to ``None``.
    """

    # --- required ---
    channels: ChannelBus
    continuation_store: FSContinuationStore
    artifact_store: Any  # e.g., ArtifactFacadeAsync

    # --- optional ---
    wait_registry: WaitRegistry | None = None
    clock: SystemClock | None = None
    # StdLoggerService.for_node_ctx() will be used in NodeContext
    logger: StdLoggerService | None = None
    kv: Any | None = None
    # MemoryFactory (for cross-session needs)
    memory: Any | None = None
    # bound memory for this node
    memory_facade: MemoryFacade | None = None
    # LLMService
    llm: LLMClientProtocol | None = None
    # RAGService
    rag: RAGFacade | None = None
    # MCPService
    mcp: MCPService | None = None
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
# aethergraph/core/runtime/recovery.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import datetime
|
|
5
|
+
import hashlib
|
|
6
|
+
import time
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from aethergraph.contracts.services.state_stores import GraphStateStore
|
|
10
|
+
|
|
11
|
+
from ..graph.node_state import NodeStatus
|
|
12
|
+
from ..graph.task_graph import TaskGraph, TaskGraphSpec
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _logic_fingerprint(logic: object) -> str:
    """Return a process-independent string identifying a node's ``logic``."""
    if isinstance(logic, str):
        return logic
    qualname = getattr(logic, "__qualname__", None)
    if qualname is not None:
        # str() of a function embeds its memory address, which differs on every
        # run; the qualified name does not.
        return f"{getattr(logic, '__module__', None)}.{qualname}"
    return str(logic)


def hash_spec(spec: TaskGraphSpec) -> str:
    """Return the SHA-256 hex digest of the structural parts of ``spec``.

    The digest covers ``graph_id``, ``version``, each node's ``type``,
    ``dependencies``, ``logic`` and ``metadata``, and the sorted key names of
    ``io.required`` / ``io.optional`` / ``io.outputs``. String ``logic`` is
    hashed as-is; objects exposing ``__qualname__`` (functions, classes,
    methods) are hashed by ``module.qualname``; anything else falls back to
    ``str()``.

    Raises:
        TypeError: if node ``dependencies`` or ``metadata`` are not JSON-serialisable.
    """
    import json

    # stable hash of the immutable parts
    raw = json.dumps(
        {
            "graph_id": spec.graph_id,
            "version": spec.version,
            "nodes": {
                nid: {
                    "type": ns.type,
                    "dependencies": ns.dependencies,
                    "logic": _logic_fingerprint(ns.logic),
                    "metadata": ns.metadata,
                }
                for nid, ns in spec.nodes.items()
            },
            "io": {
                "required": sorted(spec.io.required),
                "optional": sorted(spec.io.optional),
                "outputs": sorted(spec.io.outputs),
            },
        },
        sort_keys=True,
    )
    return hashlib.sha256(raw.encode("utf-8")).hexdigest()
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
async def recover_graph_run(
    *,
    spec: TaskGraphSpec,
    run_id: str,
    store: GraphStateStore,
) -> TaskGraph:
    """Build a ``TaskGraph`` for ``run_id`` and restore its latest snapshot, if any.

    The graph is always created from ``spec`` with ``state.run_id`` set. When
    the store has no snapshot the fresh graph is returned unchanged. Otherwise
    the snapshot state is applied to it; a spec-hash mismatch is only logged
    as a warning and does not stop the restore.
    """
    snapshot = await store.load_latest_snapshot(run_id)

    graph = TaskGraph.from_spec(spec=spec, state=None)
    graph.state.run_id = run_id

    if snapshot:
        # Basic drift guard: warn, do not fail (TODO: raise if strictness is wanted later).
        expected_hash = hash_spec(spec)
        if snapshot.spec_hash != expected_hash:
            import logging

            message = (
                f"[recover_graph_run] Spec hash mismatch for run {run_id}: "
                f"snapshot has {snapshot.spec_hash[:8]}..., want {expected_hash[:8]}... "
                "This typically means the graph definition changed since the snapshot was taken. "
                "It is not a problem if you created the graph differently on resume."
            )
            logging.getLogger("aethergraph.core.runtime.recovery").warning(message)

        # Apply snapshot state
        _hydrate_state_from_json(graph, snapshot.state)

    # With no snapshot this is simply the fresh graph.
    return graph
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _hydrate_state_from_json(graph, j: dict[str, Any]) -> None:
    """Apply a snapshot dict ``j`` onto ``graph.state`` in place.

    Sets ``rev`` (default ``0``) and ``_bound_inputs``, then restores
    ``status`` and ``outputs`` for every snapshot node that also exists in
    ``graph.state.nodes``. Snapshot nodes unknown to the graph are skipped
    with a warning rather than inserted. An unknown status name, or a stored
    ``RUNNING`` status, is restored as ``PENDING``.
    """
    import logging

    graph.state.rev = j.get("rev", 0)
    graph.state._bound_inputs = j.get("_bound_inputs")
    # `or {}` also tolerates an explicit "nodes": null in the snapshot.
    for nid, ns_json in (j.get("nodes") or {}).items():
        ns = graph.state.nodes.get(nid)
        if ns is None:
            # The snapshot can reference nodes the current spec no longer has
            # (drift is only a warning upstream); there is nothing to restore onto.
            logging.getLogger("aethergraph.core.runtime.recovery").warning(
                "[recover_graph_run] snapshot node %s is not in the current graph; skipping",
                nid,
            )
            continue

        status_name = ns_json.get("status", "PENDING")
        status = getattr(NodeStatus, status_name, NodeStatus.PENDING)
        if status == NodeStatus.RUNNING:
            # A node recorded as RUNNING is reset to PENDING on restore.
            status = NodeStatus.PENDING
        ns.status = status

        # Keep as-is; resume_policy already blocked non-JSON/ref earlier
        ns.outputs = ns_json.get("outputs") or {}
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
async def rearm_waits_if_needed(graph, env, *, ttl_s: int = 3600):
    """Re-issue the wait prompt for waiting nodes whose continuation is gone or expired.

    For every node in ``graph.state.nodes`` whose status is ``WAITING_HUMAN``
    (or ``WAITING_EXTERNAL``), the stored continuation is looked up. If it is
    missing or its deadline is in the past, a new token is minted, an event
    dict is rebuilt from the node's ``wait_spec`` and sent on the channel bus.
    If the bus answers with a payload it is delivered inline; otherwise the new
    continuation is persisted.

    Args:
        graph: object exposing ``state.nodes`` (node id -> node state).
        env: object exposing ``run_id`` and ``container`` (``cont_store``,
            ``channels``, ``logger``, ``resume_bus``).
        ttl_s: seconds added to the current epoch time for the new deadline.
    """
    store = env.container.cont_store
    bus = env.container.channels
    now = time.time()  # epoch seconds (float)

    for nid, ns in graph.state.nodes.items():
        # Only waiting nodes are considered. The getattr fallback compares against
        # the literal "WAITING_EXTERNAL" when NodeStatus has no such member.
        if getattr(ns, "status", None) not in (
            NodeStatus.WAITING_HUMAN,
            getattr(NodeStatus, "WAITING_EXTERNAL", "WAITING_EXTERNAL"),
        ):
            continue

        cont = await store.get(run_id=env.run_id, node_id=nid)
        # Normalize deadline to a numeric timestamp to avoid comparing datetime with float
        # NOTE(review): datetime.timestamp() treats a naive datetime as local time; if
        # stored deadlines are naive UTC values this comparison is off by the local
        # UTC offset — confirm what the clock/store actually produce.
        deadline = getattr(cont, "deadline", None)
        deadline_ts = deadline.timestamp() if isinstance(deadline, datetime.datetime) else deadline
        # A missing continuation counts as expired; a continuation with no deadline never expires.
        expired = (not cont) or (deadline_ts is not None and deadline_ts < now)

        if not expired:
            continue  # still valid

        # Rebuild OutEvent from saved wait_spec
        ws = getattr(ns, "wait_spec", None)
        if not ws:
            # No spec → safest fallback is to keep waiting but log it
            env.container.logger.for_run().warning(
                f"[rearm] missing wait_spec for {env.run_id}:{nid}; staying WAITING"
            )
            continue

        # Mint a new continuation token
        # NOTE(review): the result of store.mint(...) is used without `await`, and the
        # deadline passed here is an epoch float — presumably the store API is
        # synchronous and accepts numeric deadlines; verify against the store.
        new_deadline = now + ttl_s
        token = store.mint(
            run_id=env.run_id,
            node_id=nid,
            kind=ws["kind"],
            channel=ws.get("channel"),
            deadline=new_deadline,
            meta=ws.get("meta") or {},
        )
        # Build + send OutEvent
        # "approval" maps to session.need_approval; "text" and every other kind
        # map to session.need_input.
        out = {
            "type": "session.need_input"
            if ws["kind"] == "text"
            else "session.need_approval"
            if ws["kind"] == "approval"
            else "session.need_input",  # default
            "channel": ws.get("channel"),
            "text": ws.get("prompt"),
            "buttons": [{"label": o} for o in (ws.get("options") or [])],
            "meta": ws.get("meta") or {},
        }
        payload = await bus.send(out)  # may inline-resume for console/web

        # If adapter returned a payload immediately → deliver inline
        if payload and "payload" in payload:
            # inline path (same as in _enter_wait)
            # The freshly minted token is not persisted on this path.
            await env.container.resume_bus.deliver_inline(
                run_id=env.run_id, node_id=nid, payload=payload["payload"]
            )
        else:
            # Persist (replace/insert) the new continuation
            # NOTE(review): not awaited — confirm save_for_node is synchronous.
            store.save_for_node(
                run_id=env.run_id,
                node_id=nid,
                token=token,
                kind=ws["kind"],
                channel=ws.get("channel"),
                deadline=new_deadline,
                meta=ws.get("meta") or {},
            )
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from contextlib import AbstractContextManager
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class RunRegistrationGuard(AbstractContextManager):
    """Scope a scheduler's registration in ``container.sched_registry`` to a ``with`` block.

    Entering registers the scheduler under ``run_id``; leaving unregisters it.
    Primarily for resume handling.
    """

    def __init__(self, *, run_id: str, scheduler, container):
        self.run_id = run_id
        self.scheduler = scheduler
        self.container = container
        # True only between a successful __enter__ and the matching __exit__.
        self._did_reg = False

    def __enter__(self):
        """Register the scheduler and return the guard itself.

        Raises:
            RuntimeError: if a different scheduler is already registered for ``run_id``.
        """
        registry = self.container.sched_registry
        current = registry.get(self.run_id)
        conflict = current is not None and current is not self.scheduler
        if conflict:
            # Be explicit to avoid silent clobbering
            raise RuntimeError(f"Scheduler already registered for run_id={self.run_id}")

        registry.register(self.run_id, self.scheduler)
        self._did_reg = True
        return self

    def __exit__(self, exc_type, exc, tb):
        """Unregister if this guard registered; never suppress the exception."""
        if not self._did_reg:
            return False
        try:
            self.container.sched_registry.unregister(self.run_id)
        finally:
            # Clear the flag even when unregister() itself fails.
            self._did_reg = False
        # Return False to propagate any exception (important so callers can detect failures)
        return False
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
from collections.abc import Callable
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
# ---- artifact services ----
|
|
6
|
+
from aethergraph.services.artifacts.fs_store import FSArtifactStore # AsyncArtifactStore
|
|
7
|
+
from aethergraph.services.artifacts.jsonl_index import JsonlArtifactIndex # AsyncArtifactIndex
|
|
8
|
+
|
|
9
|
+
# ---- channel services ----
|
|
10
|
+
from aethergraph.services.channel.channel_bus import ChannelBus
|
|
11
|
+
from aethergraph.services.clock.clock import SystemClock
|
|
12
|
+
from aethergraph.services.container.default_container import DefaultContainer, get_container
|
|
13
|
+
from aethergraph.services.continuations.stores.fs_store import (
|
|
14
|
+
FSContinuationStore, # AsyncContinuationStore
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
# ---- memory services ----
|
|
18
|
+
from aethergraph.services.memory.facade import MemoryFacade
|
|
19
|
+
from aethergraph.services.resume.router import ResumeRouter
|
|
20
|
+
from aethergraph.services.waits.wait_registry import WaitRegistry
|
|
21
|
+
|
|
22
|
+
from ..graph.task_node import TaskNodeRuntime
|
|
23
|
+
from .bound_memory import BoundMemoryAdapter
|
|
24
|
+
from .execution_context import ExecutionContext
|
|
25
|
+
from .node_services import NodeServices
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class RuntimeEnv:
    """Run-scoped view over a ``DefaultContainer``.

    The properties forward to attributes of ``container``; ``make_ctx`` builds
    the per-node ``ExecutionContext`` from them.
    """

    run_id: str
    graph_id: str | None = None
    graph_inputs: dict[str, Any] = field(default_factory=dict)
    outputs_by_node: dict[str, dict[str, Any]] = field(default_factory=dict)

    # container (DI)
    container: DefaultContainer = field(default_factory=get_container)

    # optional predicate to skip execution
    should_run_fn: Callable[[], bool] | None = None

    # --- convenience projections of commonly used services ---
    @property
    def schedulers(self) -> dict[str, Any]:
        """``container.schedulers``."""
        return self.container.schedulers

    @property
    def registry(self):
        """``container.registry``."""
        return self.container.registry

    @property
    def logger_factory(self):
        """``container.logger``."""
        return self.container.logger

    @property
    def clock(self) -> SystemClock:
        """``container.clock``."""
        return self.container.clock

    @property
    def channels(self) -> ChannelBus:
        """``container.channels``."""
        return self.container.channels

    @property
    def continuation_store(self) -> FSContinuationStore:
        """``container.cont_store``."""
        return self.container.cont_store

    @property
    def wait_registry(self) -> WaitRegistry:
        """``container.wait_registry``."""
        return self.container.wait_registry

    @property
    def artifacts(self) -> FSArtifactStore:
        """``container.artifacts``."""
        return self.container.artifacts

    @property
    def artifact_index(self) -> JsonlArtifactIndex:
        """``container.artifact_index``."""
        return self.container.artifact_index

    @property
    def memory_factory(self):
        """``container.memory_factory``."""
        return self.container.memory_factory

    @property
    def llm_service(self):
        """``container.llm``."""
        return self.container.llm

    @property
    def rag_facade(self):
        """``container.rag``."""
        return self.container.rag

    @property
    def mcp_service(self):
        """``container.mcp``."""
        return self.container.mcp

    @property
    def resume_router(self) -> ResumeRouter:
        """``container.resume_router``."""
        return self.container.resume_router

    def make_ctx(
        self, *, node: "TaskNodeRuntime", resume_payload: dict[str, Any] | None = None
    ) -> Any:
        """Build the ``ExecutionContext`` for ``node``.

        Creates a session memory facade and an ``ArtifactFacade`` for this
        run/graph/node, bundles them with the container services into a
        ``NodeServices``, and wraps everything in an ``ExecutionContext``.

        Args:
            node: runtime node; ``node_id``, ``tool_name`` and ``tool_version`` are read.
            resume_payload: forwarded unchanged to the ``ExecutionContext``.
        """
        node_id = node.node_id
        agent_id = getattr(node, "tool_name", None)

        # Defaults handed to the back-compat BoundMemoryAdapter below.
        memory_defaults = {
            "run_id": self.run_id,
            "graph_id": self.graph_id,
            "node_id": node_id,
            "agent_id": agent_id,
            "tags": [],
            "entities": [],
        }
        session_memory: MemoryFacade = self.memory_factory.for_session(
            run_id=self.run_id,
            graph_id=self.graph_id,
            node_id=node_id,
            agent_id=agent_id,
        )

        from aethergraph.services.artifacts.facade import ArtifactFacade

        node_artifacts = ArtifactFacade(
            run_id=self.run_id,
            graph_id=self.graph_id or "",
            node_id=node_id,
            tool_name=node.tool_name,
            tool_version=node.tool_version,  # to be filled from node if available
            store=self.artifacts,
            index=self.artifact_index,
        )

        node_services = NodeServices(
            channels=self.channels,
            continuation_store=self.continuation_store,
            artifact_store=node_artifacts,
            wait_registry=self.wait_registry,
            clock=self.clock,
            logger=self.logger_factory,
            kv=self.container.kv_hot,  # keep using hot kv for ephemeral
            memory=self.memory_factory,  # factory (for other sessions if needed)
            memory_facade=session_memory,  # bound memory for this run/node
            llm=self.llm_service,  # LLMService
            rag=self.rag_facade,  # RAGService
            mcp=self.mcp_service,  # MCPService
        )

        # Back-compat shim for old ctx.mem()
        legacy_memory = BoundMemoryAdapter(session_memory, memory_defaults)

        return ExecutionContext(
            run_id=self.run_id,
            graph_id=self.graph_id,
            graph_inputs=self.graph_inputs,
            outputs_by_node=self.outputs_by_node,
            services=node_services,
            logger_factory=self.logger_factory,
            clock=self.clock,
            resume_payload=resume_payload,
            should_run_fn=self.should_run_fn,
            bound_memory=legacy_memory,
            resume_router=self.resume_router,
        )
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from contextvars import ContextVar
|
|
2
|
+
|
|
3
|
+
from aethergraph.services.registry.unified_registry import UnifiedRegistry
|
|
4
|
+
|
|
5
|
+
# Module-level registry instance, created once at import time; current_registry()
# returns it when neither the current services nor the context variable supply one.
__singleton_registry: UnifiedRegistry = UnifiedRegistry()
# Context variable holding the registry set via set_current_registry();
# its default of None means "not set in this context".
_current_registry: ContextVar[UnifiedRegistry | None] = ContextVar("ag_registry", default=None)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def set_current_registry(reg: UnifiedRegistry):
    """Store ``reg`` in the ``ag_registry`` context variable.

    The token returned by ``ContextVar.set`` is discarded, so the previous
    value cannot be restored through this function.
    """
    _current_registry.set(reg)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def current_registry() -> UnifiedRegistry:
    """Return the registry to use in the current context.

    Resolution order:

    1. the ``registry`` attribute of ``current_services()``, when that call
       succeeds and the attribute exists and is not ``None``;
    2. the registry stored with ``set_current_registry`` in this context;
    3. the module-level singleton registry.

    A missing registry never raises; the singleton is the final fallback.
    """
    # Imported here rather than at module level, as in the original code.
    from .runtime_services import current_services

    # first try if services has a registry set
    try:
        registry = getattr(current_services(), "registry", None)
    except Exception:
        # Deliberate best effort: any failure to obtain the current services
        # simply moves on to the next source.
        registry = None
    if registry is not None:
        return registry

    # otherwise use contextvar, falling back to the singleton if not set in local context
    registry = _current_registry.get()
    return __singleton_registry if registry is None else registry
|