aethergraph 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aethergraph/__init__.py +49 -0
- aethergraph/config/__init__.py +0 -0
- aethergraph/config/config.py +121 -0
- aethergraph/config/context.py +16 -0
- aethergraph/config/llm.py +26 -0
- aethergraph/config/loader.py +60 -0
- aethergraph/config/runtime.py +9 -0
- aethergraph/contracts/errors/errors.py +44 -0
- aethergraph/contracts/services/artifacts.py +142 -0
- aethergraph/contracts/services/channel.py +72 -0
- aethergraph/contracts/services/continuations.py +23 -0
- aethergraph/contracts/services/eventbus.py +12 -0
- aethergraph/contracts/services/kv.py +24 -0
- aethergraph/contracts/services/llm.py +17 -0
- aethergraph/contracts/services/mcp.py +22 -0
- aethergraph/contracts/services/memory.py +108 -0
- aethergraph/contracts/services/resume.py +28 -0
- aethergraph/contracts/services/state_stores.py +33 -0
- aethergraph/contracts/services/wakeup.py +28 -0
- aethergraph/core/execution/base_scheduler.py +77 -0
- aethergraph/core/execution/forward_scheduler.py +777 -0
- aethergraph/core/execution/global_scheduler.py +634 -0
- aethergraph/core/execution/retry_policy.py +22 -0
- aethergraph/core/execution/step_forward.py +411 -0
- aethergraph/core/execution/step_result.py +18 -0
- aethergraph/core/execution/wait_types.py +72 -0
- aethergraph/core/graph/graph_builder.py +192 -0
- aethergraph/core/graph/graph_fn.py +219 -0
- aethergraph/core/graph/graph_io.py +67 -0
- aethergraph/core/graph/graph_refs.py +154 -0
- aethergraph/core/graph/graph_spec.py +115 -0
- aethergraph/core/graph/graph_state.py +59 -0
- aethergraph/core/graph/graphify.py +128 -0
- aethergraph/core/graph/interpreter.py +145 -0
- aethergraph/core/graph/node_handle.py +33 -0
- aethergraph/core/graph/node_spec.py +46 -0
- aethergraph/core/graph/node_state.py +63 -0
- aethergraph/core/graph/task_graph.py +747 -0
- aethergraph/core/graph/task_node.py +82 -0
- aethergraph/core/graph/utils.py +37 -0
- aethergraph/core/graph/visualize.py +239 -0
- aethergraph/core/runtime/ad_hoc_context.py +61 -0
- aethergraph/core/runtime/base_service.py +153 -0
- aethergraph/core/runtime/bind_adapter.py +42 -0
- aethergraph/core/runtime/bound_memory.py +69 -0
- aethergraph/core/runtime/execution_context.py +220 -0
- aethergraph/core/runtime/graph_runner.py +349 -0
- aethergraph/core/runtime/lifecycle.py +26 -0
- aethergraph/core/runtime/node_context.py +203 -0
- aethergraph/core/runtime/node_services.py +30 -0
- aethergraph/core/runtime/recovery.py +159 -0
- aethergraph/core/runtime/run_registration.py +33 -0
- aethergraph/core/runtime/runtime_env.py +157 -0
- aethergraph/core/runtime/runtime_registry.py +32 -0
- aethergraph/core/runtime/runtime_services.py +224 -0
- aethergraph/core/runtime/wakeup_watcher.py +40 -0
- aethergraph/core/tools/__init__.py +10 -0
- aethergraph/core/tools/builtins/channel_tools.py +194 -0
- aethergraph/core/tools/builtins/toolset.py +134 -0
- aethergraph/core/tools/toolkit.py +510 -0
- aethergraph/core/tools/waitable.py +109 -0
- aethergraph/plugins/channel/__init__.py +0 -0
- aethergraph/plugins/channel/adapters/__init__.py +0 -0
- aethergraph/plugins/channel/adapters/console.py +106 -0
- aethergraph/plugins/channel/adapters/file.py +102 -0
- aethergraph/plugins/channel/adapters/slack.py +285 -0
- aethergraph/plugins/channel/adapters/telegram.py +302 -0
- aethergraph/plugins/channel/adapters/webhook.py +104 -0
- aethergraph/plugins/channel/adapters/webui.py +134 -0
- aethergraph/plugins/channel/routes/__init__.py +0 -0
- aethergraph/plugins/channel/routes/console_routes.py +86 -0
- aethergraph/plugins/channel/routes/slack_routes.py +49 -0
- aethergraph/plugins/channel/routes/telegram_routes.py +26 -0
- aethergraph/plugins/channel/routes/webui_routes.py +136 -0
- aethergraph/plugins/channel/utils/__init__.py +0 -0
- aethergraph/plugins/channel/utils/slack_utils.py +278 -0
- aethergraph/plugins/channel/utils/telegram_utils.py +324 -0
- aethergraph/plugins/channel/websockets/slack_ws.py +68 -0
- aethergraph/plugins/channel/websockets/telegram_polling.py +151 -0
- aethergraph/plugins/mcp/fs_server.py +128 -0
- aethergraph/plugins/mcp/http_server.py +101 -0
- aethergraph/plugins/mcp/ws_server.py +180 -0
- aethergraph/plugins/net/http.py +10 -0
- aethergraph/plugins/utils/data_io.py +359 -0
- aethergraph/runner/__init__.py +5 -0
- aethergraph/runtime/__init__.py +62 -0
- aethergraph/server/__init__.py +3 -0
- aethergraph/server/app_factory.py +84 -0
- aethergraph/server/start.py +122 -0
- aethergraph/services/__init__.py +10 -0
- aethergraph/services/artifacts/facade.py +284 -0
- aethergraph/services/artifacts/factory.py +35 -0
- aethergraph/services/artifacts/fs_store.py +656 -0
- aethergraph/services/artifacts/jsonl_index.py +123 -0
- aethergraph/services/artifacts/paths.py +23 -0
- aethergraph/services/artifacts/sqlite_index.py +209 -0
- aethergraph/services/artifacts/utils.py +124 -0
- aethergraph/services/auth/dev.py +16 -0
- aethergraph/services/channel/channel_bus.py +293 -0
- aethergraph/services/channel/factory.py +44 -0
- aethergraph/services/channel/session.py +511 -0
- aethergraph/services/channel/wait_helpers.py +57 -0
- aethergraph/services/clock/clock.py +9 -0
- aethergraph/services/container/default_container.py +320 -0
- aethergraph/services/continuations/continuation.py +56 -0
- aethergraph/services/continuations/factory.py +34 -0
- aethergraph/services/continuations/stores/fs_store.py +264 -0
- aethergraph/services/continuations/stores/inmem_store.py +95 -0
- aethergraph/services/eventbus/inmem.py +21 -0
- aethergraph/services/features/static.py +10 -0
- aethergraph/services/kv/ephemeral.py +90 -0
- aethergraph/services/kv/factory.py +27 -0
- aethergraph/services/kv/layered.py +41 -0
- aethergraph/services/kv/sqlite_kv.py +128 -0
- aethergraph/services/llm/factory.py +157 -0
- aethergraph/services/llm/generic_client.py +542 -0
- aethergraph/services/llm/providers.py +3 -0
- aethergraph/services/llm/service.py +105 -0
- aethergraph/services/logger/base.py +36 -0
- aethergraph/services/logger/compat.py +50 -0
- aethergraph/services/logger/formatters.py +106 -0
- aethergraph/services/logger/std.py +203 -0
- aethergraph/services/mcp/helpers.py +23 -0
- aethergraph/services/mcp/http_client.py +70 -0
- aethergraph/services/mcp/mcp_tools.py +21 -0
- aethergraph/services/mcp/registry.py +14 -0
- aethergraph/services/mcp/service.py +100 -0
- aethergraph/services/mcp/stdio_client.py +70 -0
- aethergraph/services/mcp/ws_client.py +115 -0
- aethergraph/services/memory/bound.py +106 -0
- aethergraph/services/memory/distillers/episode.py +116 -0
- aethergraph/services/memory/distillers/rolling.py +74 -0
- aethergraph/services/memory/facade.py +633 -0
- aethergraph/services/memory/factory.py +78 -0
- aethergraph/services/memory/hotlog_kv.py +27 -0
- aethergraph/services/memory/indices.py +74 -0
- aethergraph/services/memory/io_helpers.py +72 -0
- aethergraph/services/memory/persist_fs.py +40 -0
- aethergraph/services/memory/resolver.py +152 -0
- aethergraph/services/metering/noop.py +4 -0
- aethergraph/services/prompts/file_store.py +41 -0
- aethergraph/services/rag/chunker.py +29 -0
- aethergraph/services/rag/facade.py +593 -0
- aethergraph/services/rag/index/base.py +27 -0
- aethergraph/services/rag/index/faiss_index.py +121 -0
- aethergraph/services/rag/index/sqlite_index.py +134 -0
- aethergraph/services/rag/index_factory.py +52 -0
- aethergraph/services/rag/parsers/md.py +7 -0
- aethergraph/services/rag/parsers/pdf.py +14 -0
- aethergraph/services/rag/parsers/txt.py +7 -0
- aethergraph/services/rag/utils/hybrid.py +39 -0
- aethergraph/services/rag/utils/make_fs_key.py +62 -0
- aethergraph/services/redactor/simple.py +16 -0
- aethergraph/services/registry/key_parsing.py +44 -0
- aethergraph/services/registry/registry_key.py +19 -0
- aethergraph/services/registry/unified_registry.py +185 -0
- aethergraph/services/resume/multi_scheduler_resume_bus.py +65 -0
- aethergraph/services/resume/router.py +73 -0
- aethergraph/services/schedulers/registry.py +41 -0
- aethergraph/services/secrets/base.py +7 -0
- aethergraph/services/secrets/env.py +8 -0
- aethergraph/services/state_stores/externalize.py +135 -0
- aethergraph/services/state_stores/graph_observer.py +131 -0
- aethergraph/services/state_stores/json_store.py +67 -0
- aethergraph/services/state_stores/resume_policy.py +119 -0
- aethergraph/services/state_stores/serialize.py +249 -0
- aethergraph/services/state_stores/utils.py +91 -0
- aethergraph/services/state_stores/validate.py +78 -0
- aethergraph/services/tracing/noop.py +18 -0
- aethergraph/services/waits/wait_registry.py +91 -0
- aethergraph/services/wakeup/memory_queue.py +57 -0
- aethergraph/services/wakeup/scanner_producer.py +56 -0
- aethergraph/services/wakeup/worker.py +31 -0
- aethergraph/tools/__init__.py +25 -0
- aethergraph/utils/optdeps.py +8 -0
- aethergraph-0.1.0a1.dist-info/METADATA +410 -0
- aethergraph-0.1.0a1.dist-info/RECORD +182 -0
- aethergraph-0.1.0a1.dist-info/WHEEL +5 -0
- aethergraph-0.1.0a1.dist-info/entry_points.txt +2 -0
- aethergraph-0.1.0a1.dist-info/licenses/LICENSE +176 -0
- aethergraph-0.1.0a1.dist-info/licenses/NOTICE +31 -0
- aethergraph-0.1.0a1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable, Iterable, Mapping
|
|
4
|
+
import re
|
|
5
|
+
import threading
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
try:
|
|
9
|
+
# Prefer packaging for correct PEP 440 / pre-release ordering
|
|
10
|
+
from packaging.version import Version
|
|
11
|
+
|
|
12
|
+
_has_packaging = True
|
|
13
|
+
except Exception:
|
|
14
|
+
_has_packaging = False
|
|
15
|
+
|
|
16
|
+
from .key_parsing import parse_ref
|
|
17
|
+
from .registry_key import NS, Key
|
|
18
|
+
|
|
19
|
+
# allow storing either the object, or a factory that returns the object on first use
|
|
20
|
+
RegistryObject = Any
|
|
21
|
+
RegistryFactory = Callable[[], Any]
|
|
22
|
+
RegistryValue = RegistryObject | RegistryFactory
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class UnifiedRegistry:
    """
    Runtime-only registry: (nspace, name, version) -> object (or lazy factory).
    Maintains a 'latest' pointer per (nspace, name).

    Thread-safe for concurrent get/register operations.
    """

    def __init__(self, *, allow_overwrite: bool = True):
        # (nspace, name) -> {version -> object | zero-arg factory}
        self._store: dict[tuple[str, str], dict[str, RegistryValue]] = {}
        # (nspace, name) -> version string currently considered "latest"
        self._latest: dict[tuple[str, str], str] = {}
        self._aliases: dict[tuple[str, str], dict[str, str]] = {}  # (ns,name) -> alias -> version
        self._lock = threading.RLock()
        self._allow_overwrite = allow_overwrite

    # ---------- registration ----------

    def register(self, *, nspace: str, name: str, version: str, obj: RegistryValue) -> None:
        """Register *obj* (or a factory producing it) under nspace:name@version.

        Raises ValueError for an unknown namespace, or when the version is
        already present and overwriting was disabled at construction.
        """
        if nspace not in NS:
            raise ValueError(f"Unknown namespace: {nspace}")
        key = (nspace, name)
        with self._lock:
            versions = self._store.setdefault(key, {})
            if (version in versions) and not self._allow_overwrite:
                raise ValueError(
                    f"{nspace}:{name}@{version} already registered and overwrite disabled"
                )
            versions[version] = obj
            # Recompute "latest" over all versions; the new entry only wins
            # if it actually sorts highest.
            self._latest[key] = self._pick_latest(versions.keys())

    def register_latest(
        self, *, nspace: str, name: str, obj: RegistryValue, version: str = "0.0.0"
    ) -> None:
        # Explicit version anyway; also marks latest via _pick_latest.
        # NOTE(review): with the default "0.0.0" this does NOT force the new
        # object to become latest when higher versions already exist.
        self.register(nspace=nspace, name=name, version=version, obj=obj)

    def alias(self, *, nspace: str, name: str, tag: str, to_version: str) -> None:
        """Define tag aliases like 'stable', 'canary' mapping to a concrete version."""
        key = (nspace, name)
        with self._lock:
            if key not in self._store or to_version not in self._store[key]:
                raise KeyError(f"Cannot alias to missing version: {nspace}:{name}@{to_version}")
            m = self._aliases.setdefault(key, {})
            m[tag] = to_version

    # ---------- resolve ----------

    def get(self, ref: str | Key) -> Any:
        """Resolve *ref* to the registered object, materializing factories once.

        Version resolution order: explicit version -> alias tag -> latest.
        Raises KeyError when the name or the resolved version is missing.
        """
        key = parse_ref(ref) if isinstance(ref, str) else ref
        k = (key.nspace, key.name)
        with self._lock:
            versions = self._store.get(k)
            if not versions:
                raise KeyError(f"Not found: {key.canonical()}")

            # resolve version: explicit → alias → latest
            ver = key.version
            ver = self._aliases.get(k, {}).get(ver, ver) if ver else self._latest.get(k)

            if ver not in versions:
                raise KeyError(f"Version not found: {key.nspace}:{key.name}@{ver}")

            val = versions[ver]
            # Materialize factories lazily (and cache the result).
            # NOTE(review): any callable is treated as a factory and invoked;
            # to register a callable as the value itself, wrap it in a lambda.
            if callable(val):
                obj = val()
                versions[ver] = obj
                return obj
            return val

    # ---------- listing / admin ----------

    def list(self, nspace: str | None = None) -> dict[str, str]:
        """Return { 'ns:name': '<latest_version>' } optionally filtered."""
        out: dict[str, str] = {}
        with self._lock:
            for (ns, name), _ in self._store.items():
                if nspace and ns != nspace:
                    continue
                out[f"{ns}:{name}"] = self._latest.get((ns, name), "unknown")
        return out

    def list_versions(self, *, nspace: str, name: str) -> Iterable[str]:
        """All registered versions for nspace:name, sorted oldest -> newest."""
        k = (nspace, name)
        with self._lock:
            return tuple(sorted(self._store.get(k, {}).keys(), key=self._semver_sort_key))

    def get_aliases(self, *, nspace: str, name: str) -> Mapping[str, str]:
        """Snapshot (copy) of the alias map (tag -> version) for nspace:name."""
        with self._lock:
            return dict(self._aliases.get((nspace, name), {}))

    def unregister(self, *, nspace: str, name: str, version: str | None = None) -> None:
        """Remove one version (or all when version is None) plus dead aliases."""
        with self._lock:
            k = (nspace, name)
            if k not in self._store:
                return
            if version is None:
                # remove all versions and aliases
                self._store.pop(k, None)
                self._latest.pop(k, None)
                self._aliases.pop(k, None)
                return
            vers = self._store[k]
            vers.pop(version, None)
            # drop aliases pointing to this version
            if k in self._aliases:
                for tag, v in list(self._aliases[k].items()):
                    if v == version:
                        self._aliases[k].pop(tag, None)
            # recompute latest
            if vers:
                self._latest[k] = self._pick_latest(vers.keys())
            else:
                self._store.pop(k, None)
                self._latest.pop(k, None)
                self._aliases.pop(k, None)

    def clear(self) -> None:
        """Drop every registration, latest pointer and alias."""
        with self._lock:
            self._store.clear()
            self._latest.clear()
            self._aliases.clear()

    # ---------- typed getters ----------

    def get_tool(self, name: str, version: str | None = None) -> Any:
        return self.get(Key(nspace="tool", name=name, version=version))

    def get_graph(self, name: str, version: str | None = None) -> Any:
        return self.get(Key(nspace="graph", name=name, version=version))

    def get_graphfn(self, name: str, version: str | None = None) -> Any:
        return self.get(Key(nspace="graphfn", name=name, version=version))

    def get_agent(self, name: str, version: str | None = None) -> Any:
        return self.get(Key(nspace="agent", name=name, version=version))

    # ---------- helpers ----------

    @staticmethod
    def _semver_sort_key(v: str):
        """Mutually comparable sort key for version strings.

        BUG FIX: the original returned a packaging ``Version`` for parseable
        strings and a plain tuple for everything else; sorting a list that
        mixed the two raised TypeError.  The key is now always a 2-tuple whose
        first element discriminates the representation, so any two keys are
        comparable: PEP 440 versions order among themselves (via packaging),
        and unparseable strings sort after them using a naive token key.
        """
        try:
            # Prefer packaging for correct PEP 440 / pre-release ordering.
            from packaging.version import Version as _Version

            return (0, _Version(v))
        except Exception:
            # packaging unavailable, or the string is not PEP 440 —
            # fall through to the naive key below.
            pass
        # naive: split on dots, dashes and plus; integer tokens sort before text
        parts = []
        for token in re.split(r"[.\-+]", v):
            try:
                parts.append((0, int(token)))
            except ValueError:
                parts.append((1, token))
        return (1, tuple(parts))

    def _pick_latest(self, versions: Iterable[str]) -> str:
        """Highest version among *versions*; '0.0.0' when the set is empty."""
        vs = list(versions)
        if not vs:
            return "0.0.0"
        return sorted(vs, key=self._semver_sort_key)[-1]
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import hmac
|
|
3
|
+
from logging import getLogger
|
|
4
|
+
|
|
5
|
+
from aethergraph.contracts.services.continuations import AsyncContinuationStore
|
|
6
|
+
from aethergraph.contracts.services.resume import ResumeBus
|
|
7
|
+
from aethergraph.services.schedulers.registry import SchedulerRegistry
|
|
8
|
+
|
|
9
|
+
log = getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class MultiSchedulerResumeBus(ResumeBus):
    """ResumeBus that routes resume events to per-run schedulers.

    Looks up the continuation in *store*, authenticates the caller-supplied
    token against it, then dispatches the event onto the owning scheduler's
    event loop.  Optionally deletes the continuation after the scheduler has
    processed the event.
    """

    def __init__(
        self,
        *,
        registry: SchedulerRegistry,
        store: AsyncContinuationStore,
        delete_after_resume: bool = True,
        logger=None,
    ):
        self.registry = registry  # run_id -> scheduler lookup
        self.store = store  # continuation persistence
        self.delete_after_resume = delete_after_resume
        self.logger = logger or log

    async def enqueue_resume(self, *, run_id: str, node_id: str, token: str, payload: dict) -> None:
        """Authenticate and dispatch a resume event for (run_id, node_id).

        Drops the event (logging a warning/error) when the continuation is
        missing, the token does not match, or no scheduler/loop is available.
        """
        cont = await self.store.get(run_id, node_id)
        # constant-time compare so token mismatches don't leak via timing
        if not cont or not hmac.compare_digest(cont.token, token):
            self.logger.warning(
                "[multi-resume-bus] invalid continuation/token for %s/%s", run_id, node_id
            )
            return

        sched = self.registry.get(run_id)
        if not sched:
            self.logger.warning("[multi-resume-bus] no scheduler for run_id=%s", run_id)
            return

        loop = getattr(sched, "loop", None)
        if loop is None:
            self.logger.error(
                "[multi-resume-bus] scheduler.loop is not set yet for run_id=%s", run_id
            )
            return

        # Always post to the scheduler's loop
        fut = asyncio.run_coroutine_threadsafe(
            sched.on_resume_event(run_id, node_id, payload), loop
        )
        try:
            await asyncio.wrap_future(fut)
        except Exception as e:
            self.logger.error("[multi-resume-bus] dispatch failed: %s", e, exc_info=True)
            return

        if self.delete_after_resume:
            try:
                await self.store.delete(run_id, node_id)
            except Exception as e:
                self.logger.warning(
                    f"[multi-resume-bus] failed to delete continuation for {run_id}/{node_id}: {e}"
                )

        # BUG FIX: the original additionally called
        # sched.post_resume_event_threadsafe(run_id, node_id, payload) here,
        # delivering the same resume event a second time after it had already
        # been dispatched (and awaited) via run_coroutine_threadsafe above.
        # That duplicate dispatch has been removed.
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hmac
|
|
4
|
+
from logging import getLogger
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from jsonschema import ValidationError, validate
|
|
8
|
+
|
|
9
|
+
from aethergraph.contracts.services.continuations import AsyncContinuationStore
|
|
10
|
+
from aethergraph.contracts.services.resume import ResumeBus
|
|
11
|
+
|
|
12
|
+
log = getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ResumeRouter:
    """Validates resume requests and routes them to the waiting node.

    Looks up the stored continuation for (run_id, node_id), authenticates the
    token, merges setup-time and adapter-time payloads, then either resolves
    an in-process cooperative wait or hands the event to the scheduler bus.
    """

    def __init__(
        self, *, store: AsyncContinuationStore, runner: ResumeBus, logger=None, wait_registry=None
    ):
        self.store = store
        self.runner = runner
        self.logger = logger or log
        self.waits = wait_registry

    async def resume(self, run_id: str, node_id: str, token: str, payload: dict[str, Any]) -> None:
        record = await self.store.get(run_id, node_id)
        if not record:
            self.logger.error("No continuation for %s/%s", run_id, node_id)
            raise PermissionError("Invalid continuation or token")

        # Constant-time token comparison.
        if not hmac.compare_digest(token, record.token):
            self.logger.error("Invalid token for %s/%s", run_id, node_id)
            raise PermissionError("Invalid continuation or token")

        # Setup-time payload stored on the continuation, overlaid with the
        # adapter-time payload; incoming values win on key collisions.
        stored = getattr(record, "payload", None) or {}
        merged: dict[str, Any] = {**stored, **(payload or {})}

        # Fast path: a coroutine is parked in-process on this token.
        if self.waits and token in getattr(self.waits, "_futs", {}):
            try:
                self.waits.resolve(token, merged)
                self.logger.info("Resolved cooperative wait for %s/%s", run_id, node_id)
                try:
                    await self.store.delete(run_id, node_id)
                except Exception as e:
                    self.logger.warning(
                        f"Failed to delete continuation after cooperative resolution: {e}"
                    )
                return
            except Exception as e:
                self.logger.error(
                    "Error resolving cooperative wait for %s/%s: %s",
                    run_id,
                    node_id,
                    e,
                    exc_info=True,
                )
                raise

        # Schema check applies to the adapter-supplied payload only.
        if record.resume_schema:
            try:
                validate(
                    instance=payload, schema=record.resume_schema
                )  # validate incoming payload only
            except ValidationError as e:
                self.logger.error("Resume payload validation error: %s", e.message)
                raise ValueError(f"Invalid resume payload: {e.message}") from e

        # Slow path: hand off to the scheduler bus.
        await self.runner.enqueue_resume(
            run_id=run_id, node_id=node_id, token=token, payload=merged
        )
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from contextlib import asynccontextmanager
|
|
2
|
+
import threading
|
|
3
|
+
|
|
4
|
+
from aethergraph.core.execution.global_scheduler import GlobalForwardScheduler
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class SchedulerRegistry:
    """Thread-safe lookup table mapping a run_id to its forward scheduler.

    Every operation takes a reentrant lock, so registration, removal and
    lookup are safe from any thread.
    """

    def __init__(self):
        self._by_run: dict[str, GlobalForwardScheduler] = {}
        self._lock = threading.RLock()

    def register(self, run_id: str, scheduler: GlobalForwardScheduler) -> None:
        """Bind *scheduler* to *run_id*, replacing any existing binding."""
        with self._lock:
            self._by_run[run_id] = scheduler

    def unregister(self, run_id: str) -> None:
        """Remove the binding for *run_id*; unknown run_ids are a no-op."""
        with self._lock:
            self._by_run.pop(run_id, None)

    def get(self, run_id: str) -> GlobalForwardScheduler | None:
        """Return the scheduler bound to *run_id*, or None when unbound."""
        with self._lock:
            return self._by_run.get(run_id)

    def list_run_ids(self) -> dict[str, GlobalForwardScheduler]:
        """Return a shallow snapshot of the run_id -> scheduler map."""
        with self._lock:
            return dict(self._by_run)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@asynccontextmanager
async def registered_scheduler(registry: SchedulerRegistry, run_id: str, scheduler):
    """Register *scheduler* for *run_id* for the duration of the block.

    The entry is always removed from *registry* on exit — even when the body
    raises — so stale run_id bindings cannot accumulate.
    """
    registry.register(run_id, scheduler)
    try:
        yield
    finally:
        registry.unregister(run_id)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
"""# Example usage:
|
|
39
|
+
async with registered_scheduler(SCHEDULERS, run_id, scheduler):
|
|
40
|
+
await scheduler.run()
|
|
41
|
+
"""
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import io
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _sha256_bytes(data: bytes) -> str:
|
|
10
|
+
return hashlib.sha256(data).hexdigest()
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _maybe_numpy_to_bytes(obj: Any) -> bytes | None:
|
|
14
|
+
try:
|
|
15
|
+
import numpy as np
|
|
16
|
+
|
|
17
|
+
if isinstance(obj, np.ndarray):
|
|
18
|
+
buf = io.BytesIO()
|
|
19
|
+
# .npy
|
|
20
|
+
import numpy as _np
|
|
21
|
+
|
|
22
|
+
_np.save(buf, obj, allow_pickle=False)
|
|
23
|
+
return buf.getvalue()
|
|
24
|
+
except Exception:
|
|
25
|
+
pass
|
|
26
|
+
return None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _maybe_torch_to_bytes(obj: Any) -> bytes | None:
|
|
30
|
+
try:
|
|
31
|
+
import torch
|
|
32
|
+
|
|
33
|
+
if torch.is_tensor(obj):
|
|
34
|
+
buf = io.BytesIO()
|
|
35
|
+
torch.save(obj, buf) # binary, portable within torch
|
|
36
|
+
return buf.getvalue()
|
|
37
|
+
except Exception:
|
|
38
|
+
pass
|
|
39
|
+
return None
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _maybe_json_bytes(obj: Any) -> bytes | None:
|
|
43
|
+
# Only if JSON-serializable (pure)
|
|
44
|
+
try:
|
|
45
|
+
payload = json.dumps(obj, ensure_ascii=False).encode("utf-8")
|
|
46
|
+
return payload
|
|
47
|
+
except Exception:
|
|
48
|
+
return None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _pickle_fallback(obj: Any) -> bytes | None:
|
|
52
|
+
try:
|
|
53
|
+
import pickle
|
|
54
|
+
|
|
55
|
+
return pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)
|
|
56
|
+
except Exception:
|
|
57
|
+
return None
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
async def externalize_to_artifact(
    obj: Any,
    *,
    run_id: str,
    graph_id: str,
    node_id: str,
    tool_name: str | None,
    tool_version: str | None,
    artifacts,  # AsyncArtifactStore
) -> dict[str, Any]:
    """
    Try to persist obj into artifact store; return a standard __aether_ref__ dict.
    Priority: bytes | numpy | torch | json | pickle

    The returned dict always carries:
      - "__aether_ref__": URI of the stored artifact
      - "mime": content type of the encoder that succeeded
      - "sha256": digest of the stored bytes
      - "kind": artifact kind as reported by the store

    Objects no encoder can handle are recorded as a small JSON marker holding
    a truncated repr(), so callers still receive a usable reference.
    """
    binary: bytes | None = None
    mime = "application/octet-stream"
    planned_ext = ".bin"

    # Already bytes?
    if isinstance(obj, bytes | bytearray):
        binary = bytes(obj)

    # numpy ndarray -> .npy bytes (None when numpy missing / not an ndarray)
    if binary is None:
        binary = _maybe_numpy_to_bytes(obj)
        if binary is not None:
            mime = "application/x-npy"
            planned_ext = ".npy"

    # torch tensor -> torch.save bytes
    if binary is None:
        binary = _maybe_torch_to_bytes(obj)
        if binary is not None:
            mime = "application/x-pytorch"
            planned_ext = ".pt"

    if binary is None:
        json_bytes = _maybe_json_bytes(obj)
        if json_bytes is not None:
            # Use .json for nicer preview; still count as binary save
            binary = json_bytes
            mime = "application/json"
            planned_ext = ".json"

    # Last resort: pickle.  NOTE(review): unpickling executes code — refs
    # produced here should only ever be loaded by trusted runtime code.
    if binary is None:
        binary = _pickle_fallback(obj)
        if binary is not None:
            mime = "application/x-pickle"
            planned_ext = ".pkl"

    if binary is None:
        # Give up: write a tiny JSON marker describing the object instead.
        a = await artifacts.save_json(
            {"note": "unexternalizable-object", "repr": repr(obj)[:200]}, suggested_uri=None
        )
        return {
            "__aether_ref__": a.uri,
            "mime": "application/json",
            "sha256": a.sha256,
            "kind": a.kind,
        }

    sha = _sha256_bytes(binary)
    # Use staged writer for atomicity FIXME: this write causes async loop errors, disable externalize for now
    async with await artifacts.open_writer(
        kind="blob",
        run_id=run_id,
        graph_id=graph_id,
        node_id=node_id,
        tool_name=tool_name or "externalize",
        tool_version=tool_version or "0.1.0",
        planned_ext=planned_ext,
        pin=True,
    ) as w:
        await w.write(binary)  # FS wrapper should support .write
        a = await w.commit(mime=mime, sha256=sha)

    return {"__aether_ref__": a.uri, "mime": mime, "sha256": sha, "kind": a.kind}
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# aethergraph/persist/observer.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import time
|
|
5
|
+
|
|
6
|
+
from aethergraph.contracts.services.artifacts import AsyncArtifactStore
|
|
7
|
+
from aethergraph.contracts.services.state_stores import GraphStateStore, StateEvent
|
|
8
|
+
from aethergraph.core.graph.task_node import NodeStatus
|
|
9
|
+
from aethergraph.services.state_stores.serialize import _jsonish_outputs_with_refs
|
|
10
|
+
from aethergraph.services.state_stores.utils import snapshot_from_graph
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class PersistenceObserver:
    """Observer that journals graph-state changes to a GraphStateStore.

    Each callback appends one typed StateEvent (STATUS / OUTPUT /
    INPUTS_BOUND / PATCH) and then opportunistically writes a full snapshot,
    throttled by both an event count (``snapshot_every``) and a wall-clock
    interval (``min_interval_s``).  Events are kept JSON-safe and
    self-contained: outputs/inputs are sanitized with ``artifacts=None`` and
    ``allow_externalize=False``, so observing never writes artifacts.
    """

    def __init__(
        self,
        *,
        store: GraphStateStore,
        artifact_store: AsyncArtifactStore,
        spec_hash: str,
        snapshot_every: int = 50,
        min_interval_s: float = 5.0,
    ):
        # NOTE(review): snapshot_every == 0 would raise ZeroDivisionError in
        # _maybe_snapshot; callers are expected to pass a positive count.
        self.store = store
        self.artifact_store = artifact_store
        self.spec_hash = spec_hash
        self.snapshot_every = snapshot_every
        self.min_interval_s = min_interval_s
        self._event_count = 0  # events appended since construction
        self._last_snap_ts = 0.0  # wall-clock time of last snapshot taken

    async def on_node_status_change(self, runtime_node):
        """Append a STATUS event for *runtime_node*'s current status."""
        # runtime node keeps a private backref to its owning graph
        g = runtime_node._parent_graph
        ev = StateEvent(
            run_id=g.state.run_id or "unknown",
            graph_id=g.graph_id,
            rev=g.state.rev,
            ts=time.time(),
            kind="STATUS",
            payload={
                "node_id": runtime_node.node_id,
                # status may be a NodeStatus enum or already a plain string
                "status": runtime_node.state.status.name
                if isinstance(runtime_node.state.status, NodeStatus)
                else str(runtime_node.state.status),
            },
        )
        await self.store.append_event(ev)
        await self._maybe_snapshot(g)

    async def on_node_output_change(self, runtime_node):
        """Append an OUTPUT event carrying the node's JSON-sanitized outputs."""
        g = runtime_node._parent_graph
        # make outputs JSON-safe for events (no externalization)
        safe_outputs = await _jsonish_outputs_with_refs(
            outputs=getattr(runtime_node.state, "outputs", None),
            run_id=g.state.run_id or "unknown",
            graph_id=g.graph_id,
            node_id=runtime_node.node_id,
            # tool identity: prefer state, fall back to the node spec
            tool_name=getattr(runtime_node.state, "tool_name", None)
            or getattr(getattr(runtime_node, "spec", None), "tool_name", None),
            tool_version=getattr(runtime_node.state, "tool_version", None)
            or getattr(getattr(runtime_node, "spec", None), "tool_version", None),
            artifacts=None,  # ← keep events self-contained
            allow_externalize=False,  # ← do not write artifacts from events
        )

        ev = StateEvent(
            run_id=g.state.run_id or "unknown",
            graph_id=g.graph_id,
            rev=g.state.rev,
            ts=time.time(),
            kind="OUTPUT",
            payload={
                "node_id": runtime_node.node_id,
                "outputs": safe_outputs or {},  # ✅ JSON-safe
            },
        )
        await self.store.append_event(ev)
        await self._maybe_snapshot(g)

    async def on_inputs_bound(self, graph):
        """Append an INPUTS_BOUND event with the graph's sanitized bound inputs."""
        # also sanitize inputs for events (in case user passed non-JSON)
        safe_inputs = await _jsonish_outputs_with_refs(
            outputs=getattr(graph.state, "_bound_inputs", None),
            run_id=graph.state.run_id or "unknown",
            graph_id=graph.graph_id,
            node_id="__graph_inputs__",
            tool_name=None,
            tool_version=None,
            artifacts=None,
            allow_externalize=False,
        )
        ev = StateEvent(
            run_id=graph.state.run_id or "unknown",
            graph_id=graph.graph_id,
            rev=graph.state.rev,
            ts=time.time(),
            kind="INPUTS_BOUND",
            payload={"inputs": safe_inputs or {}},  # JSON-safe
        )
        await self.store.append_event(ev)
        await self._maybe_snapshot(graph)

    async def on_patch_applied(self, graph, patch):
        """Append a PATCH event recording the applied patch's attributes."""
        ev = StateEvent(
            run_id=graph.state.run_id or "unknown",
            graph_id=graph.graph_id,
            rev=graph.state.rev,
            ts=time.time(),
            kind="PATCH",
            # NOTE(review): assumes patch is a plain object whose __dict__ is
            # JSON-serializable — confirm for slotted/nested patch types.
            payload={"patch": patch.__dict__},
        )
        await self.store.append_event(ev)
        await self._maybe_snapshot(graph)

    async def _maybe_snapshot(self, graph):
        """Write a full snapshot when both throttles allow it.

        Fires only when the running event count hits a multiple of
        snapshot_every AND at least min_interval_s has elapsed since the last
        snapshot; otherwise this is a cheap counter increment.
        """
        self._event_count += 1
        now = time.time()
        if (self._event_count % self.snapshot_every == 0) and (
            now - self._last_snap_ts >= self.min_interval_s
        ):
            snap = await snapshot_from_graph(
                run_id=graph.state.run_id or "unknown",
                graph_id=graph.graph_id,
                rev=graph.state.rev,
                spec_hash=self.spec_hash,
                state_obj=graph.state,
                artifacts=self.artifact_store,
                allow_externalize=False,  # keep snapshots JSON-only (opaque refs)
                include_wait_spec=True,
            )
            await self.store.save_snapshot(snap)
            self._last_snap_ts = now