aethergraph 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aethergraph/__init__.py +49 -0
- aethergraph/config/__init__.py +0 -0
- aethergraph/config/config.py +121 -0
- aethergraph/config/context.py +16 -0
- aethergraph/config/llm.py +26 -0
- aethergraph/config/loader.py +60 -0
- aethergraph/config/runtime.py +9 -0
- aethergraph/contracts/errors/errors.py +44 -0
- aethergraph/contracts/services/artifacts.py +142 -0
- aethergraph/contracts/services/channel.py +72 -0
- aethergraph/contracts/services/continuations.py +23 -0
- aethergraph/contracts/services/eventbus.py +12 -0
- aethergraph/contracts/services/kv.py +24 -0
- aethergraph/contracts/services/llm.py +17 -0
- aethergraph/contracts/services/mcp.py +22 -0
- aethergraph/contracts/services/memory.py +108 -0
- aethergraph/contracts/services/resume.py +28 -0
- aethergraph/contracts/services/state_stores.py +33 -0
- aethergraph/contracts/services/wakeup.py +28 -0
- aethergraph/core/execution/base_scheduler.py +77 -0
- aethergraph/core/execution/forward_scheduler.py +777 -0
- aethergraph/core/execution/global_scheduler.py +634 -0
- aethergraph/core/execution/retry_policy.py +22 -0
- aethergraph/core/execution/step_forward.py +411 -0
- aethergraph/core/execution/step_result.py +18 -0
- aethergraph/core/execution/wait_types.py +72 -0
- aethergraph/core/graph/graph_builder.py +192 -0
- aethergraph/core/graph/graph_fn.py +219 -0
- aethergraph/core/graph/graph_io.py +67 -0
- aethergraph/core/graph/graph_refs.py +154 -0
- aethergraph/core/graph/graph_spec.py +115 -0
- aethergraph/core/graph/graph_state.py +59 -0
- aethergraph/core/graph/graphify.py +128 -0
- aethergraph/core/graph/interpreter.py +145 -0
- aethergraph/core/graph/node_handle.py +33 -0
- aethergraph/core/graph/node_spec.py +46 -0
- aethergraph/core/graph/node_state.py +63 -0
- aethergraph/core/graph/task_graph.py +747 -0
- aethergraph/core/graph/task_node.py +82 -0
- aethergraph/core/graph/utils.py +37 -0
- aethergraph/core/graph/visualize.py +239 -0
- aethergraph/core/runtime/ad_hoc_context.py +61 -0
- aethergraph/core/runtime/base_service.py +153 -0
- aethergraph/core/runtime/bind_adapter.py +42 -0
- aethergraph/core/runtime/bound_memory.py +69 -0
- aethergraph/core/runtime/execution_context.py +220 -0
- aethergraph/core/runtime/graph_runner.py +349 -0
- aethergraph/core/runtime/lifecycle.py +26 -0
- aethergraph/core/runtime/node_context.py +203 -0
- aethergraph/core/runtime/node_services.py +30 -0
- aethergraph/core/runtime/recovery.py +159 -0
- aethergraph/core/runtime/run_registration.py +33 -0
- aethergraph/core/runtime/runtime_env.py +157 -0
- aethergraph/core/runtime/runtime_registry.py +32 -0
- aethergraph/core/runtime/runtime_services.py +224 -0
- aethergraph/core/runtime/wakeup_watcher.py +40 -0
- aethergraph/core/tools/__init__.py +10 -0
- aethergraph/core/tools/builtins/channel_tools.py +194 -0
- aethergraph/core/tools/builtins/toolset.py +134 -0
- aethergraph/core/tools/toolkit.py +510 -0
- aethergraph/core/tools/waitable.py +109 -0
- aethergraph/plugins/channel/__init__.py +0 -0
- aethergraph/plugins/channel/adapters/__init__.py +0 -0
- aethergraph/plugins/channel/adapters/console.py +106 -0
- aethergraph/plugins/channel/adapters/file.py +102 -0
- aethergraph/plugins/channel/adapters/slack.py +285 -0
- aethergraph/plugins/channel/adapters/telegram.py +302 -0
- aethergraph/plugins/channel/adapters/webhook.py +104 -0
- aethergraph/plugins/channel/adapters/webui.py +134 -0
- aethergraph/plugins/channel/routes/__init__.py +0 -0
- aethergraph/plugins/channel/routes/console_routes.py +86 -0
- aethergraph/plugins/channel/routes/slack_routes.py +49 -0
- aethergraph/plugins/channel/routes/telegram_routes.py +26 -0
- aethergraph/plugins/channel/routes/webui_routes.py +136 -0
- aethergraph/plugins/channel/utils/__init__.py +0 -0
- aethergraph/plugins/channel/utils/slack_utils.py +278 -0
- aethergraph/plugins/channel/utils/telegram_utils.py +324 -0
- aethergraph/plugins/channel/websockets/slack_ws.py +68 -0
- aethergraph/plugins/channel/websockets/telegram_polling.py +151 -0
- aethergraph/plugins/mcp/fs_server.py +128 -0
- aethergraph/plugins/mcp/http_server.py +101 -0
- aethergraph/plugins/mcp/ws_server.py +180 -0
- aethergraph/plugins/net/http.py +10 -0
- aethergraph/plugins/utils/data_io.py +359 -0
- aethergraph/runner/__init__.py +5 -0
- aethergraph/runtime/__init__.py +62 -0
- aethergraph/server/__init__.py +3 -0
- aethergraph/server/app_factory.py +84 -0
- aethergraph/server/start.py +122 -0
- aethergraph/services/__init__.py +10 -0
- aethergraph/services/artifacts/facade.py +284 -0
- aethergraph/services/artifacts/factory.py +35 -0
- aethergraph/services/artifacts/fs_store.py +656 -0
- aethergraph/services/artifacts/jsonl_index.py +123 -0
- aethergraph/services/artifacts/paths.py +23 -0
- aethergraph/services/artifacts/sqlite_index.py +209 -0
- aethergraph/services/artifacts/utils.py +124 -0
- aethergraph/services/auth/dev.py +16 -0
- aethergraph/services/channel/channel_bus.py +293 -0
- aethergraph/services/channel/factory.py +44 -0
- aethergraph/services/channel/session.py +511 -0
- aethergraph/services/channel/wait_helpers.py +57 -0
- aethergraph/services/clock/clock.py +9 -0
- aethergraph/services/container/default_container.py +320 -0
- aethergraph/services/continuations/continuation.py +56 -0
- aethergraph/services/continuations/factory.py +34 -0
- aethergraph/services/continuations/stores/fs_store.py +264 -0
- aethergraph/services/continuations/stores/inmem_store.py +95 -0
- aethergraph/services/eventbus/inmem.py +21 -0
- aethergraph/services/features/static.py +10 -0
- aethergraph/services/kv/ephemeral.py +90 -0
- aethergraph/services/kv/factory.py +27 -0
- aethergraph/services/kv/layered.py +41 -0
- aethergraph/services/kv/sqlite_kv.py +128 -0
- aethergraph/services/llm/factory.py +157 -0
- aethergraph/services/llm/generic_client.py +542 -0
- aethergraph/services/llm/providers.py +3 -0
- aethergraph/services/llm/service.py +105 -0
- aethergraph/services/logger/base.py +36 -0
- aethergraph/services/logger/compat.py +50 -0
- aethergraph/services/logger/formatters.py +106 -0
- aethergraph/services/logger/std.py +203 -0
- aethergraph/services/mcp/helpers.py +23 -0
- aethergraph/services/mcp/http_client.py +70 -0
- aethergraph/services/mcp/mcp_tools.py +21 -0
- aethergraph/services/mcp/registry.py +14 -0
- aethergraph/services/mcp/service.py +100 -0
- aethergraph/services/mcp/stdio_client.py +70 -0
- aethergraph/services/mcp/ws_client.py +115 -0
- aethergraph/services/memory/bound.py +106 -0
- aethergraph/services/memory/distillers/episode.py +116 -0
- aethergraph/services/memory/distillers/rolling.py +74 -0
- aethergraph/services/memory/facade.py +633 -0
- aethergraph/services/memory/factory.py +78 -0
- aethergraph/services/memory/hotlog_kv.py +27 -0
- aethergraph/services/memory/indices.py +74 -0
- aethergraph/services/memory/io_helpers.py +72 -0
- aethergraph/services/memory/persist_fs.py +40 -0
- aethergraph/services/memory/resolver.py +152 -0
- aethergraph/services/metering/noop.py +4 -0
- aethergraph/services/prompts/file_store.py +41 -0
- aethergraph/services/rag/chunker.py +29 -0
- aethergraph/services/rag/facade.py +593 -0
- aethergraph/services/rag/index/base.py +27 -0
- aethergraph/services/rag/index/faiss_index.py +121 -0
- aethergraph/services/rag/index/sqlite_index.py +134 -0
- aethergraph/services/rag/index_factory.py +52 -0
- aethergraph/services/rag/parsers/md.py +7 -0
- aethergraph/services/rag/parsers/pdf.py +14 -0
- aethergraph/services/rag/parsers/txt.py +7 -0
- aethergraph/services/rag/utils/hybrid.py +39 -0
- aethergraph/services/rag/utils/make_fs_key.py +62 -0
- aethergraph/services/redactor/simple.py +16 -0
- aethergraph/services/registry/key_parsing.py +44 -0
- aethergraph/services/registry/registry_key.py +19 -0
- aethergraph/services/registry/unified_registry.py +185 -0
- aethergraph/services/resume/multi_scheduler_resume_bus.py +65 -0
- aethergraph/services/resume/router.py +73 -0
- aethergraph/services/schedulers/registry.py +41 -0
- aethergraph/services/secrets/base.py +7 -0
- aethergraph/services/secrets/env.py +8 -0
- aethergraph/services/state_stores/externalize.py +135 -0
- aethergraph/services/state_stores/graph_observer.py +131 -0
- aethergraph/services/state_stores/json_store.py +67 -0
- aethergraph/services/state_stores/resume_policy.py +119 -0
- aethergraph/services/state_stores/serialize.py +249 -0
- aethergraph/services/state_stores/utils.py +91 -0
- aethergraph/services/state_stores/validate.py +78 -0
- aethergraph/services/tracing/noop.py +18 -0
- aethergraph/services/waits/wait_registry.py +91 -0
- aethergraph/services/wakeup/memory_queue.py +57 -0
- aethergraph/services/wakeup/scanner_producer.py +56 -0
- aethergraph/services/wakeup/worker.py +31 -0
- aethergraph/tools/__init__.py +25 -0
- aethergraph/utils/optdeps.py +8 -0
- aethergraph-0.1.0a1.dist-info/METADATA +410 -0
- aethergraph-0.1.0a1.dist-info/RECORD +182 -0
- aethergraph-0.1.0a1.dist-info/WHEEL +5 -0
- aethergraph-0.1.0a1.dist-info/entry_points.txt +2 -0
- aethergraph-0.1.0a1.dist-info/licenses/LICENSE +176 -0
- aethergraph-0.1.0a1.dist-info/licenses/NOTICE +31 -0
- aethergraph-0.1.0a1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import threading
|
|
5
|
+
import time
|
|
6
|
+
|
|
7
|
+
from aethergraph.contracts.services.state_stores import GraphSnapshot, GraphStateStore, StateEvent
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class JsonGraphStateStore(GraphStateStore):
    """File-backed GraphStateStore: one directory per run under ``root``.

    Snapshots are written atomically (tmp file + fsync + os.replace) as
    ``snapshot_<rev>_<ts>.json``; events are appended to ``events.jsonl``.
    Mutating operations are serialized with a re-entrant thread lock.
    """

    def __init__(self, root: str):
        # Root directory for all runs; created eagerly if missing.
        self.root = root
        os.makedirs(root, exist_ok=True)
        # NOTE(review): _alock is created but never used anywhere in this
        # class — confirm whether async callers were meant to go through it.
        self._alock = asyncio.Lock()
        self._tlock = threading.RLock()

    def _run_dir(self, run_id: str) -> str:
        """Return (creating if needed) the directory holding one run's files."""
        d = os.path.join(self.root, run_id)
        os.makedirs(d, exist_ok=True)
        return d

    async def save_snapshot(self, snap: GraphSnapshot) -> None:
        """Durably persist a snapshot: write tmp file, fsync, atomic rename."""
        d = self._run_dir(snap.run_id)
        ts = int(time.time())
        # Zero-padded rev makes lexicographic filename order match numeric rev
        # order (for rev < 10**8); load_latest_snapshot relies on this.
        fn = f"snapshot_{snap.rev:08d}_{ts}.json"
        tmp = os.path.join(d, fn + ".tmp")
        dst = os.path.join(d, fn)
        with self._tlock:  # <— thread-safe region
            with open(tmp, "w", encoding="utf-8") as f:
                json.dump(snap.__dict__, f, ensure_ascii=False)
                f.flush()
                os.fsync(f.fileno())  # force bytes to disk before the rename
            os.replace(tmp, dst)  # atomic: readers never observe a partial file

    async def load_latest_snapshot(self, run_id: str) -> GraphSnapshot | None:
        """Load the newest snapshot for ``run_id``, or None if none exists."""
        d = self._run_dir(run_id)
        with self._tlock:
            files = [x for x in os.listdir(d) if x.startswith("snapshot_")]
            if not files:
                return None
            # Plain sort is correct because rev is zero-padded in the filename.
            files.sort()
            with open(os.path.join(d, files[-1]), encoding="utf-8") as f:
                return GraphSnapshot(**json.load(f))

    async def append_event(self, ev: StateEvent) -> None:
        """Append one event as a JSON line to events.jsonl, fsync'd."""
        p = os.path.join(self._run_dir(ev.run_id), "events.jsonl")
        line = json.dumps(ev.__dict__, ensure_ascii=False) + "\n"
        with self._tlock, open(p, "a", encoding="utf-8") as f:
            f.write(line)
            f.flush()
            os.fsync(f.fileno())

    async def load_events_since(self, run_id: str, from_rev: int) -> list[StateEvent]:
        """Return all events whose rev is strictly greater than ``from_rev``."""
        p = os.path.join(self._run_dir(run_id), "events.jsonl")
        if not os.path.exists(p):
            return []
        out = []
        with open(p, encoding="utf-8") as f:
            for line in f:
                rec = json.loads(line)
                if rec["rev"] > from_rev:
                    out.append(StateEvent(**rec))
        return out

    async def list_run_ids(self, graph_id: str | None = None) -> list[str]:
        # best-effort: return all directories; filter by graph_id by reading latest snapshot if needed
        return [d for d in os.listdir(self.root) if os.path.isdir(os.path.join(self.root, d))]
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# aethergraph/core/persist/resume_policy.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from aethergraph.contracts.errors.errors import ResumeIncompatibleSnapshot
|
|
7
|
+
|
|
8
|
+
_JSON_PRIMITIVES = (str, int, float, bool, type(None))
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _is_json_like(x: Any) -> bool:
    """Recursively decide whether *x* is strictly JSON-representable.

    Dicts carrying an "__aether_ref__" marker are rejected outright, and
    dict keys must be strings; anything other than JSON primitives,
    lists and dicts fails.
    """
    if isinstance(x, _JSON_PRIMITIVES):
        return True
    if isinstance(x, list):
        return all(_is_json_like(item) for item in x)
    if not isinstance(x, dict):
        return False
    # Treat any dict that has __aether_ref__ as NOT allowed in strict JSON-only policy
    if "__aether_ref__" in x:
        return False
    for key, value in x.items():
        if not isinstance(key, str) or not _is_json_like(value):
            return False
    return True
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _walk_non_json(obj, path="$"):
|
|
25
|
+
from collections.abc import Mapping, Sequence
|
|
26
|
+
|
|
27
|
+
if isinstance(obj, str | int | float | bool) or obj is None:
|
|
28
|
+
return
|
|
29
|
+
if isinstance(obj, Mapping):
|
|
30
|
+
if "__aether_ref__" in obj:
|
|
31
|
+
yield path
|
|
32
|
+
return
|
|
33
|
+
for k, v in obj.items():
|
|
34
|
+
yield from _walk_non_json(v, f"{path}.{k}")
|
|
35
|
+
return
|
|
36
|
+
if isinstance(obj, Sequence) and not isinstance(obj, str | bytes | bytearray):
|
|
37
|
+
for i, v in enumerate(obj):
|
|
38
|
+
yield from _walk_non_json(v, f"{path}[{i}]")
|
|
39
|
+
return
|
|
40
|
+
yield path # non-JSON leaf
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def assert_snapshot_json_only(
|
|
44
|
+
run_id: str,
|
|
45
|
+
snap_json: dict,
|
|
46
|
+
*,
|
|
47
|
+
mode: str = "reuse_only", # "strict" | "reuse_only"
|
|
48
|
+
ignore_nodes: set[str] | None = None, # node_ids to skip (e.g., graph output producers)
|
|
49
|
+
) -> None:
|
|
50
|
+
"""
|
|
51
|
+
- mode="strict": scan ALL nodes; forbid any non-JSON/ref anywhere.
|
|
52
|
+
- mode="reuse_only": ONLY check nodes whose outputs reuse (status DONE/SKIPPED).
|
|
53
|
+
- ignore_nodes: always skip these node_ids (e.g., final/sink nodes that feed graph outputs).
|
|
54
|
+
"""
|
|
55
|
+
ignore_nodes = ignore_nodes or set()
|
|
56
|
+
reasons: list[str] = []
|
|
57
|
+
|
|
58
|
+
state = snap_json.get("state") or snap_json
|
|
59
|
+
nodes = state.get("nodes", {})
|
|
60
|
+
|
|
61
|
+
def _should_check(nid: str, ns: dict) -> bool:
|
|
62
|
+
if nid in ignore_nodes:
|
|
63
|
+
return False
|
|
64
|
+
if mode == "strict":
|
|
65
|
+
return True
|
|
66
|
+
st = (ns.get("status") or "").upper()
|
|
67
|
+
return st in {"DONE", "SKIPPED"}
|
|
68
|
+
|
|
69
|
+
for nid, ns in nodes.items():
|
|
70
|
+
if not _should_check(nid, ns):
|
|
71
|
+
continue
|
|
72
|
+
outs = ns.get("outputs")
|
|
73
|
+
if not outs:
|
|
74
|
+
continue
|
|
75
|
+
bad_paths = list(_walk_non_json(outs))
|
|
76
|
+
if bad_paths:
|
|
77
|
+
reasons.append(
|
|
78
|
+
f"node '{nid}' outputs contain non-JSON or refs at: "
|
|
79
|
+
+ ", ".join(bad_paths[:8])
|
|
80
|
+
+ (" ..." if len(bad_paths) > 8 else "")
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
if reasons:
|
|
84
|
+
raise ResumeIncompatibleSnapshot(run_id, reasons)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def output_node_ids_from_graph(graph) -> set[str]:
    """
    Collect node_ids that directly feed the declared graph outputs.
    Expected binding shape:
      {'sum': {'_type': 'ref', 'from': 'combine_3', 'key': 'sum'}}

    Unknown shapes (lists of refs, nested dicts) are walked tolerantly;
    a graph without a usable io_signature() yields an empty set.
    """
    try:
        bindings = graph.io_signature().get("outputs", {}).get("bindings", {}) or {}
    except Exception:
        bindings = {}

    producers: set[str] = set()
    # Iterative walk over every binding value (order is irrelevant for a set).
    pending = list(bindings.values())
    while pending:
        item = pending.pop()
        if isinstance(item, dict):
            if item.get("_type") == "ref":
                # Canonical case: record the producing node and stop descending.
                source = item.get("from")
                if isinstance(source, str) and source:
                    producers.add(source)
            else:
                # Be forgiving if future formats appear (nested dicts, etc.)
                pending.extend(item.values())
        elif isinstance(item, (list, tuple)):
            pending.extend(item)

    return producers
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Mapping, Sequence
|
|
4
|
+
from dataclasses import asdict
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from .externalize import externalize_to_artifact
|
|
8
|
+
|
|
9
|
+
_JSON_SCALARS = (str, int, float, bool, type(None))
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def is_json_pure(obj: Any) -> bool:
    """True iff *obj* is plain JSON: scalars, lists/tuples, str-keyed dicts.

    Mappings carrying "__aether_ref__" or "__externalized__": True are
    externalization markers and are NOT considered pure.
    """
    if isinstance(obj, _JSON_SCALARS):
        return True
    if isinstance(obj, Mapping):
        # disallow references in "pure" mode
        if "__aether_ref__" in obj or obj.get("__externalized__") is True:
            return False
        return all(
            isinstance(key, str) and is_json_pure(value)
            for key, value in obj.items()
        )
    listlike = isinstance(obj, Sequence) and not isinstance(
        obj, (str, bytes, bytearray)
    )
    return listlike and all(is_json_pure(item) for item in obj)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def jsonish_or_ref(
    obj: Any,
    *,
    mk_ref: callable[[Any], dict[str, Any]] | None = None,
) -> tuple[Any, dict[str, Any] | None]:
    """Return a JSON-safe stand-in for *obj* plus optional ref metadata.

    Contract:
      - JSON-pure obj -> (obj, None)
      - otherwise     -> ({"__aether_ref__": <...>}, ref_meta)

    When provided, mk_ref(obj) must yield a dict containing at least
    {"__aether_ref__": "<uri-or-id>"}; its extra keys are copied onto
    the returned marker.
    """
    if is_json_pure(obj):
        return obj, None

    if mk_ref is None:
        # No externalizer available: opaque markers (these block resume;
        # the caller may still persist artifacts separately).
        return {"__aether_ref__": "opaque:nonjson"}, {"__aether_ref__": "opaque:nonjson"}

    ref = mk_ref(obj) or {"__aether_ref__": "opaque:nonjson"}
    # Marker keeps __aether_ref__ first, then any extra metadata from the ref.
    marker: dict[str, Any] = {
        "__aether_ref__": ref.get("__aether_ref__", "opaque:nonjson")
    }
    for key, value in ref.items():
        if key != "__aether_ref__":
            marker[key] = value
    return marker, ref
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def map_jsonish_or_ref(
    payload: Any,
    *,
    mk_ref: callable[[Any], dict[str, Any]] | None = None,
) -> tuple[Any, bool]:
    """
    Walk nested structures. Returns (jsonish_payload, had_refs).
    Any non-JSON leaf becomes a {"__aether_ref__": ...} marker via mk_ref.
    Non-string dict keys are stringified to keep the snapshot JSON-safe.
    """
    if is_json_pure(payload):
        return payload, False

    if isinstance(payload, Mapping):
        converted: dict[str, Any] = {}
        any_ref = False
        for key, value in payload.items():
            sub, sub_had_ref = map_jsonish_or_ref(value, mk_ref=mk_ref)
            converted[key if isinstance(key, str) else str(key)] = sub
            any_ref = any_ref or sub_had_ref
        return converted, any_ref

    if isinstance(payload, Sequence) and not isinstance(payload, (str, bytes, bytearray)):
        items: list[Any] = []
        any_ref = False
        for value in payload:
            sub, sub_had_ref = map_jsonish_or_ref(value, mk_ref=mk_ref)
            items.append(sub)
            any_ref = any_ref or sub_had_ref
        return items, any_ref

    # Non-JSON leaf: replace with a reference marker.
    marker, _meta = jsonish_or_ref(payload, mk_ref=mk_ref)
    return marker, True
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
async def _externalize_leaf_to_artifact(
    obj: Any,
    *,
    run_id: str,
    graph_id: str,
    node_id: str,
    tool_name: str | None,
    tool_version: str | None,
    artifacts,
):
    """Persist a single non-JSON leaf via the artifact store.

    Thin pass-through to externalize_to_artifact; returns its ref dict
    (expected to contain at least "__aether_ref__" — see callers).
    """
    ref = await externalize_to_artifact(
        obj,
        run_id=run_id,
        graph_id=graph_id,
        node_id=node_id,
        tool_name=tool_name,
        tool_version=tool_version,
        artifacts=artifacts,
    )
    return ref
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
async def _jsonish_outputs_with_refs(
    *,
    outputs: dict[str, Any] | None,
    run_id: str,
    graph_id: str,
    node_id: str,
    tool_name: str | None,
    tool_version: str | None,
    artifacts,  # AsyncArtifactStore or None
    allow_externalize: bool,  # toggle
) -> dict[str, Any] | None:
    """Make a node's outputs JSON-safe.

    - JSON-pure values pass through untouched.
    - Non-JSON leaves become {"__aether_ref__": ...} markers. When
      allow_externalize is True and an artifact store is available, each such
      leaf is persisted via the artifact store and the marker carries the real
      reference; otherwise the opaque "opaque:nonjson" marker is used.

    Fixed vs. the original: the dead `_resolve_refs` inner coroutine (defined
    but never called — the code had already switched to the direct
    re-walk approach) has been removed. Behavior is unchanged.
    """
    if outputs is None:
        return None

    # Probe pass: discover whether any non-JSON leaves exist at all.
    jsonish, had_refs = map_jsonish_or_ref(
        outputs, mk_ref=lambda _obj: {"__aether_ref__": "opaque:nonjson"}
    )

    if not had_refs or not allow_externalize or artifacts is None:
        # Everything was JSON-pure, or externalization is disabled/unavailable:
        # return the (possibly opaque-marked) structure as-is.
        return jsonish

    # Externalization pass: re-walk the ORIGINAL outputs so the real objects
    # are still in hand, persisting each non-JSON leaf as an artifact.
    async def _externalize_in_place(orig):
        from collections.abc import Mapping, Sequence

        if isinstance(orig, str | int | float | bool | type(None)):
            return orig
        if isinstance(orig, Mapping):
            out = {}
            for k, v in orig.items():
                # stringify non-string keys to keep the snapshot JSON-safe
                out[str(k)] = await _externalize_in_place(v)
            return out
        if isinstance(orig, Sequence) and not isinstance(orig, str | bytes | bytearray):
            return [await _externalize_in_place(v) for v in orig]

        # leaf non-JSON → actual artifact ref
        ref = await _externalize_leaf_to_artifact(
            orig,
            run_id=run_id,
            graph_id=graph_id,
            node_id=node_id,
            tool_name=tool_name,
            tool_version=tool_version,
            artifacts=artifacts,
        )
        # __aether_ref__ first, then any extra metadata from the ref.
        return {
            "__aether_ref__": ref["__aether_ref__"],
            **{k: v for k, v in ref.items() if k != "__aether_ref__"},
        }

    return await _externalize_in_place(outputs)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
async def state_to_json_safe(
    state_obj,
    *,
    run_id: str,
    graph_id: str,
    artifacts=None,
    allow_externalize: bool = False,  # Do not externalize by default until fixing artifacts writer
    include_wait_spec: bool = True,
) -> dict[str, Any]:
    """
    Convert TaskGraphState to a JSON-safe dict.
    - JSON outputs inlined
    - non-JSON leaves become {"__aether_ref__": "..."} and (optionally) are externalized to artifacts
    """
    nodes_block = {}
    for nid, ns in state_obj.nodes.items():
        status = getattr(ns, "status", None)
        status_name = getattr(status, "name", status)  # Enum.name or string
        # Tool identity may live on the node state itself or on its spec.
        tool_name = getattr(ns, "tool_name", None) or getattr(
            getattr(ns, "spec", None), "tool_name", None
        )
        tool_version = getattr(ns, "tool_version", None) or getattr(
            getattr(ns, "spec", None), "tool_version", None
        )

        # Outputs are inlined when JSON-pure, otherwise marked/externalized.
        outputs_json = await _jsonish_outputs_with_refs(
            outputs=getattr(ns, "outputs", None),
            run_id=run_id,
            graph_id=graph_id,
            node_id=nid,
            tool_name=tool_name,
            tool_version=tool_version,
            artifacts=artifacts,
            allow_externalize=allow_externalize,
        )

        entry = {
            "status": status_name,
            "outputs": outputs_json,
            "error": getattr(ns, "error", None),
            "attempts": getattr(ns, "attempts", 0),
            "next_wakeup_at": getattr(ns, "next_wakeup_at", None),
            "wait_token": getattr(ns, "wait_token", None),
        }
        if include_wait_spec:
            ws = getattr(ns, "wait_spec", None)
            if ws:
                # ensure JSON-safe wait_spec (it should be strings/lists/dicts already)
                entry["wait_spec"] = ws
        nodes_block[nid] = entry

    return {
        "run_id": getattr(state_obj, "run_id", run_id),
        "rev": getattr(state_obj, "rev", None),
        "patches": [asdict(p) for p in getattr(state_obj, "patches", [])],
        "_bound_inputs": getattr(state_obj, "_bound_inputs", None),
        "nodes": nodes_block,
    }
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
from __future__ import annotations

from dataclasses import asdict, is_dataclass
from datetime import datetime, timezone
from enum import Enum
from typing import Any

from aethergraph.contracts.services.state_stores import GraphSnapshot

from .serialize import state_to_json_safe
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
async def snapshot_from_graph(
    run_id: str,
    graph_id: str,
    rev: int,
    spec_hash: str,
    state_obj,
    *,
    artifacts=None,  # AsyncArtifactStore or None
    allow_externalize: bool = False,
    include_wait_spec: bool = True,
):
    """Serialize a graph state into a GraphSnapshot.

    The state is converted to a JSON-safe dict first (non-JSON leaves become
    refs / are externalized per the flags), then wrapped in a GraphSnapshot
    stamped with the current UTC epoch time.
    """
    json_state = await state_to_json_safe(
        state_obj,
        run_id=run_id,
        graph_id=graph_id,
        artifacts=artifacts,
        allow_externalize=allow_externalize,
        include_wait_spec=include_wait_spec,
    )
    # BUG FIX: datetime.utcnow().timestamp() treats the naive UTC datetime as
    # *local* time, producing an epoch skewed by the host's UTC offset.
    # An aware UTC datetime yields the correct epoch on every host.
    return GraphSnapshot(
        run_id=run_id,
        graph_id=graph_id,
        rev=rev,
        created_at=datetime.now(timezone.utc).timestamp(),
        spec_hash=spec_hash,
        state=json_state,
    )
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _status_to_str(s) -> str:
|
|
43
|
+
if s is None:
|
|
44
|
+
return "PENDING"
|
|
45
|
+
if isinstance(s, Enum):
|
|
46
|
+
return s.name
|
|
47
|
+
# already a string or something printable
|
|
48
|
+
return str(s)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _enum_name_or_str(x):
|
|
52
|
+
try:
|
|
53
|
+
return x.name # Enum
|
|
54
|
+
except AttributeError:
|
|
55
|
+
return str(x)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _state_to_json(state_obj) -> dict[str, Any]:
    """Render a graph state object as a plain JSON-ready dict.

    Per node: status (enum name or string), raw outputs, error, attempt
    count, wakeup time, wait token, and a sanitized wait_spec (inline
    payloads/tokens stripped — see _sanitize_wait_spec).
    """
    nodes_json: dict[str, Any] = {}
    for node_id, node in getattr(state_obj, "nodes", {}).items():
        nodes_json[node_id] = {
            "status": _enum_name_or_str(getattr(node, "status", "PENDING")),
            "outputs": getattr(node, "outputs", None),
            "error": getattr(node, "error", None),
            "attempts": getattr(node, "attempts", 0),
            "next_wakeup_at": getattr(node, "next_wakeup_at", None),
            "wait_token": getattr(node, "wait_token", None),
            # NEW: safe subset of wait_spec (avoid inline payload & tokens)
            "wait_spec": _sanitize_wait_spec(getattr(node, "wait_spec", None)),
        }

    patches = [
        asdict(patch) if is_dataclass(patch) else patch
        for patch in getattr(state_obj, "patches", [])
    ]
    return {
        "run_id": getattr(state_obj, "run_id", None),
        "rev": getattr(state_obj, "rev", None),
        "patches": patches,
        "_bound_inputs": getattr(state_obj, "_bound_inputs", None),
        "nodes": nodes_json,
    }
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _sanitize_wait_spec(ws):
|
|
81
|
+
if not ws:
|
|
82
|
+
return None
|
|
83
|
+
# Drop volatile/sensitive fields if present
|
|
84
|
+
return {
|
|
85
|
+
"kind": ws.get("kind"),
|
|
86
|
+
"channel": ws.get("channel"),
|
|
87
|
+
"prompt": ws.get("prompt"),
|
|
88
|
+
"options": ws.get("options"),
|
|
89
|
+
"meta": ws.get("meta") or {},
|
|
90
|
+
# DO NOT store: token / inline_payload / resume_schema with secrets
|
|
91
|
+
}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Mapping, Sequence
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ResumptionNotSupported(Exception):
    """Raised when a snapshot contains non-JSON-pure outputs and resume is disabled."""

    # No extra state: the exception message carries run/graph/node details.
    pass
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
_JSON_SCALARS = (str, int, float, bool, type(None))
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _is_json_pure(obj: Any) -> bool:
    """
    Strict check: only JSON scalars, lists/tuples, and dicts with string keys.
    No custom objects, no bytes, no externalization markers.
    """
    if isinstance(obj, _JSON_SCALARS):
        return True

    if isinstance(obj, Mapping):
        # Disallow future externalization markers proactively
        if "__aether_ref__" in obj or obj.get("__externalized__") is True:
            return False
        # JSON requires string keys
        return all(
            isinstance(key, str) and _is_json_pure(value)
            for key, value in obj.items()
        )

    if isinstance(obj, Sequence) and not isinstance(obj, (str, bytes, bytearray)):
        return all(_is_json_pure(item) for item in obj)

    return False
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def assert_snapshot_json_pure(
    snapshot_state: dict, *, run_id: str, graph_id: str, allow_non_json: bool = False
) -> None:
    """
    Validate the *serialized* (dict) snapshot state produced by snapshot_from_graph(...).
    If any node outputs are not strictly JSON-pure, raise ResumptionNotSupported
    (unless allow_non_json=True).
    """
    if allow_non_json:
        return

    if not isinstance(snapshot_state, dict):
        raise ResumptionNotSupported(
            f"Resume blocked: snapshot state is not a dict (run_id={run_id}, graph_id={graph_id})."
        )

    offenders = []
    for node_id, node_state in snapshot_state.get("nodes", {}).items():
        if not isinstance(node_state, dict):
            offenders.append(node_id)
            continue
        outputs = node_state.get("outputs", None)
        # An output not produced yet (None) is fine.
        if outputs is not None and not _is_json_pure(outputs):
            offenders.append(node_id)

    if offenders:
        raise ResumptionNotSupported(
            "Resume blocked: snapshot contains non-JSON outputs; "
            f"nodes=({', '.join(offenders)}) run_id={run_id} graph_id={graph_id}. "
            "Ensure tools return JSON-friendly outputs (dict/list/str/float/int/bool/null), "
            "or disable strict resume checks once tested."
        )
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# services/tracing/noop.py
|
|
2
|
+
import contextlib
|
|
3
|
+
import logging
|
|
4
|
+
import time
|
|
5
|
+
|
|
6
|
+
logging.basicConfig(level=logging.INFO)
|
|
7
|
+
logger = logging.getLogger("aethergraph.tracing.noop")
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class NoopTracer:
    """Tracer stand-in that creates no real spans; it only logs durations."""

    @contextlib.contextmanager
    def span(self, name: str, **attrs):
        """Time the enclosed block and log its wall-clock duration in ms.

        BUG FIX: the original passed ``**attrs`` directly into
        ``logger.info(...)``, which raises TypeError for any attribute key
        logging does not recognise (Logger.info only accepts exc_info /
        stack_info / stacklevel / extra). Attributes are now rendered into
        the message, and the duration is logged even if the body raises
        (try/finally).
        """
        start = time.time()
        try:
            yield
        finally:
            dt = (time.time() - start) * 1000  # milliseconds
            logging.getLogger("aethergraph.tracing.noop").info(
                "Span '%s' took %s ms%s",
                name,
                dt,
                f" attrs={attrs}" if attrs else "",
            )