aethergraph 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182) hide show
  1. aethergraph/__init__.py +49 -0
  2. aethergraph/config/__init__.py +0 -0
  3. aethergraph/config/config.py +121 -0
  4. aethergraph/config/context.py +16 -0
  5. aethergraph/config/llm.py +26 -0
  6. aethergraph/config/loader.py +60 -0
  7. aethergraph/config/runtime.py +9 -0
  8. aethergraph/contracts/errors/errors.py +44 -0
  9. aethergraph/contracts/services/artifacts.py +142 -0
  10. aethergraph/contracts/services/channel.py +72 -0
  11. aethergraph/contracts/services/continuations.py +23 -0
  12. aethergraph/contracts/services/eventbus.py +12 -0
  13. aethergraph/contracts/services/kv.py +24 -0
  14. aethergraph/contracts/services/llm.py +17 -0
  15. aethergraph/contracts/services/mcp.py +22 -0
  16. aethergraph/contracts/services/memory.py +108 -0
  17. aethergraph/contracts/services/resume.py +28 -0
  18. aethergraph/contracts/services/state_stores.py +33 -0
  19. aethergraph/contracts/services/wakeup.py +28 -0
  20. aethergraph/core/execution/base_scheduler.py +77 -0
  21. aethergraph/core/execution/forward_scheduler.py +777 -0
  22. aethergraph/core/execution/global_scheduler.py +634 -0
  23. aethergraph/core/execution/retry_policy.py +22 -0
  24. aethergraph/core/execution/step_forward.py +411 -0
  25. aethergraph/core/execution/step_result.py +18 -0
  26. aethergraph/core/execution/wait_types.py +72 -0
  27. aethergraph/core/graph/graph_builder.py +192 -0
  28. aethergraph/core/graph/graph_fn.py +219 -0
  29. aethergraph/core/graph/graph_io.py +67 -0
  30. aethergraph/core/graph/graph_refs.py +154 -0
  31. aethergraph/core/graph/graph_spec.py +115 -0
  32. aethergraph/core/graph/graph_state.py +59 -0
  33. aethergraph/core/graph/graphify.py +128 -0
  34. aethergraph/core/graph/interpreter.py +145 -0
  35. aethergraph/core/graph/node_handle.py +33 -0
  36. aethergraph/core/graph/node_spec.py +46 -0
  37. aethergraph/core/graph/node_state.py +63 -0
  38. aethergraph/core/graph/task_graph.py +747 -0
  39. aethergraph/core/graph/task_node.py +82 -0
  40. aethergraph/core/graph/utils.py +37 -0
  41. aethergraph/core/graph/visualize.py +239 -0
  42. aethergraph/core/runtime/ad_hoc_context.py +61 -0
  43. aethergraph/core/runtime/base_service.py +153 -0
  44. aethergraph/core/runtime/bind_adapter.py +42 -0
  45. aethergraph/core/runtime/bound_memory.py +69 -0
  46. aethergraph/core/runtime/execution_context.py +220 -0
  47. aethergraph/core/runtime/graph_runner.py +349 -0
  48. aethergraph/core/runtime/lifecycle.py +26 -0
  49. aethergraph/core/runtime/node_context.py +203 -0
  50. aethergraph/core/runtime/node_services.py +30 -0
  51. aethergraph/core/runtime/recovery.py +159 -0
  52. aethergraph/core/runtime/run_registration.py +33 -0
  53. aethergraph/core/runtime/runtime_env.py +157 -0
  54. aethergraph/core/runtime/runtime_registry.py +32 -0
  55. aethergraph/core/runtime/runtime_services.py +224 -0
  56. aethergraph/core/runtime/wakeup_watcher.py +40 -0
  57. aethergraph/core/tools/__init__.py +10 -0
  58. aethergraph/core/tools/builtins/channel_tools.py +194 -0
  59. aethergraph/core/tools/builtins/toolset.py +134 -0
  60. aethergraph/core/tools/toolkit.py +510 -0
  61. aethergraph/core/tools/waitable.py +109 -0
  62. aethergraph/plugins/channel/__init__.py +0 -0
  63. aethergraph/plugins/channel/adapters/__init__.py +0 -0
  64. aethergraph/plugins/channel/adapters/console.py +106 -0
  65. aethergraph/plugins/channel/adapters/file.py +102 -0
  66. aethergraph/plugins/channel/adapters/slack.py +285 -0
  67. aethergraph/plugins/channel/adapters/telegram.py +302 -0
  68. aethergraph/plugins/channel/adapters/webhook.py +104 -0
  69. aethergraph/plugins/channel/adapters/webui.py +134 -0
  70. aethergraph/plugins/channel/routes/__init__.py +0 -0
  71. aethergraph/plugins/channel/routes/console_routes.py +86 -0
  72. aethergraph/plugins/channel/routes/slack_routes.py +49 -0
  73. aethergraph/plugins/channel/routes/telegram_routes.py +26 -0
  74. aethergraph/plugins/channel/routes/webui_routes.py +136 -0
  75. aethergraph/plugins/channel/utils/__init__.py +0 -0
  76. aethergraph/plugins/channel/utils/slack_utils.py +278 -0
  77. aethergraph/plugins/channel/utils/telegram_utils.py +324 -0
  78. aethergraph/plugins/channel/websockets/slack_ws.py +68 -0
  79. aethergraph/plugins/channel/websockets/telegram_polling.py +151 -0
  80. aethergraph/plugins/mcp/fs_server.py +128 -0
  81. aethergraph/plugins/mcp/http_server.py +101 -0
  82. aethergraph/plugins/mcp/ws_server.py +180 -0
  83. aethergraph/plugins/net/http.py +10 -0
  84. aethergraph/plugins/utils/data_io.py +359 -0
  85. aethergraph/runner/__init__.py +5 -0
  86. aethergraph/runtime/__init__.py +62 -0
  87. aethergraph/server/__init__.py +3 -0
  88. aethergraph/server/app_factory.py +84 -0
  89. aethergraph/server/start.py +122 -0
  90. aethergraph/services/__init__.py +10 -0
  91. aethergraph/services/artifacts/facade.py +284 -0
  92. aethergraph/services/artifacts/factory.py +35 -0
  93. aethergraph/services/artifacts/fs_store.py +656 -0
  94. aethergraph/services/artifacts/jsonl_index.py +123 -0
  95. aethergraph/services/artifacts/paths.py +23 -0
  96. aethergraph/services/artifacts/sqlite_index.py +209 -0
  97. aethergraph/services/artifacts/utils.py +124 -0
  98. aethergraph/services/auth/dev.py +16 -0
  99. aethergraph/services/channel/channel_bus.py +293 -0
  100. aethergraph/services/channel/factory.py +44 -0
  101. aethergraph/services/channel/session.py +511 -0
  102. aethergraph/services/channel/wait_helpers.py +57 -0
  103. aethergraph/services/clock/clock.py +9 -0
  104. aethergraph/services/container/default_container.py +320 -0
  105. aethergraph/services/continuations/continuation.py +56 -0
  106. aethergraph/services/continuations/factory.py +34 -0
  107. aethergraph/services/continuations/stores/fs_store.py +264 -0
  108. aethergraph/services/continuations/stores/inmem_store.py +95 -0
  109. aethergraph/services/eventbus/inmem.py +21 -0
  110. aethergraph/services/features/static.py +10 -0
  111. aethergraph/services/kv/ephemeral.py +90 -0
  112. aethergraph/services/kv/factory.py +27 -0
  113. aethergraph/services/kv/layered.py +41 -0
  114. aethergraph/services/kv/sqlite_kv.py +128 -0
  115. aethergraph/services/llm/factory.py +157 -0
  116. aethergraph/services/llm/generic_client.py +542 -0
  117. aethergraph/services/llm/providers.py +3 -0
  118. aethergraph/services/llm/service.py +105 -0
  119. aethergraph/services/logger/base.py +36 -0
  120. aethergraph/services/logger/compat.py +50 -0
  121. aethergraph/services/logger/formatters.py +106 -0
  122. aethergraph/services/logger/std.py +203 -0
  123. aethergraph/services/mcp/helpers.py +23 -0
  124. aethergraph/services/mcp/http_client.py +70 -0
  125. aethergraph/services/mcp/mcp_tools.py +21 -0
  126. aethergraph/services/mcp/registry.py +14 -0
  127. aethergraph/services/mcp/service.py +100 -0
  128. aethergraph/services/mcp/stdio_client.py +70 -0
  129. aethergraph/services/mcp/ws_client.py +115 -0
  130. aethergraph/services/memory/bound.py +106 -0
  131. aethergraph/services/memory/distillers/episode.py +116 -0
  132. aethergraph/services/memory/distillers/rolling.py +74 -0
  133. aethergraph/services/memory/facade.py +633 -0
  134. aethergraph/services/memory/factory.py +78 -0
  135. aethergraph/services/memory/hotlog_kv.py +27 -0
  136. aethergraph/services/memory/indices.py +74 -0
  137. aethergraph/services/memory/io_helpers.py +72 -0
  138. aethergraph/services/memory/persist_fs.py +40 -0
  139. aethergraph/services/memory/resolver.py +152 -0
  140. aethergraph/services/metering/noop.py +4 -0
  141. aethergraph/services/prompts/file_store.py +41 -0
  142. aethergraph/services/rag/chunker.py +29 -0
  143. aethergraph/services/rag/facade.py +593 -0
  144. aethergraph/services/rag/index/base.py +27 -0
  145. aethergraph/services/rag/index/faiss_index.py +121 -0
  146. aethergraph/services/rag/index/sqlite_index.py +134 -0
  147. aethergraph/services/rag/index_factory.py +52 -0
  148. aethergraph/services/rag/parsers/md.py +7 -0
  149. aethergraph/services/rag/parsers/pdf.py +14 -0
  150. aethergraph/services/rag/parsers/txt.py +7 -0
  151. aethergraph/services/rag/utils/hybrid.py +39 -0
  152. aethergraph/services/rag/utils/make_fs_key.py +62 -0
  153. aethergraph/services/redactor/simple.py +16 -0
  154. aethergraph/services/registry/key_parsing.py +44 -0
  155. aethergraph/services/registry/registry_key.py +19 -0
  156. aethergraph/services/registry/unified_registry.py +185 -0
  157. aethergraph/services/resume/multi_scheduler_resume_bus.py +65 -0
  158. aethergraph/services/resume/router.py +73 -0
  159. aethergraph/services/schedulers/registry.py +41 -0
  160. aethergraph/services/secrets/base.py +7 -0
  161. aethergraph/services/secrets/env.py +8 -0
  162. aethergraph/services/state_stores/externalize.py +135 -0
  163. aethergraph/services/state_stores/graph_observer.py +131 -0
  164. aethergraph/services/state_stores/json_store.py +67 -0
  165. aethergraph/services/state_stores/resume_policy.py +119 -0
  166. aethergraph/services/state_stores/serialize.py +249 -0
  167. aethergraph/services/state_stores/utils.py +91 -0
  168. aethergraph/services/state_stores/validate.py +78 -0
  169. aethergraph/services/tracing/noop.py +18 -0
  170. aethergraph/services/waits/wait_registry.py +91 -0
  171. aethergraph/services/wakeup/memory_queue.py +57 -0
  172. aethergraph/services/wakeup/scanner_producer.py +56 -0
  173. aethergraph/services/wakeup/worker.py +31 -0
  174. aethergraph/tools/__init__.py +25 -0
  175. aethergraph/utils/optdeps.py +8 -0
  176. aethergraph-0.1.0a1.dist-info/METADATA +410 -0
  177. aethergraph-0.1.0a1.dist-info/RECORD +182 -0
  178. aethergraph-0.1.0a1.dist-info/WHEEL +5 -0
  179. aethergraph-0.1.0a1.dist-info/entry_points.txt +2 -0
  180. aethergraph-0.1.0a1.dist-info/licenses/LICENSE +176 -0
  181. aethergraph-0.1.0a1.dist-info/licenses/NOTICE +31 -0
  182. aethergraph-0.1.0a1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,67 @@
1
+ import asyncio
2
+ import json
3
+ import os
4
+ import threading
5
+ import time
6
+
7
+ from aethergraph.contracts.services.state_stores import GraphSnapshot, GraphStateStore, StateEvent
8
+
9
+
10
class JsonGraphStateStore(GraphStateStore):
    """Graph state store that persists snapshots and events as JSON files.

    On-disk layout, per run:

    - ``<root>/<run_id>/snapshot_<rev:08d>_<unix_ts>.json`` — one file per snapshot
    - ``<root>/<run_id>/events.jsonl`` — append-only event log, one JSON object per line

    All file access is serialized through a re-entrant thread lock. The I/O
    itself is synchronous even though the methods are coroutines.
    """

    def __init__(self, root: str):
        """Create the store under ``root``; the directory is created if missing."""
        self.root = root
        os.makedirs(root, exist_ok=True)
        self._alock = asyncio.Lock()  # kept for compatibility; not used by the methods below
        self._tlock = threading.RLock()  # guards every file read/write in this class

    def _run_dir(self, run_id: str) -> str:
        """Return the directory for ``run_id``, creating it on first use."""
        run_dir = os.path.join(self.root, run_id)
        os.makedirs(run_dir, exist_ok=True)
        return run_dir

    async def save_snapshot(self, snap: GraphSnapshot) -> None:
        """Write ``snap`` to a new snapshot file.

        The payload is written to a ``.tmp`` sibling, fsynced, and then moved
        into place with ``os.replace`` so readers never see a partial file.
        """
        run_dir = self._run_dir(snap.run_id)
        ts = int(time.time())
        fn = f"snapshot_{snap.rev:08d}_{ts}.json"
        tmp = os.path.join(run_dir, fn + ".tmp")
        dst = os.path.join(run_dir, fn)
        with self._tlock:
            with open(tmp, "w", encoding="utf-8") as f:
                json.dump(snap.__dict__, f, ensure_ascii=False)
                f.flush()
                os.fsync(f.fileno())
            os.replace(tmp, dst)

    async def load_latest_snapshot(self, run_id: str) -> GraphSnapshot | None:
        """Return the snapshot whose file name sorts last, or ``None`` if there is none.

        Only completed ``snapshot_*.json`` files are considered. A leftover
        ``.json.tmp`` file from an interrupted ``save_snapshot`` would sort
        after its finished counterpart and must not be picked up.
        """
        run_dir = self._run_dir(run_id)
        with self._tlock:
            names = [
                name
                for name in os.listdir(run_dir)
                if name.startswith("snapshot_") and name.endswith(".json")
            ]
            if not names:
                return None
            # Zero-padded rev followed by the timestamp makes lexical order the
            # chronological order.
            latest = max(names)
            with open(os.path.join(run_dir, latest), encoding="utf-8") as f:
                return GraphSnapshot(**json.load(f))

    async def append_event(self, ev: StateEvent) -> None:
        """Append ``ev`` as one JSON line to the run's event log and fsync it."""
        p = os.path.join(self._run_dir(ev.run_id), "events.jsonl")
        line = json.dumps(ev.__dict__, ensure_ascii=False) + "\n"
        with self._tlock, open(p, "a", encoding="utf-8") as f:
            f.write(line)
            f.flush()
            os.fsync(f.fileno())

    async def load_events_since(self, run_id: str, from_rev: int) -> list[StateEvent]:
        """Return the logged events whose ``rev`` is strictly greater than ``from_rev``.

        Returns an empty list when the run has no event log yet. Blank lines
        in the log are skipped.
        """
        p = os.path.join(self._run_dir(run_id), "events.jsonl")
        out = []
        # Hold the same lock as append_event so a concurrent append in this
        # process cannot be observed half-written.
        with self._tlock:
            if not os.path.exists(p):
                return []
            with open(p, encoding="utf-8") as f:
                for line in f:
                    if not line.strip():
                        continue
                    rec = json.loads(line)
                    if rec["rev"] > from_rev:
                        out.append(StateEvent(**rec))
        return out

    async def list_run_ids(self, graph_id: str | None = None) -> list[str]:
        """Return the name of every run directory under ``root``.

        Best-effort: ``graph_id`` is accepted but currently ignored, so runs of
        all graphs are returned.
        """
        return [d for d in os.listdir(self.root) if os.path.isdir(os.path.join(self.root, d))]
@@ -0,0 +1,119 @@
1
+ # aethergraph/core/persist/resume_policy.py
2
+ from __future__ import annotations
3
+
4
+ from typing import Any
5
+
6
+ from aethergraph.contracts.errors.errors import ResumeIncompatibleSnapshot
7
+
8
+ _JSON_PRIMITIVES = (str, int, float, bool, type(None))
9
+
10
+
11
+ def _is_json_like(x: Any) -> bool:
12
+ if isinstance(x, _JSON_PRIMITIVES):
13
+ return True
14
+ if isinstance(x, list):
15
+ return all(_is_json_like(v) for v in x)
16
+ if isinstance(x, dict):
17
+ # Treat any dict that has __aether_ref__ as NOT allowed in strict JSON-only policy
18
+ if "__aether_ref__" in x:
19
+ return False
20
+ return all(isinstance(k, str) and _is_json_like(v) for k, v in x.items())
21
+ return False
22
+
23
+
24
+ def _walk_non_json(obj, path="$"):
25
+ from collections.abc import Mapping, Sequence
26
+
27
+ if isinstance(obj, str | int | float | bool) or obj is None:
28
+ return
29
+ if isinstance(obj, Mapping):
30
+ if "__aether_ref__" in obj:
31
+ yield path
32
+ return
33
+ for k, v in obj.items():
34
+ yield from _walk_non_json(v, f"{path}.{k}")
35
+ return
36
+ if isinstance(obj, Sequence) and not isinstance(obj, str | bytes | bytearray):
37
+ for i, v in enumerate(obj):
38
+ yield from _walk_non_json(v, f"{path}[{i}]")
39
+ return
40
+ yield path # non-JSON leaf
41
+
42
+
43
def assert_snapshot_json_only(
    run_id: str,
    snap_json: dict,
    *,
    mode: str = "reuse_only",  # "strict" | "reuse_only"
    ignore_nodes: set[str] | None = None,  # node_ids to skip (e.g., graph output producers)
) -> None:
    """
    Raise ResumeIncompatibleSnapshot when checked node outputs hold non-JSON values or refs.

    - mode="strict": every node is checked.
    - mode="reuse_only": only nodes whose status is DONE or SKIPPED are checked.
    - ignore_nodes: node_ids that are never checked, whatever the mode.

    Node states are read from ``snap_json["state"]`` when that entry is truthy,
    otherwise from ``snap_json`` itself. Nodes with empty outputs are skipped.
    """
    skipped = ignore_nodes or set()
    state = snap_json.get("state") or snap_json
    check_everything = mode == "strict"
    problems: list[str] = []

    for node_id, node_state in state.get("nodes", {}).items():
        if node_id in skipped:
            continue
        if not check_everything:
            status = (node_state.get("status") or "").upper()
            if status not in {"DONE", "SKIPPED"}:
                continue

        outputs = node_state.get("outputs")
        if not outputs:
            continue

        offending = list(_walk_non_json(outputs))
        if not offending:
            continue

        # Report at most eight locations per node to keep the message readable.
        shown = ", ".join(offending[:8])
        overflow = " ..." if len(offending) > 8 else ""
        problems.append(f"node '{node_id}' outputs contain non-JSON or refs at: " + shown + overflow)

    if problems:
        raise ResumeIncompatibleSnapshot(run_id, problems)
85
+
86
+
87
def output_node_ids_from_graph(graph) -> set[str]:
    """
    Return the node_ids referenced by the graph's declared output bindings.

    Bindings are read from ``graph.io_signature()["outputs"]["bindings"]``; any
    failure while reading them is treated as "no bindings". The canonical
    binding shape is:
      {'sum': {'_type': 'ref', 'from': 'combine_3', 'key': 'sum'}}
    Dicts, lists and tuples that are not themselves refs are searched for
    nested refs.
    """
    try:
        bindings = graph.io_signature().get("outputs", {}).get("bindings", {}) or {}
    except Exception:
        bindings = {}

    producers: set[str] = set()
    pending = list(bindings.values())

    # Work-list traversal; visiting order is irrelevant because the result is a set.
    while pending:
        item = pending.pop()
        if isinstance(item, dict):
            if item.get("_type") == "ref":
                # Canonical ref: record its source and do not look inside it.
                source = item.get("from")
                if isinstance(source, str) and source:
                    producers.add(source)
            else:
                pending.extend(item.values())
        elif isinstance(item, (list, tuple)):
            pending.extend(item)

    return producers
@@ -0,0 +1,249 @@
1
from __future__ import annotations

from collections.abc import Callable, Mapping, Sequence
from dataclasses import asdict
from typing import Any

from .externalize import externalize_to_artifact
8
+
9
_JSON_SCALARS = (str, int, float, bool, type(None))


def is_json_pure(obj: Any) -> bool:
    """Return True when ``obj`` is built only from JSON scalars, sequences and str-keyed mappings.

    Mappings that carry an ``__aether_ref__`` key, or whose ``__externalized__``
    entry is exactly ``True``, are never pure. bytes and bytearray are rejected.
    """
    if isinstance(obj, _JSON_SCALARS):
        return True

    if isinstance(obj, Mapping):
        # References are not allowed in "pure" mode.
        is_reference = "__aether_ref__" in obj or obj.get("__externalized__") is True
        if is_reference:
            return False
        return all(isinstance(key, str) and is_json_pure(value) for key, value in obj.items())

    if isinstance(obj, Sequence) and not isinstance(obj, (str, bytes, bytearray)):
        for item in obj:
            if not is_json_pure(item):
                return False
        return True

    return False
28
+
29
+
30
def jsonish_or_ref(
    obj: Any,
    *,
    mk_ref: Callable[[Any], dict[str, Any]] | None = None,
) -> tuple[Any, dict[str, Any] | None]:
    """
    Return ``(jsonish, ref)`` for a single value:

    - if obj is JSON-pure → ``(obj, None)``
    - else → ``({"__aether_ref__": <...>, ...}, ref_meta)``

    ``mk_ref(obj)`` should return a dict with at least
    ``{"__aether_ref__": "<uri-or-id>"}``. When ``mk_ref`` is omitted, returns
    a falsy value, or omits the key, the marker ``"opaque:nonjson"`` is used.
    """
    if is_json_pure(obj):
        return obj, None

    if mk_ref is None:
        # Default: opaque marker (will block resume; artifacts still saved by caller if desired).
        # Two distinct dicts so mutating one does not affect the other.
        return {"__aether_ref__": "opaque:nonjson"}, {"__aether_ref__": "opaque:nonjson"}

    ref = mk_ref(obj) or {"__aether_ref__": "opaque:nonjson"}
    # Put the marker key first, then carry over any extra metadata from the ref.
    jsonish = {"__aether_ref__": ref.get("__aether_ref__", "opaque:nonjson")}
    jsonish.update({k: v for k, v in ref.items() if k != "__aether_ref__"})
    return jsonish, ref
53
+
54
+
55
def map_jsonish_or_ref(
    payload: Any,
    *,
    mk_ref: Callable[[Any], dict[str, Any]] | None = None,
) -> tuple[Any, bool]:
    """
    Walk nested structures. Returns ``(jsonish_payload, had_refs)``.

    A JSON-pure payload is returned unchanged with ``had_refs=False``.
    Otherwise mappings become dicts (non-string keys are stringified),
    sequences become lists, and every non-JSON leaf becomes a
    ``{"__aether_ref__": ...}`` marker produced via ``mk_ref``.
    """
    if is_json_pure(payload):
        return payload, False

    # dict
    if isinstance(payload, Mapping):
        mapped = {}
        had_ref = False
        for k, v in payload.items():
            # Stringify non-string keys to keep the snapshot JSON-safe.
            key = k if isinstance(k, str) else str(k)
            value, child_had_ref = map_jsonish_or_ref(v, mk_ref=mk_ref)
            mapped[key] = value
            had_ref = had_ref or child_had_ref
        return mapped, had_ref

    # list/tuple
    if isinstance(payload, Sequence) and not isinstance(payload, (str, bytes, bytearray)):
        items = []
        had_ref = False
        for v in payload:
            value, child_had_ref = map_jsonish_or_ref(v, mk_ref=mk_ref)
            items.append(value)
            had_ref = had_ref or child_had_ref
        return items, had_ref

    # leaf non-JSON → ref
    jsonish, _ref_meta = jsonish_or_ref(payload, mk_ref=mk_ref)
    return jsonish, True
93
+
94
+
95
async def _externalize_leaf_to_artifact(
    obj: Any,
    *,
    run_id: str,
    graph_id: str,
    node_id: str,
    tool_name: str | None,
    tool_version: str | None,
    artifacts,
):
    """Forward ``obj`` and its provenance to ``externalize_to_artifact`` and return the result."""
    provenance = {
        "run_id": run_id,
        "graph_id": graph_id,
        "node_id": node_id,
        "tool_name": tool_name,
        "tool_version": tool_version,
    }
    return await externalize_to_artifact(obj, artifacts=artifacts, **provenance)
115
+
116
+
117
async def _jsonish_outputs_with_refs(
    *,
    outputs: dict[str, Any] | None,
    run_id: str,
    graph_id: str,
    node_id: str,
    tool_name: str | None,
    tool_version: str | None,
    artifacts,  # AsyncArtifactStore or None
    allow_externalize: bool,  # toggle
) -> dict[str, Any] | None:
    """
    Return a JSON-safe copy of a node's ``outputs``.

    - ``None`` outputs stay ``None``.
    - If externalization is disabled, no artifact store is given, or the
      outputs contain no non-JSON leaves, every non-JSON leaf (if any) is
      replaced by ``{"__aether_ref__": "opaque:nonjson"}``.
    - Otherwise each non-JSON leaf is externalized and replaced by the ref
      dict that ``externalize_to_artifact`` returns for it.
    """
    if outputs is None:
        return None

    def _opaque_ref(_obj):
        return {"__aether_ref__": "opaque:nonjson"}

    # Marking pass: produces the opaque form and tells us whether any
    # non-JSON leaf exists at all.
    jsonish, had_refs = map_jsonish_or_ref(outputs, mk_ref=_opaque_ref)

    can_externalize = allow_externalize and artifacts is not None
    if not had_refs or not can_externalize:
        # Nothing to externalize (or disabled) → return as-is
        return jsonish

    # Externalizing pass: walk the ORIGINAL outputs again, because the marked
    # copy no longer holds the objects that need to be stored.
    async def _externalize_in_place(orig):
        if isinstance(orig, (str, int, float, bool, type(None))):
            return orig
        if isinstance(orig, Mapping):
            converted = {}
            for k, v in orig.items():
                converted[str(k)] = await _externalize_in_place(v)
            return converted
        if isinstance(orig, Sequence) and not isinstance(orig, (str, bytes, bytearray)):
            return [await _externalize_in_place(v) for v in orig]

        # leaf non-JSON → actual artifact ref
        ref = await _externalize_leaf_to_artifact(
            orig,
            run_id=run_id,
            graph_id=graph_id,
            node_id=node_id,
            tool_name=tool_name,
            tool_version=tool_version,
            artifacts=artifacts,
        )
        # Marker key first, then any additional metadata carried by the ref.
        return {
            "__aether_ref__": ref["__aether_ref__"],
            **{k: v for k, v in ref.items() if k != "__aether_ref__"},
        }

    return await _externalize_in_place(outputs)
190
+
191
+
192
async def state_to_json_safe(
    state_obj,
    *,
    run_id: str,
    graph_id: str,
    artifacts=None,
    allow_externalize: bool = False,  # Do not externalize by default until fixing artifacts writer
    include_wait_spec: bool = True,
) -> dict[str, Any]:
    """
    Convert TaskGraphState to a JSON-safe dict.

    - JSON outputs are inlined.
    - Non-JSON leaves become ``{"__aether_ref__": "..."}`` and (optionally) are
      externalized to artifacts.
    - Dataclass patches are converted with ``asdict``; patches that are not
      dataclass instances are passed through unchanged.
    - ``wait_spec`` is copied as-is when present and ``include_wait_spec`` is
      true; it is expected to already be strings/lists/dicts.
    """
    from dataclasses import is_dataclass

    nodes_block = {}
    for nid, ns in state_obj.nodes.items():
        spec = getattr(ns, "spec", None)
        status = getattr(ns, "status", None)
        status_name = getattr(status, "name", status)  # Enum.name or string
        # Prefer the value recorded on the node state, fall back to its spec.
        tool_name = getattr(ns, "tool_name", None) or getattr(spec, "tool_name", None)
        tool_version = getattr(ns, "tool_version", None) or getattr(spec, "tool_version", None)

        outputs_json = await _jsonish_outputs_with_refs(
            outputs=getattr(ns, "outputs", None),
            run_id=run_id,
            graph_id=graph_id,
            node_id=nid,
            tool_name=tool_name,
            tool_version=tool_version,
            artifacts=artifacts,
            allow_externalize=allow_externalize,
        )

        entry = {
            "status": status_name,
            "outputs": outputs_json,
            "error": getattr(ns, "error", None),
            "attempts": getattr(ns, "attempts", 0),
            "next_wakeup_at": getattr(ns, "next_wakeup_at", None),
            "wait_token": getattr(ns, "wait_token", None),
        }
        if include_wait_spec:
            ws = getattr(ns, "wait_spec", None)
            if ws:
                entry["wait_spec"] = ws
        nodes_block[nid] = entry

    patches = [
        # asdict() raises TypeError for anything that is not a dataclass instance.
        asdict(p) if is_dataclass(p) and not isinstance(p, type) else p
        for p in getattr(state_obj, "patches", [])
    ]

    return {
        "run_id": getattr(state_obj, "run_id", run_id),
        "rev": getattr(state_obj, "rev", None),
        "patches": patches,
        "_bound_inputs": getattr(state_obj, "_bound_inputs", None),
        "nodes": nodes_block,
    }
@@ -0,0 +1,91 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import asdict, is_dataclass
4
+ from datetime import datetime
5
+ from enum import Enum
6
+ from typing import Any
7
+
8
+ from aethergraph.contracts.services.state_stores import GraphSnapshot
9
+
10
+ from .serialize import state_to_json_safe
11
+
12
+
13
async def snapshot_from_graph(
    run_id: str,
    graph_id: str,
    rev: int,
    spec_hash: str,
    state_obj,
    *,
    artifacts=None,  # AsyncArtifactStore or None
    allow_externalize: bool = False,
    include_wait_spec: bool = True,
):
    """Build a ``GraphSnapshot`` for ``state_obj``.

    The state is converted with ``state_to_json_safe`` (the keyword options
    are forwarded unchanged) and stamped with the current time as a POSIX
    timestamp in ``created_at``.
    """
    from datetime import timezone

    json_state = await state_to_json_safe(
        state_obj,
        run_id=run_id,
        graph_id=graph_id,
        artifacts=artifacts,
        allow_externalize=allow_externalize,
        include_wait_spec=include_wait_spec,
    )
    # Use an aware UTC datetime: calling .timestamp() on the naive value from
    # utcnow() interprets it as local time and skews the result by the host's
    # UTC offset.
    created_at = datetime.now(timezone.utc).timestamp()
    return GraphSnapshot(
        run_id=run_id,
        graph_id=graph_id,
        rev=rev,
        created_at=created_at,
        spec_hash=spec_hash,
        state=json_state,
    )
40
+
41
+
42
+ def _status_to_str(s) -> str:
43
+ if s is None:
44
+ return "PENDING"
45
+ if isinstance(s, Enum):
46
+ return s.name
47
+ # already a string or something printable
48
+ return str(s)
49
+
50
+
51
+ def _enum_name_or_str(x):
52
+ try:
53
+ return x.name # Enum
54
+ except AttributeError:
55
+ return str(x)
56
+
57
+
58
def _state_to_json(state_obj) -> dict[str, Any]:
    """Build a plain-dict view of ``state_obj`` without touching node outputs.

    Missing attributes fall back to ``None`` (``attempts`` to 0, ``status`` to
    "PENDING"). Dataclass patches are converted with ``asdict``; each node's
    ``wait_spec`` is reduced by ``_sanitize_wait_spec``.
    """
    run_id = getattr(state_obj, "run_id", None)
    rev = getattr(state_obj, "rev", None)

    patches = []
    for patch in getattr(state_obj, "patches", []):
        patches.append(asdict(patch) if is_dataclass(patch) else patch)

    bound_inputs = getattr(state_obj, "_bound_inputs", None)

    nodes = {}
    for nid, ns in getattr(state_obj, "nodes", {}).items():
        nodes[nid] = {
            "status": _enum_name_or_str(getattr(ns, "status", "PENDING")),
            "outputs": getattr(ns, "outputs", None),
            "error": getattr(ns, "error", None),
            "attempts": getattr(ns, "attempts", 0),
            "next_wakeup_at": getattr(ns, "next_wakeup_at", None),
            "wait_token": getattr(ns, "wait_token", None),
            # Only a safe subset of wait_spec is kept (no inline payload or tokens).
            "wait_spec": _sanitize_wait_spec(getattr(ns, "wait_spec", None)),
        }

    return {
        "run_id": run_id,
        "rev": rev,
        "patches": patches,
        "_bound_inputs": bound_inputs,
        "nodes": nodes,
    }
78
+
79
+
80
+ def _sanitize_wait_spec(ws):
81
+ if not ws:
82
+ return None
83
+ # Drop volatile/sensitive fields if present
84
+ return {
85
+ "kind": ws.get("kind"),
86
+ "channel": ws.get("channel"),
87
+ "prompt": ws.get("prompt"),
88
+ "options": ws.get("options"),
89
+ "meta": ws.get("meta") or {},
90
+ # DO NOT store: token / inline_payload / resume_schema with secrets
91
+ }
@@ -0,0 +1,78 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Mapping, Sequence
4
+ from typing import Any
5
+
6
+
7
class ResumptionNotSupported(Exception):
    """Signals that a snapshot holds non-JSON-pure outputs, so resuming from it is refused."""
11
+
12
+
13
+ _JSON_SCALARS = (str, int, float, bool, type(None))
14
+
15
+
16
+ def _is_json_pure(obj: Any) -> bool:
17
+ """
18
+ Strict check: only JSON scalars, lists/tuples, and dicts with string keys.
19
+ No custom objects, no bytes, no externalization markers.
20
+ """
21
+ if isinstance(obj, _JSON_SCALARS):
22
+ return True
23
+
24
+ if isinstance(obj, Mapping):
25
+ # Disallow future externalization markers proactively
26
+ if "__aether_ref__" in obj or obj.get("__externalized__") is True:
27
+ return False
28
+ # JSON requires string keys
29
+ for k, v in obj.items():
30
+ if not isinstance(k, str):
31
+ return False
32
+ if not _is_json_pure(v):
33
+ return False
34
+ return True
35
+
36
+ if isinstance(obj, Sequence) and not isinstance(obj, (str | bytes | bytearray)):
37
+ return all(_is_json_pure(v) for v in obj)
38
+
39
+ return False
40
+
41
+
42
def assert_snapshot_json_pure(
    snapshot_state: dict, *, run_id: str, graph_id: str, allow_non_json: bool = False
) -> None:
    """
    Validate the *serialized* (dict) snapshot state.

    Does nothing when ``allow_non_json`` is true. Otherwise raises
    ResumptionNotSupported if the state is not a dict, if any node entry is
    not a dict, or if any node's ``outputs`` is present and not strictly
    JSON-pure. Nodes whose ``outputs`` is ``None`` are accepted.
    """
    if allow_non_json:
        return

    if not isinstance(snapshot_state, dict):
        raise ResumptionNotSupported(
            f"Resume blocked: snapshot state is not a dict (run_id={run_id}, graph_id={graph_id})."
        )

    offenders = [
        node_id
        for node_id, node_state in snapshot_state.get("nodes", {}).items()
        if not isinstance(node_state, dict)
        or ((outs := node_state.get("outputs", None)) is not None and not _is_json_pure(outs))
    ]
    if not offenders:
        return

    listed = ", ".join(offenders)
    raise ResumptionNotSupported(
        "Resume blocked: snapshot contains non-JSON outputs; "
        f"nodes=({listed}) run_id={run_id} graph_id={graph_id}. "
        "Ensure tools return JSON-friendly outputs (dict/list/str/float/int/bool/null), "
        "or disable strict resume checks once tested."
    )
@@ -0,0 +1,18 @@
1
+ # services/tracing/noop.py
2
+ import contextlib
3
+ import logging
4
+ import time
5
+
6
+ logging.basicConfig(level=logging.INFO)
7
logger = logging.getLogger("aethergraph.tracing.noop")


class NoopTracer:
    """Tracer stand-in: it creates no real spans and only logs how long each one took."""

    @contextlib.contextmanager
    def span(self, name: str, **attrs):
        """Time the enclosed block and log its duration at INFO level.

        Args:
            name: Span name, included in the log message.
            **attrs: Arbitrary span attributes. They are attached to the log
                record as ``span_attrs`` and do not appear in the message.

        The duration is logged only when the block finishes without raising.
        """
        started = time.perf_counter()  # monotonic, unaffected by wall-clock adjustments
        yield
        elapsed_ms = (time.perf_counter() - started) * 1000  # milliseconds
        # Logger.info() only accepts exc_info/stack_info/stacklevel/extra as
        # keyword arguments, so arbitrary attributes must travel via ``extra``.
        logger.info("Span '%s' took %s ms", name, elapsed_ms, extra={"span_attrs": attrs})