aethergraph 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182) hide show
  1. aethergraph/__init__.py +49 -0
  2. aethergraph/config/__init__.py +0 -0
  3. aethergraph/config/config.py +121 -0
  4. aethergraph/config/context.py +16 -0
  5. aethergraph/config/llm.py +26 -0
  6. aethergraph/config/loader.py +60 -0
  7. aethergraph/config/runtime.py +9 -0
  8. aethergraph/contracts/errors/errors.py +44 -0
  9. aethergraph/contracts/services/artifacts.py +142 -0
  10. aethergraph/contracts/services/channel.py +72 -0
  11. aethergraph/contracts/services/continuations.py +23 -0
  12. aethergraph/contracts/services/eventbus.py +12 -0
  13. aethergraph/contracts/services/kv.py +24 -0
  14. aethergraph/contracts/services/llm.py +17 -0
  15. aethergraph/contracts/services/mcp.py +22 -0
  16. aethergraph/contracts/services/memory.py +108 -0
  17. aethergraph/contracts/services/resume.py +28 -0
  18. aethergraph/contracts/services/state_stores.py +33 -0
  19. aethergraph/contracts/services/wakeup.py +28 -0
  20. aethergraph/core/execution/base_scheduler.py +77 -0
  21. aethergraph/core/execution/forward_scheduler.py +777 -0
  22. aethergraph/core/execution/global_scheduler.py +634 -0
  23. aethergraph/core/execution/retry_policy.py +22 -0
  24. aethergraph/core/execution/step_forward.py +411 -0
  25. aethergraph/core/execution/step_result.py +18 -0
  26. aethergraph/core/execution/wait_types.py +72 -0
  27. aethergraph/core/graph/graph_builder.py +192 -0
  28. aethergraph/core/graph/graph_fn.py +219 -0
  29. aethergraph/core/graph/graph_io.py +67 -0
  30. aethergraph/core/graph/graph_refs.py +154 -0
  31. aethergraph/core/graph/graph_spec.py +115 -0
  32. aethergraph/core/graph/graph_state.py +59 -0
  33. aethergraph/core/graph/graphify.py +128 -0
  34. aethergraph/core/graph/interpreter.py +145 -0
  35. aethergraph/core/graph/node_handle.py +33 -0
  36. aethergraph/core/graph/node_spec.py +46 -0
  37. aethergraph/core/graph/node_state.py +63 -0
  38. aethergraph/core/graph/task_graph.py +747 -0
  39. aethergraph/core/graph/task_node.py +82 -0
  40. aethergraph/core/graph/utils.py +37 -0
  41. aethergraph/core/graph/visualize.py +239 -0
  42. aethergraph/core/runtime/ad_hoc_context.py +61 -0
  43. aethergraph/core/runtime/base_service.py +153 -0
  44. aethergraph/core/runtime/bind_adapter.py +42 -0
  45. aethergraph/core/runtime/bound_memory.py +69 -0
  46. aethergraph/core/runtime/execution_context.py +220 -0
  47. aethergraph/core/runtime/graph_runner.py +349 -0
  48. aethergraph/core/runtime/lifecycle.py +26 -0
  49. aethergraph/core/runtime/node_context.py +203 -0
  50. aethergraph/core/runtime/node_services.py +30 -0
  51. aethergraph/core/runtime/recovery.py +159 -0
  52. aethergraph/core/runtime/run_registration.py +33 -0
  53. aethergraph/core/runtime/runtime_env.py +157 -0
  54. aethergraph/core/runtime/runtime_registry.py +32 -0
  55. aethergraph/core/runtime/runtime_services.py +224 -0
  56. aethergraph/core/runtime/wakeup_watcher.py +40 -0
  57. aethergraph/core/tools/__init__.py +10 -0
  58. aethergraph/core/tools/builtins/channel_tools.py +194 -0
  59. aethergraph/core/tools/builtins/toolset.py +134 -0
  60. aethergraph/core/tools/toolkit.py +510 -0
  61. aethergraph/core/tools/waitable.py +109 -0
  62. aethergraph/plugins/channel/__init__.py +0 -0
  63. aethergraph/plugins/channel/adapters/__init__.py +0 -0
  64. aethergraph/plugins/channel/adapters/console.py +106 -0
  65. aethergraph/plugins/channel/adapters/file.py +102 -0
  66. aethergraph/plugins/channel/adapters/slack.py +285 -0
  67. aethergraph/plugins/channel/adapters/telegram.py +302 -0
  68. aethergraph/plugins/channel/adapters/webhook.py +104 -0
  69. aethergraph/plugins/channel/adapters/webui.py +134 -0
  70. aethergraph/plugins/channel/routes/__init__.py +0 -0
  71. aethergraph/plugins/channel/routes/console_routes.py +86 -0
  72. aethergraph/plugins/channel/routes/slack_routes.py +49 -0
  73. aethergraph/plugins/channel/routes/telegram_routes.py +26 -0
  74. aethergraph/plugins/channel/routes/webui_routes.py +136 -0
  75. aethergraph/plugins/channel/utils/__init__.py +0 -0
  76. aethergraph/plugins/channel/utils/slack_utils.py +278 -0
  77. aethergraph/plugins/channel/utils/telegram_utils.py +324 -0
  78. aethergraph/plugins/channel/websockets/slack_ws.py +68 -0
  79. aethergraph/plugins/channel/websockets/telegram_polling.py +151 -0
  80. aethergraph/plugins/mcp/fs_server.py +128 -0
  81. aethergraph/plugins/mcp/http_server.py +101 -0
  82. aethergraph/plugins/mcp/ws_server.py +180 -0
  83. aethergraph/plugins/net/http.py +10 -0
  84. aethergraph/plugins/utils/data_io.py +359 -0
  85. aethergraph/runner/__init__.py +5 -0
  86. aethergraph/runtime/__init__.py +62 -0
  87. aethergraph/server/__init__.py +3 -0
  88. aethergraph/server/app_factory.py +84 -0
  89. aethergraph/server/start.py +122 -0
  90. aethergraph/services/__init__.py +10 -0
  91. aethergraph/services/artifacts/facade.py +284 -0
  92. aethergraph/services/artifacts/factory.py +35 -0
  93. aethergraph/services/artifacts/fs_store.py +656 -0
  94. aethergraph/services/artifacts/jsonl_index.py +123 -0
  95. aethergraph/services/artifacts/paths.py +23 -0
  96. aethergraph/services/artifacts/sqlite_index.py +209 -0
  97. aethergraph/services/artifacts/utils.py +124 -0
  98. aethergraph/services/auth/dev.py +16 -0
  99. aethergraph/services/channel/channel_bus.py +293 -0
  100. aethergraph/services/channel/factory.py +44 -0
  101. aethergraph/services/channel/session.py +511 -0
  102. aethergraph/services/channel/wait_helpers.py +57 -0
  103. aethergraph/services/clock/clock.py +9 -0
  104. aethergraph/services/container/default_container.py +320 -0
  105. aethergraph/services/continuations/continuation.py +56 -0
  106. aethergraph/services/continuations/factory.py +34 -0
  107. aethergraph/services/continuations/stores/fs_store.py +264 -0
  108. aethergraph/services/continuations/stores/inmem_store.py +95 -0
  109. aethergraph/services/eventbus/inmem.py +21 -0
  110. aethergraph/services/features/static.py +10 -0
  111. aethergraph/services/kv/ephemeral.py +90 -0
  112. aethergraph/services/kv/factory.py +27 -0
  113. aethergraph/services/kv/layered.py +41 -0
  114. aethergraph/services/kv/sqlite_kv.py +128 -0
  115. aethergraph/services/llm/factory.py +157 -0
  116. aethergraph/services/llm/generic_client.py +542 -0
  117. aethergraph/services/llm/providers.py +3 -0
  118. aethergraph/services/llm/service.py +105 -0
  119. aethergraph/services/logger/base.py +36 -0
  120. aethergraph/services/logger/compat.py +50 -0
  121. aethergraph/services/logger/formatters.py +106 -0
  122. aethergraph/services/logger/std.py +203 -0
  123. aethergraph/services/mcp/helpers.py +23 -0
  124. aethergraph/services/mcp/http_client.py +70 -0
  125. aethergraph/services/mcp/mcp_tools.py +21 -0
  126. aethergraph/services/mcp/registry.py +14 -0
  127. aethergraph/services/mcp/service.py +100 -0
  128. aethergraph/services/mcp/stdio_client.py +70 -0
  129. aethergraph/services/mcp/ws_client.py +115 -0
  130. aethergraph/services/memory/bound.py +106 -0
  131. aethergraph/services/memory/distillers/episode.py +116 -0
  132. aethergraph/services/memory/distillers/rolling.py +74 -0
  133. aethergraph/services/memory/facade.py +633 -0
  134. aethergraph/services/memory/factory.py +78 -0
  135. aethergraph/services/memory/hotlog_kv.py +27 -0
  136. aethergraph/services/memory/indices.py +74 -0
  137. aethergraph/services/memory/io_helpers.py +72 -0
  138. aethergraph/services/memory/persist_fs.py +40 -0
  139. aethergraph/services/memory/resolver.py +152 -0
  140. aethergraph/services/metering/noop.py +4 -0
  141. aethergraph/services/prompts/file_store.py +41 -0
  142. aethergraph/services/rag/chunker.py +29 -0
  143. aethergraph/services/rag/facade.py +593 -0
  144. aethergraph/services/rag/index/base.py +27 -0
  145. aethergraph/services/rag/index/faiss_index.py +121 -0
  146. aethergraph/services/rag/index/sqlite_index.py +134 -0
  147. aethergraph/services/rag/index_factory.py +52 -0
  148. aethergraph/services/rag/parsers/md.py +7 -0
  149. aethergraph/services/rag/parsers/pdf.py +14 -0
  150. aethergraph/services/rag/parsers/txt.py +7 -0
  151. aethergraph/services/rag/utils/hybrid.py +39 -0
  152. aethergraph/services/rag/utils/make_fs_key.py +62 -0
  153. aethergraph/services/redactor/simple.py +16 -0
  154. aethergraph/services/registry/key_parsing.py +44 -0
  155. aethergraph/services/registry/registry_key.py +19 -0
  156. aethergraph/services/registry/unified_registry.py +185 -0
  157. aethergraph/services/resume/multi_scheduler_resume_bus.py +65 -0
  158. aethergraph/services/resume/router.py +73 -0
  159. aethergraph/services/schedulers/registry.py +41 -0
  160. aethergraph/services/secrets/base.py +7 -0
  161. aethergraph/services/secrets/env.py +8 -0
  162. aethergraph/services/state_stores/externalize.py +135 -0
  163. aethergraph/services/state_stores/graph_observer.py +131 -0
  164. aethergraph/services/state_stores/json_store.py +67 -0
  165. aethergraph/services/state_stores/resume_policy.py +119 -0
  166. aethergraph/services/state_stores/serialize.py +249 -0
  167. aethergraph/services/state_stores/utils.py +91 -0
  168. aethergraph/services/state_stores/validate.py +78 -0
  169. aethergraph/services/tracing/noop.py +18 -0
  170. aethergraph/services/waits/wait_registry.py +91 -0
  171. aethergraph/services/wakeup/memory_queue.py +57 -0
  172. aethergraph/services/wakeup/scanner_producer.py +56 -0
  173. aethergraph/services/wakeup/worker.py +31 -0
  174. aethergraph/tools/__init__.py +25 -0
  175. aethergraph/utils/optdeps.py +8 -0
  176. aethergraph-0.1.0a1.dist-info/METADATA +410 -0
  177. aethergraph-0.1.0a1.dist-info/RECORD +182 -0
  178. aethergraph-0.1.0a1.dist-info/WHEEL +5 -0
  179. aethergraph-0.1.0a1.dist-info/entry_points.txt +2 -0
  180. aethergraph-0.1.0a1.dist-info/licenses/LICENSE +176 -0
  181. aethergraph-0.1.0a1.dist-info/licenses/NOTICE +31 -0
  182. aethergraph-0.1.0a1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,633 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Sequence
4
+ import hashlib
5
+ import json
6
+ import os
7
+ import re
8
+ import time
9
+ from typing import Any, Literal
10
+ import unicodedata
11
+
12
+ from aethergraph.contracts.services.llm import LLMClientProtocol
13
+ from aethergraph.contracts.services.memory import Event, HotLog, Indices, Persistence
14
+ from aethergraph.services.artifacts.fs_store import FileArtifactStoreSync
15
+ from aethergraph.services.rag.facade import RAGFacade
16
+
17
+ _SAFE = re.compile(r"[^A-Za-z0-9._-]+")
18
+
19
+
20
def now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string with a 'Z' suffix."""
    utc_now = time.gmtime()
    return time.strftime("%Y-%m-%dT%H:%M:%SZ", utc_now)
22
+
23
+
24
def stable_event_id(parts: dict[str, Any]) -> str:
    """Derive a deterministic 24-hex-char event id from a JSON-serializable dict.

    Keys are sorted so logically-equal dicts always hash to the same id.
    """
    canonical = json.dumps(parts, sort_keys=True, ensure_ascii=False)
    digest = hashlib.sha256(canonical.encode("utf-8"))
    return digest.hexdigest()[:24]
27
+
28
+
29
+ def _short_hash(s: str, n: int = 8) -> str:
30
+ return hashlib.sha256(s.encode("utf-8")).hexdigest()[:n]
31
+
32
+
33
+ def _slug(s: str) -> str:
34
+ s = unicodedata.normalize("NFKC", str(s)).strip()
35
+ s = s.replace(" ", "-")
36
+ s = _SAFE.sub("-", s)
37
+ return s.strip("-") or "default"
38
+
39
+
40
+ def _load_sticky(path: str) -> dict:
41
+ try:
42
+ with open(path, encoding="utf-8") as f:
43
+ return json.load(f)
44
+ except Exception:
45
+ return {}
46
+
47
+
48
+ def _save_sticky(path: str, m: dict):
49
+ os.makedirs(os.path.dirname(path), exist_ok=True)
50
+ with open(path, "w", encoding="utf-8") as f:
51
+ json.dump(m, f, ensure_ascii=False, indent=2)
52
+
53
+
54
+ class MemoryFacade:
55
+ """
56
+ MemoryFacade = “session memory” front-door bound to a specific runtime scope.
57
+
58
+ What it is:
59
+ -----------
60
+ A small, async façade that coordinates three *core* memory components:
61
+ • HotLog — fast, transient ring-buffer of recent events (in KV/Redis/etc.)
62
+ • Persistence — durable append/replay of events & JSON blobs (e.g., FS JSONL, S3, DB)
63
+ • Indices — small KV-based derived views for fast lookups (latest by name/ref kind/topic)
64
+
65
+ Optionally:
66
+ • artifact_store — for content-addressed, immutable artifacts (large files/dirs, bundles).
67
+ Not required for core memory; used by distillers/tools when you want CAS URIs.
68
+
69
+ Why this split:
70
+ ---------------
71
+ • HotLog: low latency read/write for “what just happened?”, used by routers/LLM context builders.
72
+ • Persistence: durable, append-only event log + JSON blobs (summaries, episodes) for replay/analytics.
73
+ • Indices: derived KV tables to avoid scanning logs for common “last X” queries.
74
+ • Artifacts: big assets (images, datasets, reports) that benefit from CAS, pinning, and reuse.
75
+
76
+ Binding / Scope:
77
+ ----------------
78
+ A MemoryFacade instance is bound to a scope via:
79
+ run_id, graph_id, node_id, agent_id
80
+ Typically constructed by a MemoryFactory at run/node creation, so tools/agents can just call:
81
+ await ctx.services.memory.record_raw(...)
82
+ await ctx.services.memory.write_result(...)
83
+
84
+ Concurrency:
85
+ ------------
86
+ All public methods are async; implementations of HotLog/Persistence/Indices SHOULD be non-blocking
87
+ (use asyncio primitives or run blocking IO via asyncio.to_thread).
88
+
89
+ Configuration knobs:
90
+ --------------------
91
+ • hot_limit: max events kept in HotLog per session (ring buffer).
92
+ • hot_ttl_s: TTL for HotLog entries (e.g., 7 days).
93
+ • default_signal_threshold: heuristic floor for “signal” scoring in rolling summaries, etc.
94
+
95
+ Typical flow:
96
+ -------------
97
+ 1) `record_raw(...)` appends an Event to HotLog (fast) and to Persistence JSONL (durable).
98
+ 2) `write_result(...)` is a typed helper for tool/agent outputs; also updates Indices.
99
+ 3) `recent(...)`, `last_by_name(...)`, `latest_refs_by_kind(...)` read from HotLog/Indices.
100
+ 4) Distillers (rolling / episode) pull from HotLog & Persistence to synthesize summaries,
101
+ then write back via Persistence (JSON) and/or ArtifactStore (CAS) if configured.
102
+
103
+ Extensibility:
104
+ --------------
105
+ • Add more distillers (RAG digests, long-term memory compaction).
106
+ • Add helpers to save content-addressed artifacts (e.g., `save_summary_as_artifact`).
107
+ • Swap backends by providing different implementations of the protocols.
108
+ """
109
+
110
+ def __init__(
111
+ self,
112
+ *,
113
+ run_id: str,
114
+ graph_id: str | None,
115
+ node_id: str | None,
116
+ agent_id: str | None,
117
+ hotlog: HotLog,
118
+ persistence: Persistence,
119
+ indices: Indices,
120
+ artifact_store: FileArtifactStoreSync,
121
+ hot_limit: int = 1000,
122
+ hot_ttl_s: int = 7 * 24 * 3600,
123
+ default_signal_threshold: float = 0.25,
124
+ logger=None,
125
+ rag: RAGFacade | None = None,
126
+ llm: LLMClientProtocol | None = None,
127
+ ):
128
+ self.run_id = run_id
129
+ self.graph_id = graph_id
130
+ self.node_id = node_id
131
+ self.agent_id = agent_id
132
+ self.hotlog = hotlog
133
+ self.persistence = persistence
134
+ self.indices = indices
135
+ self.artifacts = artifact_store
136
+ self.hot_limit = hot_limit
137
+ self.hot_ttl_s = hot_ttl_s
138
+ self.default_signal_threshold = default_signal_threshold
139
+ self.logger = logger
140
+ self.rag = rag
141
+ self.llm = llm # optional LLM service for RAG answering, etc.
142
+
143
# ---------- recording ----------
async def record_raw(
    self,
    *,
    base: dict[str, Any],
    text: str | None = None,
    metrics: dict[str, Any] | None = None,
    sources: list[str] | None = None,
) -> Event:
    """
    Append a normalized event to HotLog (fast) and Persistence (durable).

    - `base` carries identity + classification:
        { kind, stage, severity, tool, tags, entities, inputs, outputs, ... }
      Missing scope fields are stamped with (run_id, graph_id, node_id, agent_id).
    - `text`   : optional human-readable note/message
    - `metrics`: optional numeric map (latency, token counts, costs, etc.)
    - `sources`: optional list of event_ids this event summarizes/derives from

    Returns the Event (with stable event_id and computed `signal`).

    Notes:
    - A lightweight "signal" score is computed if the caller didn't set one.
    - Indices are NOT updated here automatically; only `write_result(...)`
      does that, because indices are tuned for typed outputs (Value[]).
    - Fixes: (1) the caller's `base` dict is no longer mutated in place —
      stamping happens on a shallow copy; (2) a caller-supplied
      `base["signal"]` previously collided with the explicit `signal=`
      kwarg when constructing Event (duplicate keyword TypeError) — it is
      now popped from the dict before unpacking.
    """
    ts = now_iso()
    stamped = dict(base)  # shallow copy: never mutate the caller's dict
    stamped.setdefault("run_id", self.run_id)
    stamped.setdefault("graph_id", self.graph_id)
    stamped.setdefault("node_id", self.node_id)
    stamped.setdefault("agent_id", self.agent_id)
    severity = int(stamped.get("severity", 2))
    # pop (not get): "signal" must not be unpacked again via **stamped below.
    signal = stamped.pop("signal", None)
    if signal is None:
        signal = self._estimate_signal(text=text, metrics=metrics, severity=severity)

    eid = stable_event_id(
        {
            "ts": ts,
            "run_id": stamped["run_id"],
            "graph_id": stamped.get("graph_id"),
            "node_id": stamped.get("node_id"),
            "agent_id": stamped.get("agent_id"),
            "tool": stamped.get("tool"),
            "kind": stamped.get("kind"),
            "stage": stamped.get("stage"),
            "severity": severity,
            "text": (text or "")[:6000],
            "metrics_present": bool(metrics),
            "sources": sources or [],
        }
    )

    evt = Event(event_id=eid, ts=ts, text=text, metrics=metrics, signal=signal, **stamped)
    await self.hotlog.append(self.run_id, evt, ttl_s=self.hot_ttl_s, limit=self.hot_limit)
    await self.persistence.append_event(self.run_id, evt)
    return evt
205
+
206
async def record(
    self,
    kind,
    data,
    tags=None,
    entities=None,
    severity=2,
    stage=None,
    inputs_ref=None,
    outputs_ref=None,
    metrics=None,
    sources=None,
    signal=None,
) -> Event:
    """
    Convenience wrapper around record_raw() with common fields.

    Parameters:
    - kind       : event kind (e.g., "user_msg", "tool_call", etc.)
    - data       : json-serializable content (stringified when not already a str)
    - tags       : optional list of string tags
    - entities   : optional list of entity IDs
    - severity   : integer severity (1=low ... 5=high)
    - stage      : optional stage label (e.g., "observe", "act", etc.)
    - inputs_ref : optional typed input references (e.g., List[Value] dicts)
    - outputs_ref: optional typed output references (e.g., List[Value] dicts)
    - metrics    : optional numeric map (latency, token counts, costs, etc.)
    - sources    : optional list of event_ids this event summarizes/derives from
    - signal     : optional float signal score (0.0-1.0)

    NOTE(review): `signal` is accepted but never forwarded to record_raw(),
    so a caller-supplied score is currently ignored and the heuristic score
    is used instead — confirm whether this is intentional.

    Returns the Event.
    """
    # Stringify non-string payloads; flag unserializable data loudly.
    text = None
    if data is not None:
        if isinstance(data, str):
            text = data
        else:
            try:
                text = json.dumps(data, ensure_ascii=False)
            except Exception as e:
                text = f"<unserializable data: {e!s}>"
                if self.logger:
                    self.logger.warning(text)
    base = {
        "kind": kind,
        "stage": stage,
        "severity": severity,
        "tags": tags or [],
        "entities": entities or [],
        "inputs": inputs_ref,
        "outputs": outputs_ref,
    }
    return await self.record_raw(base=base, text=text, metrics=metrics, sources=sources)
260
+
261
async def write_result(
    self,
    *,
    topic: str,
    inputs: list[dict[str, Any]] | None = None,
    outputs: list[dict[str, Any]] | None = None,
    tags: list[str] | None = None,
    metrics: dict[str, float] | None = None,
    message: str | None = None,
    severity: int = 3,
) -> Event:
    """
    Record a typed "tool/agent/flow result" and refresh the fast-path indices.

    `topic`   : tool/agent/flow identifier (used by indices.last_outputs_by_topic)
    `inputs`  : List[Value]
    `outputs` : List[Value] <-- indices derive from these

    Emits a normalized `tool_result` event via record_raw() (HotLog +
    Persistence stay consistent), then updates `indices` so latest-by-name,
    latest-ref-by-kind, and last-outputs-by-topic lookups stay current.
    """
    payload = {
        "tool": topic,
        "kind": "tool_result",
        "severity": severity,
        "tags": tags or [],
        "inputs": inputs or [],
        "outputs": outputs or [],
    }
    evt = await self.record_raw(base=payload, text=message, metrics=metrics)
    await self.indices.update(self.run_id, evt)
    return evt
300
+
301
# ---------- retrieval ----------
async def recent(self, *, kinds: list[str] | None = None, limit: int = 50) -> list[Event]:
    """Recent events from HotLog (most recent last), optionally filtered by kind."""
    return await self.hotlog.recent(self.run_id, kinds=kinds, limit=limit)

async def recent_data(self, *, kinds: list[str], limit: int = 50) -> list[Any]:
    """
    Like recent(), but returns decoded `data` payloads instead of Events.

    Follows the same JSON-in-text convention as record(): event text that
    parses as JSON is decoded; anything else is returned as the raw string.
    Events without text are skipped.
    """
    events = await self.recent(kinds=kinds, limit=limit)
    decoded: list[Any] = []
    for event in events:
        if not event.text:
            continue
        try:
            decoded.append(json.loads(event.text))
        except Exception:
            decoded.append(event.text)
    return decoded

async def last_by_name(self, name: str):
    """Last output value recorded under `name` (Indices fast path)."""
    return await self.indices.last_by_name(self.run_id, name)

async def latest_refs_by_kind(self, kind: str, *, limit: int = 50):
    """Latest ref outputs with ref.kind == `kind` (KV-backed fast path)."""
    return await self.indices.latest_refs_by_kind(self.run_id, kind, limit=limit)

async def last_outputs_by_topic(self, topic: str):
    """Last output map recorded for a topic (tool/flow/agent), from Indices."""
    return await self.indices.last_outputs_by_topic(self.run_id, topic)

# Friendlier aliases for the fast-path getters above.
async def get_last_value(self, name: str):
    """Alias for last_by_name()."""
    return await self.last_by_name(name)

async def get_latest_values_by_kind(self, kind: str, *, limit: int = 50):
    """Alias for latest_refs_by_kind()."""
    return await self.latest_refs_by_kind(kind, limit=limit)

async def get_last_outputs_for_topic(self, topic: str):
    """Alias for last_outputs_by_topic()."""
    return await self.last_outputs_by_topic(topic)
348
+
349
# ---------- distillation (plug strategies) ----------
async def distill_rolling_chat(
    self,
    *,
    max_turns: int = 20,
    min_signal: float | None = None,
    turn_kinds: list[str] | None = None,
) -> dict[str, Any]:
    """
    Build a rolling chat summary from recent user/assistant turns.

    Reads from HotLog and may emit a JSON summary via Persistence. Falls
    back to `default_signal_threshold` when `min_signal` is not given; the
    summarizer defaults `turn_kinds` to ["user_msg", "assistant_msg"].
    Returns a small descriptor (e.g., { "uri": ..., "sources": [...], ... }).
    """
    # Imported lazily to keep module import light and avoid cycles.
    from aethergraph.services.memory.distillers.rolling import RollingSummarizer

    summarizer = RollingSummarizer(
        max_turns=max_turns,
        min_signal=min_signal or self.default_signal_threshold,
        turn_kinds=turn_kinds,
    )
    return await summarizer.distill(
        self.run_id, hotlog=self.hotlog, persistence=self.persistence, indices=self.indices
    )

async def distill_episode(
    self, *, tool: str, run_id: str, include_metrics: bool = True
) -> dict[str, Any]:
    """
    Summarize a tool/agent episode (all events for a given run_id+tool).

    Reads from HotLog/Persistence and writes back a summary JSON (and
    optionally a CAS bundle). Returns a descriptor such as
    { "uri": ..., "sources": [...], "metrics": {...} }.
    """
    from aethergraph.services.memory.distillers.episode import EpisodeSummarizer

    summarizer = EpisodeSummarizer(include_metrics=include_metrics)
    return await summarizer.distill(
        self.run_id,
        hotlog=self.hotlog,
        persistence=self.persistence,
        indices=self.indices,
        tool=tool,
        run_id=run_id,
    )
397
+
398
# ---------- RAG facade ----------
async def rag_upsert(
    self, *, corpus_id: str, docs: Sequence[dict[str, Any]], topic: str | None = None
) -> dict[str, Any]:
    """
    Upsert documents into a RAG corpus via the configured RAG facade.

    Raises RuntimeError when no RAG facade is bound. `topic` is currently
    unused (reserved for optional result logging, which is disabled for now).
    Returns the upsert stats dict from the facade.
    """
    if not self.rag:
        raise RuntimeError("RAG facade not configured in MemoryFacade")
    return await self.rag.upsert_docs(corpus_id=corpus_id, docs=list(docs))
414
+
415
+ # ---------- helpers ----------
416
+ def _estimate_signal(
417
+ self, *, text: str | None, metrics: dict[str, Any] | None, severity: int
418
+ ) -> float:
419
+ """
420
+ Cheap heuristic to gauge “signal” of an event (0.0–1.0).
421
+ - Rewards presence/length of text and presence of metrics.
422
+ - Used as a noise gate in rolling summaries; can be overridden by caller.
423
+ """
424
+ score = 0.15 + 0.1 * severity
425
+ if text:
426
+ score += min(len(text) / 400.0, 0.4)
427
+ if metrics:
428
+ score += 0.2
429
+ return max(0.0, min(1.0, score))
430
+
431
+ def resolve(self, params: dict[str, Any]) -> dict[str, Any]:
432
+ """
433
+ Synchronous version of parameter resolution (for sync contexts).
434
+ See `aethergraph.services.memory.resolver.resolve_params` for details.
435
+ """
436
+ from aethergraph.services.memory.resolver import ResolverContext, resolve_params
437
+
438
+ rctx = ResolverContext(mem=self)
439
+ return resolve_params(params, rctx)
440
+
441
# ----------- RAG: corpus binding & status -----------
async def rag_bind(
    self,
    *,
    corpus_id: str | None = None,
    key: str | None = None,
    create_if_missing: bool = True,
    labels: dict | None = None,
) -> str:
    """
    Resolve (and optionally create) the corpus this scope should use.

    An explicit `corpus_id` wins; otherwise a stable id is derived from
    `key` (or, failing that, this run_id) as "run:<slug>-<hash12>".
    """
    if not self.rag:
        raise RuntimeError("RAG facade not configured")

    if corpus_id:
        if create_if_missing:
            await self.rag.add_corpus(corpus_id, meta=labels or {})
        return corpus_id

    # Prefer the explicit key; else derive a stable id from run_id.
    seed = key or self.run_id
    derived = f"run:{_slug(seed)}-{_short_hash(seed, 12)}"
    if create_if_missing:
        await self.rag.add_corpus(derived, meta=labels or {})
    return derived

async def rag_status(self, *, corpus_id: str) -> dict:
    """Quick stats about a corpus (delegates to the RAG facade)."""
    if not self.rag:
        raise RuntimeError("RAG facade not configured in MemoryFacade")
    # Lightweight: counts docs/chunks by scanning the jsonl (fast enough for now).
    return await self.rag.stats(corpus_id)

async def rag_snapshot(self, *, corpus_id: str, title: str, labels: dict | None = None) -> dict:
    """Export the corpus into an artifact bundle and log its URI as a tool_result."""
    if not self.rag:
        raise RuntimeError("RAG facade not configured in MemoryFacade")
    bundle = await self.rag.export(corpus_id)
    # Leave an auditable trail for the snapshot.
    await self.write_result(
        topic=f"rag.snapshot.{corpus_id}",
        outputs=[{"name": "bundle_uri", "kind": "uri", "value": bundle.get("uri")}],
        tags=["rag", "snapshot"],
        message=title,
        severity=2,
    )
    return bundle

async def rag_compact(self, *, corpus_id: str, policy: dict | None = None) -> dict:
    """
    Placeholder compaction strategy.

    Today this only exposes reembed() plumbing (policy["reembed_model"]);
    pruning by label/min_score is reserved for the future.
    """
    if not self.rag:
        raise RuntimeError("RAG facade not configured in MemoryFacade")
    policy = policy or {}
    model = policy.get("reembed_model")
    if model:
        await self.rag.reembed(corpus_id, model=model)
    # pruned_docs stays 0 until a pruning policy is implemented.
    return {"pruned_docs": 0, "reembedded": bool(model)}
504
+
505
# ----------- RAG: event → doc promotion -----------
async def rag_promote_events(
    self,
    *,
    corpus_id: str,
    events: list[Event] | None = None,
    where: dict | None = None,
    policy: dict | None = None,
) -> dict:
    """
    Convert events into documents and upsert them into a corpus.

    where : optional selector, e.g. {"kinds": ["tool_result"], "min_signal": 0.25, "limit": 200}
    policy: {"min_signal": float}; may grow more knobs (chunk size, overlap, ...).
    Returns the upsert stats from the RAG facade.
    """
    if not self.rag:
        raise RuntimeError("RAG facade not configured in MemoryFacade")
    policy = policy or {}
    min_signal = policy.get("min_signal", self.default_signal_threshold)

    # Select candidate events from the hot log when none were provided.
    if events is None:
        selector = where or {}
        candidates = await self.recent(
            kinds=selector.get("kinds"), limit=int(selector.get("limit", 200))
        )
        events = [
            e for e in candidates if (getattr(e, "signal", 0.0) or 0.0) >= float(min_signal)
        ]

    docs: list[dict] = []
    for e in events:
        labels = {
            "kind": e.kind,
            "tool": e.tool,
            "stage": e.stage,
            "severity": e.severity,
            "run_id": e.run_id,
            "graph_id": e.graph_id,
            "node_id": e.node_id,
            "agent_id": e.agent_id,
            "tags": list(e.tags or []),
        }
        # Prefer human-readable text; fall back to compact JSON of I/O + metrics.
        body = e.text or json.dumps(
            {"inputs": e.inputs, "outputs": e.outputs, "metrics": e.metrics},
            ensure_ascii=False,
        )
        docs.append(
            {
                "text": body,
                "title": f"{e.kind}:{(e.tool or e.stage or 'n/a')}:{e.ts}",
                "labels": labels,
            }
        )

    if not docs:
        return {
            "added": 0,
            "chunks": 0,
            "index": getattr(self.rag.index, "__class__", type("X", (object,), {})).__name__,
        }

    stats = await self.rag.upsert_docs(corpus_id=corpus_id, docs=docs)
    # (Optional) write a result for traceability.
    await self.write_result(
        topic=f"rag.promote.{corpus_id}",
        outputs=[
            {"name": "added_docs", "kind": "number", "value": stats.get("added", 0)},
            {"name": "chunks", "kind": "number", "value": stats.get("chunks", 0)},
        ],
        tags=["rag", "ingest"],
        message=f"Promoted {stats.get('added', 0)} events into {corpus_id}",
        severity=2,
    )
    return stats
574
+
575
# ----------- RAG: search & answer -----------
async def rag_search(
    self,
    *,
    corpus_id: str,
    query: str,
    k: int = 8,
    filters: dict | None = None,
    mode: Literal["hybrid", "dense"] = "hybrid",
) -> list[dict]:
    """Search the corpus; return plain serializable dicts rather than hit objects."""
    if not self.rag:
        raise RuntimeError("RAG facade not configured in MemoryFacade")
    hits = await self.rag.search(corpus_id, query, k=k, filters=filters, mode=mode)
    return [
        {
            "chunk_id": h.chunk_id,
            "doc_id": h.doc_id,
            "corpus_id": h.corpus_id,
            "score": h.score,
            "text": h.text,
            "meta": h.meta,
        }
        for h in hits
    ]

async def rag_answer(
    self,
    *,
    corpus_id: str,
    question: str,
    style: Literal["concise", "detailed"] = "concise",
    with_citations: bool = True,
    k: int = 6,
) -> dict:
    """Answer `question` over the corpus (with citations), then log a tool_result."""
    if not self.rag:
        raise RuntimeError("RAG facade not configured in MemoryFacade")
    ans = await self.rag.answer(
        corpus_id=corpus_id,
        question=question,
        llm=self.llm,
        style=style,
        with_citations=with_citations,
        k=k,
    )
    # Flatten the answer + resolved citations into typed outputs for indices.
    outputs = [{"name": "answer", "kind": "text", "value": ans.get("answer", "")}]
    outputs.extend(
        {"name": f"cite_{i}", "kind": "json", "value": citation}
        for i, citation in enumerate(ans.get("resolved_citations", []), start=1)
    )
    await self.write_result(
        topic=f"rag.answer.{corpus_id}",
        outputs=outputs,
        tags=["rag", "qa"],
        message=f"Q: {question}",
        metrics=ans.get("usage", {}),
        severity=2,
    )
    return ans