aethergraph 0.1.0a3__py3-none-any.whl → 0.1.0a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. aethergraph/api/v1/artifacts.py +23 -4
  2. aethergraph/api/v1/schemas.py +7 -0
  3. aethergraph/api/v1/session.py +123 -4
  4. aethergraph/config/config.py +2 -0
  5. aethergraph/config/search.py +49 -0
  6. aethergraph/contracts/services/channel.py +18 -1
  7. aethergraph/contracts/services/execution.py +58 -0
  8. aethergraph/contracts/services/llm.py +26 -0
  9. aethergraph/contracts/services/memory.py +10 -4
  10. aethergraph/contracts/services/planning.py +53 -0
  11. aethergraph/contracts/storage/event_log.py +8 -0
  12. aethergraph/contracts/storage/search_backend.py +47 -0
  13. aethergraph/contracts/storage/vector_index.py +73 -0
  14. aethergraph/core/graph/action_spec.py +76 -0
  15. aethergraph/core/graph/graph_fn.py +75 -2
  16. aethergraph/core/graph/graphify.py +74 -2
  17. aethergraph/core/runtime/graph_runner.py +2 -1
  18. aethergraph/core/runtime/node_context.py +66 -3
  19. aethergraph/core/runtime/node_services.py +8 -0
  20. aethergraph/core/runtime/run_manager.py +263 -271
  21. aethergraph/core/runtime/run_types.py +54 -1
  22. aethergraph/core/runtime/runtime_env.py +35 -14
  23. aethergraph/core/runtime/runtime_services.py +308 -18
  24. aethergraph/plugins/agents/default_chat_agent.py +266 -74
  25. aethergraph/plugins/agents/default_chat_agent_v2.py +487 -0
  26. aethergraph/plugins/channel/adapters/webui.py +69 -21
  27. aethergraph/plugins/channel/routes/webui_routes.py +8 -48
  28. aethergraph/runtime/__init__.py +12 -0
  29. aethergraph/server/app_factory.py +3 -0
  30. aethergraph/server/ui_static/assets/index-CFktGdbW.js +4913 -0
  31. aethergraph/server/ui_static/assets/index-DcfkFlTA.css +1 -0
  32. aethergraph/server/ui_static/index.html +2 -2
  33. aethergraph/services/artifacts/facade.py +157 -21
  34. aethergraph/services/artifacts/types.py +35 -0
  35. aethergraph/services/artifacts/utils.py +42 -0
  36. aethergraph/services/channel/channel_bus.py +3 -1
  37. aethergraph/services/channel/event_hub copy.py +55 -0
  38. aethergraph/services/channel/event_hub.py +81 -0
  39. aethergraph/services/channel/factory.py +3 -2
  40. aethergraph/services/channel/session.py +709 -74
  41. aethergraph/services/container/default_container.py +69 -7
  42. aethergraph/services/execution/__init__.py +0 -0
  43. aethergraph/services/execution/local_python.py +118 -0
  44. aethergraph/services/indices/__init__.py +0 -0
  45. aethergraph/services/indices/global_indices.py +21 -0
  46. aethergraph/services/indices/scoped_indices.py +292 -0
  47. aethergraph/services/llm/generic_client.py +342 -46
  48. aethergraph/services/llm/generic_embed_client.py +359 -0
  49. aethergraph/services/llm/types.py +3 -1
  50. aethergraph/services/memory/distillers/llm_long_term.py +60 -109
  51. aethergraph/services/memory/distillers/llm_long_term_v1.py +180 -0
  52. aethergraph/services/memory/distillers/llm_meta_summary.py +57 -266
  53. aethergraph/services/memory/distillers/llm_meta_summary_v1.py +342 -0
  54. aethergraph/services/memory/distillers/long_term.py +48 -131
  55. aethergraph/services/memory/distillers/long_term_v1.py +170 -0
  56. aethergraph/services/memory/facade/chat.py +18 -8
  57. aethergraph/services/memory/facade/core.py +159 -19
  58. aethergraph/services/memory/facade/distillation.py +86 -31
  59. aethergraph/services/memory/facade/retrieval.py +100 -1
  60. aethergraph/services/memory/factory.py +4 -1
  61. aethergraph/services/planning/__init__.py +0 -0
  62. aethergraph/services/planning/action_catalog.py +271 -0
  63. aethergraph/services/planning/bindings.py +56 -0
  64. aethergraph/services/planning/dependency_index.py +65 -0
  65. aethergraph/services/planning/flow_validator.py +263 -0
  66. aethergraph/services/planning/graph_io_adapter.py +150 -0
  67. aethergraph/services/planning/input_parser.py +312 -0
  68. aethergraph/services/planning/missing_inputs.py +28 -0
  69. aethergraph/services/planning/node_planner.py +613 -0
  70. aethergraph/services/planning/orchestrator.py +112 -0
  71. aethergraph/services/planning/plan_executor.py +506 -0
  72. aethergraph/services/planning/plan_types.py +321 -0
  73. aethergraph/services/planning/planner.py +617 -0
  74. aethergraph/services/planning/planner_service.py +369 -0
  75. aethergraph/services/planning/planning_context_builder.py +43 -0
  76. aethergraph/services/planning/quick_actions.py +29 -0
  77. aethergraph/services/planning/routers/__init__.py +0 -0
  78. aethergraph/services/planning/routers/simple_router.py +26 -0
  79. aethergraph/services/rag/facade.py +0 -3
  80. aethergraph/services/scope/scope.py +30 -30
  81. aethergraph/services/scope/scope_factory.py +15 -7
  82. aethergraph/services/skills/__init__.py +0 -0
  83. aethergraph/services/skills/skill_registry.py +465 -0
  84. aethergraph/services/skills/skills.py +220 -0
  85. aethergraph/services/skills/utils.py +194 -0
  86. aethergraph/storage/artifacts/artifact_index_jsonl.py +16 -10
  87. aethergraph/storage/artifacts/artifact_index_sqlite.py +12 -2
  88. aethergraph/storage/docstore/sqlite_doc_sync.py +1 -1
  89. aethergraph/storage/memory/event_persist.py +42 -2
  90. aethergraph/storage/memory/fs_persist.py +32 -2
  91. aethergraph/storage/search_backend/__init__.py +0 -0
  92. aethergraph/storage/search_backend/generic_vector_backend.py +230 -0
  93. aethergraph/storage/search_backend/null_backend.py +34 -0
  94. aethergraph/storage/search_backend/sqlite_lexical_backend.py +387 -0
  95. aethergraph/storage/search_backend/utils.py +31 -0
  96. aethergraph/storage/search_factory.py +75 -0
  97. aethergraph/storage/vector_index/faiss_index.py +72 -4
  98. aethergraph/storage/vector_index/sqlite_index.py +521 -52
  99. aethergraph/storage/vector_index/sqlite_index_vanila.py +311 -0
  100. aethergraph/storage/vector_index/utils.py +22 -0
  101. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/METADATA +1 -1
  102. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/RECORD +107 -63
  103. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/WHEEL +1 -1
  104. aethergraph/plugins/agents/default_chat_agent copy.py +0 -90
  105. aethergraph/server/ui_static/assets/index-BR5GtXcZ.css +0 -1
  106. aethergraph/server/ui_static/assets/index-CQ0HZZ83.js +0 -400
  107. aethergraph/services/eventhub/event_hub.py +0 -76
  108. aethergraph/services/llm/generic_client copy.py +0 -691
  109. aethergraph/services/prompts/file_store.py +0 -41
  110. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/entry_points.txt +0 -0
  111. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/licenses/LICENSE +0 -0
  112. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/licenses/NOTICE +0 -0
  113. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,342 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Iterable
4
+ import json
5
+ from typing import Any
6
+
7
+ from aethergraph.contracts.services.llm import LLMClientProtocol
8
+ from aethergraph.contracts.services.memory import Distiller, Event, HotLog
9
+ from aethergraph.contracts.storage.doc_store import DocStore
10
+ from aethergraph.services.memory.distillers.long_term import ar_summary_uri
11
+ from aethergraph.services.memory.facade.utils import now_iso
12
+ from aethergraph.services.memory.utils import _summary_doc_id, _summary_prefix
13
+
14
+ """
15
+ Meta-summary pipeline (multi-scale memory):
16
+
17
+ 1) Raw events (chat_user / chat_assistant) are recorded via `mem.record(...)`.
18
+ 2) `mem.distill_long_term(...)` compresses recent events into JSON summaries under:
19
+ mem/<scope_id>/summaries/<summary_tag>/...
20
+ e.g. summary_tag="session" → session-level long-term summaries.
21
+ 3) `mem.distill_meta_summary(...)` loads those saved summaries from disk and asks the LLM
22
+ to produce a higher-level "summary of summaries" (meta summary), written under:
23
+ mem/<scope_id>/summaries/<meta_tag>/...
24
+
25
+ ASCII view:
26
+
27
+ [events in HotLog + Persistence]
28
+
29
+
30
+ distill_long_term(...)
31
+
32
+
33
+ file://mem/<scope>/summaries/session/*.json (long_term_summary)
34
+
35
+
36
+ distill_meta_summary(...)
37
+
38
+
39
+ file://mem/<scope>/summaries/meta/*.json (meta_summary: summary of summaries)
40
+
41
+ You control time scales via `summary_tag` (e.g. "session", "weekly", "meta") and
42
+ `scope_id` (e.g. user+persona).
43
+ """
44
+
45
+
46
class LLMMetaSummaryDistiller(Distiller):
    """
    LLM-based "summary of summaries" distiller.

    Intended use:
    - Input: previously generated summary JSONs (e.g. kind="long_term_summary")
      persisted in the DocStore under mem/<scope_id>/summaries/<source_tag>/.
    - Output: higher-level meta summary (e.g. kind="meta_summary") for a broader
      time scale, written back to the DocStore under the target tag.

    Example:
    - Source: summary_tag="session" (daily/session summaries)
    - Target: summary_tag="meta" (multi-session / weekly/monthly view)
    """

    def __init__(
        self,
        *,
        llm: LLMClientProtocol,
        # Source summaries (what we are compressing)
        source_kind: str = "long_term_summary",
        source_tag: str = "session",
        # Target summary (what we produce)
        summary_kind: str = "meta_summary",
        summary_tag: str = "meta",
        max_summaries: int = 20,
        min_signal: float = 0.0,
        model: str | None = None,
    ):
        self.llm = llm
        self.source_kind = source_kind
        self.source_tag = source_tag
        self.summary_kind = summary_kind
        self.summary_tag = summary_tag
        self.max_summaries = max_summaries
        self.min_signal = min_signal
        self.model = model  # optional model override

    def _filter_source_summaries(self, events: Iterable[Event]) -> list[Event]:
        """
        Keep only summary Events matching:
          - kind == source_kind
          - tags include source_tag (and ideally 'summary')
          - signal >= min_signal
        """
        out: list[Event] = []
        for e in events:
            if e.kind != self.source_kind:
                continue
            if (e.signal or 0.0) < self.min_signal:
                continue
            tags = set(e.tags or [])
            if self.source_tag and self.source_tag not in tags:
                continue
            # NOTE: we could additionally require a generic "summary" tag to
            # avoid mixing unrelated summaries, if the pipeline guarantees it.
            out.append(e)
        return out

    @staticmethod
    def _strip_code_fences(body: str) -> str:
        """
        Remove a Markdown code fence (``` or ```json ... ```) wrapping *body*.

        Returns the inner content, or *body* unchanged when no opening fence
        is present (or stripping would leave nothing useful).
        """
        stripped = body.strip()
        if not stripped.startswith("```"):
            return body
        # Drop the opening fence line (which may carry a language tag, e.g.
        # "```json") and any trailing closing fence.
        first_nl = stripped.find("\n")
        inner = stripped[first_nl + 1 :] if first_nl != -1 else ""
        inner = inner.strip()
        inner = inner.removesuffix("```").strip()
        return inner or body

    def _build_prompt(self, summaries: list[Event]) -> list[dict[str, str]]:
        """
        Convert summary Events into a chat prompt for the LLM.

        We use:
        - e.text as the main human-readable summary preview.
        - e.data.get("time_window") if present (falling back to e.ts).
        """

        lines: list[str] = []

        for idx, e in enumerate(summaries, start=1):
            tw = (e.data or {}).get("time_window") if e.data else None
            tw_from = (tw or {}).get("from", e.ts)
            tw_to = (tw or {}).get("to", e.ts)
            body = e.text or ""
            lines.append(f"Summary {idx} [{tw_from} → {tw_to}]:\n{body}\n")

        transcript = "\n\n".join(lines)

        system = (
            "You are a higher-level summarizer over an agent's existing summaries. "
            "Given multiple prior summaries (each covering a period of time), you "
            "should produce a concise, higher-level meta-summary capturing: "
            " - long-term themes and patterns, "
            " - important user facts that remain true, "
            " - long-running goals or open loops."
        )

        user = (
            "Here are several previous summaries, each describing a time window:"
            "\n\n"
            f"{transcript}\n\n"
            "Return a JSON object with keys: "
            "`summary` (string), "
            "`key_facts` (list of strings), "
            "`open_loops` (list of strings). "
            "Do not use markdown or include explanations outside the JSON."
        )

        return [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ]

    def _build_prompt_from_saved(self, summaries: list[dict[str, Any]]) -> list[dict[str, str]]:
        """
        Build an LLM prompt from persisted summary JSONs.

        Each summary dict is the persisted JSON shape:
            {
              "type": "long_term_summary",
              "summary_tag": "session",
              "summary": "...",
              "time_window": {...},
              ...
            }
        """
        lines: list[str] = []

        for idx, s in enumerate(summaries, start=1):
            tw = s.get("time_window") or {}
            tw_from = tw.get("from", s.get("ts"))
            tw_to = tw.get("to", s.get("ts"))
            body = s.get("summary", "") or ""

            # Summaries produced by an LLM may arrive wrapped in ```json fences;
            # strip them so the meta-prompt sees clean content.
            body_for_prompt = self._strip_code_fences(body)

            lines.append(f"Summary {idx} [{tw_from} → {tw_to}]:\n{body_for_prompt}\n")

        transcript = "\n\n".join(lines)

        system = (
            "You are a higher-level summarizer over an agent's existing long-term summaries. "
            "Given multiple prior summaries (each describing a period), produce a meta-summary "
            "that captures long-term themes, stable user facts, and persistent open loops."
        )

        user = (
            "Here are several previous summaries:\n\n"
            f"{transcript}\n\n"
            "Return a JSON object with keys: "
            "`summary` (string), "
            "`key_facts` (list of strings), "
            "`open_loops` (list of strings). "
            "Do not include any extra explanation outside the JSON."
        )

        return [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ]

    async def distill(
        self,
        run_id: str,
        timeline_id: str,
        scope_id: str | None = None,
        *,
        hotlog: HotLog,
        docs: DocStore,
        **kw: Any,
    ) -> dict[str, Any]:
        """
        Distill method following the Distiller protocol.

        IMPORTANT:
        - This implementation is optimized for FSPersistence and reads
          previously saved summary JSONs from:
              mem/<scope_id>/summaries/<source_tag>/*.json
        - If a different Persistence is used, we currently bail out.

        Returns a result dict (doc id, tag, time window, preview, ...) or {}
        when there is nothing to distill.
        """
        scope = scope_id or run_id
        prefix = _summary_prefix(scope, self.source_tag)

        # 1) Load existing long-term summary JSONs from DocStore (best-effort:
        #    a missing/broken store simply yields "nothing to distill").
        try:
            all_ids = await docs.list()
        except Exception:
            all_ids = []

        candidates = sorted(d for d in all_ids if d.startswith(prefix))
        if not candidates:
            return {}

        # Guard against max_summaries <= 0: `candidates[-0:]` would select ALL
        # candidates instead of none.
        if self.max_summaries <= 0:
            return {}
        chosen_ids = candidates[-self.max_summaries :]
        summaries: list[dict[str, Any]] = []
        for doc_id in chosen_ids:
            try:
                doc = await docs.get(doc_id)
                if doc is not None:
                    summaries.append(doc)  # type: ignore[arg-type]
            except Exception:
                continue

        if not summaries:
            return {}

        # Optional: filter by min_signal if present in saved JSON
        filtered: list[dict[str, Any]] = []
        for s in summaries:
            # Missing/non-numeric signal defaults to 1.0 (always kept).
            sig = (
                float(s.get("signal", 0.0)) if isinstance(s.get("signal"), int | float) else 1.0
            )
            if sig < self.min_signal:
                continue
            # Also enforce type/tag consistency:
            if s.get("type") != self.source_kind:
                continue
            if s.get("summary_tag") != self.source_tag:
                continue
            filtered.append(s)

        if not filtered:
            return {}

        # Keep order as loaded (already sorted by filename)
        kept = filtered

        # 2) Derive aggregated time window across all kept summaries
        first_from = None
        last_to = None
        for s in kept:
            tw = s.get("time_window") or {}
            start = tw.get("from") or s.get("ts")
            end = tw.get("to") or s.get("ts")
            if start:
                first_from = start if first_from is None else min(first_from, start)
            if end:
                last_to = end if last_to is None else max(last_to, end)
        if first_from is None:
            first_from = kept[0].get("ts")
        if last_to is None:
            last_to = kept[-1].get("ts")

        # 3) Build prompt and call LLM
        messages = self._build_prompt_from_saved(kept)
        summary_json_str, usage = await self.llm.chat(messages)

        # 4) Parse LLM JSON response; fall back to treating the raw reply as
        #    the summary text when it is not valid JSON.
        try:
            payload = json.loads(summary_json_str)
        except Exception:
            payload = {
                "summary": summary_json_str,
                "key_facts": [],
                "open_loops": [],
            }

        ts = now_iso()
        summary_obj = {
            "type": self.summary_kind,
            "version": 1,
            "run_id": run_id,
            "scope_id": scope,
            "summary_tag": self.summary_tag,
            "source_summary_kind": self.source_kind,
            "source_summary_tag": self.source_tag,
            "ts": ts,
            "time_window": {"from": first_from, "to": last_to},
            "num_source_summaries": len(kept),
            "source_summary_uris": [
                # reconstruct the URI pattern we originally use
                # (this assumes summaries were written under ar_summary_uri)
                ar_summary_uri(scope, self.source_tag, s.get("ts", ts))
                for s in kept
            ],
            "summary": payload.get("summary", ""),
            "key_facts": payload.get("key_facts", []),
            "open_loops": payload.get("open_loops", []),
            "llm_usage": usage,
            "llm_model": getattr(self.llm, "model", None),
        }

        doc_id = _summary_doc_id(scope, self.summary_tag, ts)
        await docs.put(doc_id, summary_obj)

        # 5) Return a compact result with a truncated preview of the summary
        text = summary_obj["summary"] or ""
        preview = text[:2000] + (" …[truncated]" if len(text) > 2000 else "")

        return {
            "summary_doc_id": doc_id,
            "summary_kind": self.summary_kind,
            "summary_tag": self.summary_tag,
            "time_window": summary_obj["time_window"],
            "num_source_summaries": summary_obj["num_source_summaries"],
            "preview": preview,
            "ts": ts,
        }
@@ -1,15 +1,12 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from collections.abc import Iterable
4
+ import json
4
5
  import time
5
6
  from typing import Any
6
7
 
7
- from aethergraph.contracts.services.memory import Distiller, Event, HotLog, Indices, Persistence
8
-
9
- # re-use stable_event_id from the MemoryFacade module
8
+ from aethergraph.contracts.services.memory import Distiller, Event, HotLog
10
9
  from aethergraph.contracts.storage.doc_store import DocStore
11
- from aethergraph.core.runtime.runtime_metering import current_meter_context, current_metering
12
- from aethergraph.services.memory.facade.utils import stable_event_id
13
10
  from aethergraph.services.memory.utils import _summary_doc_id
14
11
 
15
12
 
@@ -17,45 +14,7 @@ def _now_iso() -> str:
17
14
  return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
18
15
 
19
16
 
20
- def ar_summary_uri_by_run_id(run_id: str, tag: str, ts: str) -> str:
21
- """
22
- NOTE: To deprecate this function in favor of ar_summary_uri below.
23
-
24
- Save summaries under the same base "mem/<run_id>/..." tree as append_event,
25
- but using a file:// URI so FSPersistence can handle it.
26
- """
27
- safe_ts = ts.replace(":", "-")
28
- return f"file://mem/{run_id}/summaries/{tag}/{safe_ts}.json"
29
-
30
-
31
- def ar_summary_uri(scope_id: str, tag: str, ts: str) -> str:
32
- """
33
- Scope summaries by a logical memory scope, not by run_id.
34
- In simple setups, scope_id == run_id. For long-lived companions, scope_id
35
- might be something like "user:zcliu:persona:companion_v1".
36
- """
37
- safe_ts = ts.replace(":", "-")
38
- return f"file://mem/{scope_id}/summaries/{tag}/{safe_ts}.json"
39
-
40
-
41
17
  class LongTermSummarizer(Distiller):
42
- """
43
- Generic long-term summarizer.
44
-
45
- Goal:
46
- - Take a slice of recent events (by kind and/or tag).
47
- - Build a compact textual digest plus small structured metadata.
48
- - Persist the summary as JSON via Persistence.save_json(...).
49
- - Emit a summary Event with kind=summary_kind and data["summary_uri"].
50
-
51
- This does NOT call an LLM by itself; it's a structural/logical summarizer.
52
- An LLM-based distiller can be layered on top later (using the same URI scheme).
53
-
54
- Typical usage:
55
- - Kinds: ["chat_user", "chat_assistant"] or app-specific kinds.
56
- - Tag: "session", "daily", "episode:<id>", etc.
57
- """
58
-
59
18
  def __init__(
60
19
  self,
61
20
  *,
@@ -82,9 +41,8 @@ class LongTermSummarizer(Distiller):
82
41
  if kinds is not None and e.kind not in kinds:
83
42
  continue
84
43
  if tags is not None:
85
- if not e.tags:
86
- continue
87
- if not tags.issubset(set(e.tags)):
44
+ et = set(e.tags or [])
45
+ if not tags.issubset(et): # AND semantics
88
46
  continue
89
47
  if (e.signal or 0.0) < self.min_signal:
90
48
  continue
@@ -95,131 +53,90 @@ class LongTermSummarizer(Distiller):
95
53
  self,
96
54
  run_id: str,
97
55
  timeline_id: str,
98
- scope_id: str = None,
56
+ scope_id: str | None = None,
99
57
  *,
100
58
  hotlog: HotLog,
101
- persistence: Persistence,
102
- indices: Indices,
103
59
  docs: DocStore,
104
60
  **kw: Any,
105
61
  ) -> dict[str, Any]:
106
- """
107
- Steps:
108
- 1) Grab recent events from HotLog for this run.
109
- 2) Filter by kinds/tags/min_signal.
110
- 3) Build a digest:
111
- - simple text transcript (role: text)
112
- - metadata: ts range, num events
113
- 4) Save JSON summary via Persistence.save_json(file://...).
114
- 5) Log a summary Event to hotlog + persistence, with data.summary_uri.
115
- """
116
- # 1) fetch more than we might keep to give filter some slack
117
- raw = await hotlog.recent(timeline_id, kinds=None, limit=self.max_events * 2)
62
+ # Over-fetch strategy:
63
+ # Tag filtering can be very selective (thread/session tags), so fetch more.
64
+ base_mult = 2
65
+ if self.include_tags:
66
+ base_mult = 8
67
+
68
+ fetch_limit = max(self.max_events * base_mult, 200)
69
+
70
+ # Narrow by kinds early when possible (less noise => more chance to fill max_events)
71
+ raw = await hotlog.recent(
72
+ timeline_id,
73
+ kinds=self.include_kinds,
74
+ limit=fetch_limit,
75
+ )
76
+
118
77
  kept = self._filter_events(raw)
119
78
  if not kept:
120
79
  return {}
121
80
 
122
- # keep only max_events most recent
123
81
  kept = kept[-self.max_events :]
124
82
 
125
- # 2) Build digest text (simple transcript-like format)
126
- lines: list[str] = []
127
- src_ids: list[str] = []
128
83
  first_ts = kept[0].ts
129
84
  last_ts = kept[-1].ts
130
85
 
86
+ # Build digest text (simple transcript-like format) + source ids
87
+ lines: list[str] = []
88
+ src_ids: list[str] = []
89
+
131
90
  for e in kept:
91
+ src_ids.append(e.event_id)
92
+
132
93
  role = e.stage or e.kind or "event"
133
- if e.text:
134
- lines.append(f"[{role}] {e.text}")
135
- src_ids.append(e.event_id)
94
+
95
+ content = (e.text or "").strip()
96
+ if not content and getattr(e, "data", None) is not None:
97
+ # fall back to a compact JSON line
98
+ try:
99
+ content = json.dumps(e.data, ensure_ascii=False)
100
+ except Exception:
101
+ content = str(e.data)
102
+
103
+ if content:
104
+ if len(content) > 500:
105
+ content = content[:500] + "…"
106
+ lines.append(f"[{role}] {content}")
136
107
 
137
108
  digest_text = "\n".join(lines)
138
109
  ts = _now_iso()
139
110
 
140
- # 3) Summary JSON shape
111
+ scope = scope_id or run_id
141
112
  summary = {
142
113
  "type": self.summary_kind,
143
114
  "version": 1,
144
115
  "run_id": run_id,
145
- "scope_id": scope_id or run_id,
116
+ "scope_id": scope,
146
117
  "summary_tag": self.summary_tag,
147
118
  "ts": ts,
148
- "time_window": {
149
- "from": first_ts,
150
- "to": last_ts,
151
- },
119
+ "time_window": {"from": first_ts, "to": last_ts},
152
120
  "num_events": len(kept),
153
121
  "source_event_ids": src_ids,
154
122
  "text": digest_text,
123
+ "include_kinds": self.include_kinds,
124
+ "include_tags": self.include_tags,
125
+ "min_signal": self.min_signal,
126
+ "fetch_limit": fetch_limit,
155
127
  }
156
128
 
157
- # 4) Persist JSON summary via DocStore
158
- scope = scope_id or run_id
159
129
  doc_id = _summary_doc_id(scope, self.summary_tag, ts)
160
130
  await docs.put(doc_id, summary)
161
131
 
162
- # 5) Emit summary Event
163
- # NOTE: we only store a preview in text and full summary in data["summary_uri"]
164
132
  preview = digest_text[:2000] + (" …[truncated]" if len(digest_text) > 2000 else "")
165
133
 
166
- evt = Event(
167
- event_id="", # fill below
168
- ts=ts,
169
- run_id=run_id,
170
- scope_id=scope,
171
- kind=self.summary_kind,
172
- stage="summary",
173
- text=preview,
174
- tags=["summary", self.summary_tag],
175
- data={
176
- "summary_doc_id": doc_id,
177
- "summary_tag": self.summary_tag,
178
- "time_window": summary["time_window"],
179
- "num_events": len(kept),
180
- },
181
- metrics={"num_events": len(kept)},
182
- severity=1,
183
- signal=0.5,
184
- )
185
-
186
- evt.event_id = stable_event_id(
187
- {
188
- "ts": ts,
189
- "run_id": run_id,
190
- "kind": self.summary_kind,
191
- "summary_tag": self.summary_tag,
192
- "text": preview[:200],
193
- }
194
- )
195
-
196
- await hotlog.append(timeline_id, evt, ttl_s=7 * 24 * 3600, limit=1000)
197
- await persistence.append_event(timeline_id, evt)
198
-
199
- # Metering: record summary event
200
- try:
201
- meter = current_metering()
202
- ctx = current_meter_context.get()
203
- user_id = ctx.get("user_id")
204
- org_id = ctx.get("org_id")
205
-
206
- await meter.record_event(
207
- user_id=user_id,
208
- org_id=org_id,
209
- run_id=run_id,
210
- scope_id=scope,
211
- kind=f"memory.{self.summary_kind}", # e.g. "memory.long_term_summary"
212
- )
213
- except Exception:
214
- import logging
215
-
216
- logger = logging.getLogger("aethergraph.services.memory.distillers.long_term")
217
- logger.error("Failed to record metering event for long_term_summary")
218
-
219
134
  return {
220
135
  "summary_doc_id": doc_id,
221
136
  "summary_kind": self.summary_kind,
222
137
  "summary_tag": self.summary_tag,
223
138
  "time_window": summary["time_window"],
224
139
  "num_events": len(kept),
140
+ "preview": preview,
141
+ "ts": ts,
225
142
  }