aethergraph 0.1.0a3__py3-none-any.whl → 0.1.0a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. aethergraph/api/v1/artifacts.py +23 -4
  2. aethergraph/api/v1/schemas.py +7 -0
  3. aethergraph/api/v1/session.py +123 -4
  4. aethergraph/config/config.py +2 -0
  5. aethergraph/config/search.py +49 -0
  6. aethergraph/contracts/services/channel.py +18 -1
  7. aethergraph/contracts/services/execution.py +58 -0
  8. aethergraph/contracts/services/llm.py +26 -0
  9. aethergraph/contracts/services/memory.py +10 -4
  10. aethergraph/contracts/services/planning.py +53 -0
  11. aethergraph/contracts/storage/event_log.py +8 -0
  12. aethergraph/contracts/storage/search_backend.py +47 -0
  13. aethergraph/contracts/storage/vector_index.py +73 -0
  14. aethergraph/core/graph/action_spec.py +76 -0
  15. aethergraph/core/graph/graph_fn.py +75 -2
  16. aethergraph/core/graph/graphify.py +74 -2
  17. aethergraph/core/runtime/graph_runner.py +2 -1
  18. aethergraph/core/runtime/node_context.py +66 -3
  19. aethergraph/core/runtime/node_services.py +8 -0
  20. aethergraph/core/runtime/run_manager.py +263 -271
  21. aethergraph/core/runtime/run_types.py +54 -1
  22. aethergraph/core/runtime/runtime_env.py +35 -14
  23. aethergraph/core/runtime/runtime_services.py +308 -18
  24. aethergraph/plugins/agents/default_chat_agent.py +266 -74
  25. aethergraph/plugins/agents/default_chat_agent_v2.py +487 -0
  26. aethergraph/plugins/channel/adapters/webui.py +69 -21
  27. aethergraph/plugins/channel/routes/webui_routes.py +8 -48
  28. aethergraph/runtime/__init__.py +12 -0
  29. aethergraph/server/app_factory.py +3 -0
  30. aethergraph/server/ui_static/assets/index-CFktGdbW.js +4913 -0
  31. aethergraph/server/ui_static/assets/index-DcfkFlTA.css +1 -0
  32. aethergraph/server/ui_static/index.html +2 -2
  33. aethergraph/services/artifacts/facade.py +157 -21
  34. aethergraph/services/artifacts/types.py +35 -0
  35. aethergraph/services/artifacts/utils.py +42 -0
  36. aethergraph/services/channel/channel_bus.py +3 -1
  37. aethergraph/services/channel/event_hub copy.py +55 -0
  38. aethergraph/services/channel/event_hub.py +81 -0
  39. aethergraph/services/channel/factory.py +3 -2
  40. aethergraph/services/channel/session.py +709 -74
  41. aethergraph/services/container/default_container.py +69 -7
  42. aethergraph/services/execution/__init__.py +0 -0
  43. aethergraph/services/execution/local_python.py +118 -0
  44. aethergraph/services/indices/__init__.py +0 -0
  45. aethergraph/services/indices/global_indices.py +21 -0
  46. aethergraph/services/indices/scoped_indices.py +292 -0
  47. aethergraph/services/llm/generic_client.py +342 -46
  48. aethergraph/services/llm/generic_embed_client.py +359 -0
  49. aethergraph/services/llm/types.py +3 -1
  50. aethergraph/services/memory/distillers/llm_long_term.py +60 -109
  51. aethergraph/services/memory/distillers/llm_long_term_v1.py +180 -0
  52. aethergraph/services/memory/distillers/llm_meta_summary.py +57 -266
  53. aethergraph/services/memory/distillers/llm_meta_summary_v1.py +342 -0
  54. aethergraph/services/memory/distillers/long_term.py +48 -131
  55. aethergraph/services/memory/distillers/long_term_v1.py +170 -0
  56. aethergraph/services/memory/facade/chat.py +18 -8
  57. aethergraph/services/memory/facade/core.py +159 -19
  58. aethergraph/services/memory/facade/distillation.py +86 -31
  59. aethergraph/services/memory/facade/retrieval.py +100 -1
  60. aethergraph/services/memory/factory.py +4 -1
  61. aethergraph/services/planning/__init__.py +0 -0
  62. aethergraph/services/planning/action_catalog.py +271 -0
  63. aethergraph/services/planning/bindings.py +56 -0
  64. aethergraph/services/planning/dependency_index.py +65 -0
  65. aethergraph/services/planning/flow_validator.py +263 -0
  66. aethergraph/services/planning/graph_io_adapter.py +150 -0
  67. aethergraph/services/planning/input_parser.py +312 -0
  68. aethergraph/services/planning/missing_inputs.py +28 -0
  69. aethergraph/services/planning/node_planner.py +613 -0
  70. aethergraph/services/planning/orchestrator.py +112 -0
  71. aethergraph/services/planning/plan_executor.py +506 -0
  72. aethergraph/services/planning/plan_types.py +321 -0
  73. aethergraph/services/planning/planner.py +617 -0
  74. aethergraph/services/planning/planner_service.py +369 -0
  75. aethergraph/services/planning/planning_context_builder.py +43 -0
  76. aethergraph/services/planning/quick_actions.py +29 -0
  77. aethergraph/services/planning/routers/__init__.py +0 -0
  78. aethergraph/services/planning/routers/simple_router.py +26 -0
  79. aethergraph/services/rag/facade.py +0 -3
  80. aethergraph/services/scope/scope.py +30 -30
  81. aethergraph/services/scope/scope_factory.py +15 -7
  82. aethergraph/services/skills/__init__.py +0 -0
  83. aethergraph/services/skills/skill_registry.py +465 -0
  84. aethergraph/services/skills/skills.py +220 -0
  85. aethergraph/services/skills/utils.py +194 -0
  86. aethergraph/storage/artifacts/artifact_index_jsonl.py +16 -10
  87. aethergraph/storage/artifacts/artifact_index_sqlite.py +12 -2
  88. aethergraph/storage/docstore/sqlite_doc_sync.py +1 -1
  89. aethergraph/storage/memory/event_persist.py +42 -2
  90. aethergraph/storage/memory/fs_persist.py +32 -2
  91. aethergraph/storage/search_backend/__init__.py +0 -0
  92. aethergraph/storage/search_backend/generic_vector_backend.py +230 -0
  93. aethergraph/storage/search_backend/null_backend.py +34 -0
  94. aethergraph/storage/search_backend/sqlite_lexical_backend.py +387 -0
  95. aethergraph/storage/search_backend/utils.py +31 -0
  96. aethergraph/storage/search_factory.py +75 -0
  97. aethergraph/storage/vector_index/faiss_index.py +72 -4
  98. aethergraph/storage/vector_index/sqlite_index.py +521 -52
  99. aethergraph/storage/vector_index/sqlite_index_vanila.py +311 -0
  100. aethergraph/storage/vector_index/utils.py +22 -0
  101. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/METADATA +1 -1
  102. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/RECORD +107 -63
  103. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/WHEEL +1 -1
  104. aethergraph/plugins/agents/default_chat_agent copy.py +0 -90
  105. aethergraph/server/ui_static/assets/index-BR5GtXcZ.css +0 -1
  106. aethergraph/server/ui_static/assets/index-CQ0HZZ83.js +0 -400
  107. aethergraph/services/eventhub/event_hub.py +0 -76
  108. aethergraph/services/llm/generic_client copy.py +0 -691
  109. aethergraph/services/prompts/file_store.py +0 -41
  110. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/entry_points.txt +0 -0
  111. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/licenses/LICENSE +0 -0
  112. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/licenses/NOTICE +0 -0
  113. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/top_level.txt +0 -0
aethergraph/services/llm/generic_embed_client.py
@@ -0,0 +1,359 @@
+ # aethergraph/services/llm/embedding_client.py
+ from __future__ import annotations
+
+ import asyncio
+ from collections.abc import Sequence
+ from dataclasses import dataclass
+ import os
+ from typing import Any
+
+ import httpx
+
+ from aethergraph.contracts.services.llm import EmbeddingClientProtocol
+ from aethergraph.services.llm.generic_client import _Retry
+ from aethergraph.services.metering.eventlog_metering import MeteringService
+
+
+ @dataclass
+ class GenericEmbeddingClient(EmbeddingClientProtocol):
+     """
+     Provider-agnostic embedding client.
+
+     provider: one of {"openai","azure","anthropic","google","openrouter","lmstudio","ollama","dummy"}
+
+     Configuration (env defaults, but can be passed directly):
+
+     - OPENAI_API_KEY / OPENAI_BASE_URL
+     - AZURE_OPENAI_KEY / AZURE_OPENAI_ENDPOINT / AZURE_OPENAI_DEPLOYMENT
+     - ANTHROPIC_API_KEY
+     - GOOGLE_API_KEY
+     - OPENROUTER_API_KEY
+     - LMSTUDIO_BASE_URL (default http://localhost:1234/v1)
+     - OLLAMA_BASE_URL (default http://localhost:11434/v1)
+     """
+
+     provider: str | None = None
+     model: str | None = None
+     base_url: str | None = None
+     api_key: str | None = None
+     azure_deployment: str | None = None
+     timeout: float = 60.0
+
+     # metering (optional, can be None)
+     metering: MeteringService | None = None
+
+     def __post_init__(self) -> None:
+         self.provider = (
+             self.provider or os.getenv("EMBED_PROVIDER") or os.getenv("LLM_PROVIDER") or "openai"
+         ).lower()  # type: ignore[assignment]
+         self.model = (
+             self.model
+             or os.getenv("EMBED_MODEL")
+             or os.getenv("LLM_EMBED_MODEL")
+             or "text-embedding-3-small"
+         )
+
+         # Pick an API key from provider-specific envs (or explicit api_key)
+         if self.api_key is None:
+             self.api_key = (
+                 os.getenv("OPENAI_API_KEY")
+                 or os.getenv("AZURE_OPENAI_KEY")
+                 or os.getenv("ANTHROPIC_API_KEY")
+                 or os.getenv("GOOGLE_API_KEY")
+                 or os.getenv("OPENROUTER_API_KEY")
+             )
+
+         # Base URL defaults per provider
+         if self.base_url is None:
+             self.base_url = {
+                 "openai": os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"),
+                 "azure": os.getenv("AZURE_OPENAI_ENDPOINT", "").rstrip("/"),
+                 "anthropic": "https://api.anthropic.com",
+                 "google": "https://generativelanguage.googleapis.com",
+                 "openrouter": "https://openrouter.ai/api/v1",
+                 "lmstudio": os.getenv("LMSTUDIO_BASE_URL", "http://localhost:1234/v1"),
+                 "ollama": os.getenv("OLLAMA_BASE_URL", "http://localhost:11434/v1"),
+                 "dummy": "http://localhost:8745",  # for tests
+             }[self.provider]
+
+         # Azure deployment (for /deployments/{name}/embeddings)
+         if self.provider == "azure" and self.azure_deployment is None:
+             self.azure_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT")
+
+         self._retry = _Retry()
+         self._client: httpx.AsyncClient | None = None
+
+     # ------------ client management -----------------
+
+     async def _ensure_client(self) -> None:
+         """
+         Ensure we have an httpx.AsyncClient bound to the *current* event loop.
+
+         IMPORTANT: We do NOT try to aclose() a client created on a different loop,
+         because httpx/anyio expects it to be closed on the same loop it was created on.
+         """
+         loop = asyncio.get_running_loop()
+
+         if self._client is None:
+             # first-time init
+             self._client = httpx.AsyncClient(timeout=self.timeout)
+             self._bound_loop = loop
+             return
+
+         if self._bound_loop is not loop:
+             # We're now in a different loop -> do not reuse the old client.
+             # We also do NOT call aclose() here, because that tends to explode
+             # if the old loop is already closed.
+             self._client = httpx.AsyncClient(timeout=self.timeout)
+             self._bound_loop = loop
+
+     # ------------ public API ------------------------
+
+     async def embed(
+         self,
+         texts: Sequence[str],
+         *,
+         model: str | None = None,
+         **kw: Any,
+     ) -> list[list[float]]:
+         """
+         Provider-agnostic batch embedding.
+         """
+         await self._ensure_client()
+         assert self._client is not None
+
+         if not isinstance(texts, Sequence) or any(not isinstance(t, str) for t in texts):
+             raise TypeError("embed(texts) expects Sequence[str]")
+         if len(texts) == 0:
+             return []
+
+         # Resolve model (override > configured)
+         model = model or self.model or "text-embedding-3-small"
+
+         # Dispatch by provider
+         if self.provider in {"openai", "openrouter", "lmstudio", "ollama"}:
+             embs = await self._embed_openai_like(texts, model=model, **kw)
+         elif self.provider == "azure":
+             embs = await self._embed_azure(texts, model=model, **kw)
+         elif self.provider == "google":
+             embs = await self._embed_google(texts, model=model, **kw)
+         elif self.provider == "anthropic":
+             raise NotImplementedError("Embeddings not supported for anthropic")
+         elif self.provider == "dummy":
+             embs = await self._embed_dummy(texts, model=model, **kw)
+         else:  # pragma: no cover
+             raise NotImplementedError(f"Unknown embedding provider: {self.provider}")
+
+         # ---- metering hook (placeholder) ----
+         if self.metering is not None:
+             try:
+                 # TODO: compute token estimates or bytes; for now just count inputs
+                 await self.metering.record_embedding(
+                     provider=self.provider,
+                     model=model,
+                     num_texts=len(texts),
+                     # tokens=estimated_tokens,
+                 )
+             except Exception:
+                 # best-effort; never break main path
+                 import logging
+
+                 logger = logging.getLogger(__name__)
+                 logger.exception("Error recording embedding metering")
+                 pass
+
+         return embs
+
+     async def embed_one(
+         self,
+         text: str,
+         *,
+         model: str | None = None,
+         **kw: Any,
+     ) -> list[float]:
+         res = await self.embed([text], model=model, **kw)
+         return res[0]
+
+     # ------------ provider-specific helpers ------------------------
+
+     def _headers_openai_like(self) -> dict[str, str]:
+         headers = {"Content-Type": "application/json"}
+         if self.api_key:
+             headers["Authorization"] = f"Bearer {self.api_key}"
+         return headers
+
+     async def _embed_openai_like(
+         self,
+         texts: Sequence[str],
+         *,
+         model: str,
+         **kw: Any,
+     ) -> list[list[float]]:
+         assert self._client is not None
+         url = f"{self.base_url}/embeddings"
+         headers = self._headers_openai_like()
+         extra_body: dict[str, Any] = kw.get("extra_body") or {}
+
+         body: dict[str, Any] = {
+             "model": model,
+             "input": list(texts),
+         }
+         body.update(extra_body)
+
+         def parse(data: dict[str, Any]) -> list[list[float]]:
+             items = data.get("data", []) or []
+             embs = [d.get("embedding") for d in items]
+             if len(embs) != len(texts) or any(e is None for e in embs):
+                 raise RuntimeError(
+                     f"Embeddings response shape mismatch: got {len(embs)} items for {len(texts)} inputs"
+                 )
+             return embs  # type: ignore[return-value]
+
+         async def _call():
+             r = await self._client.post(url, headers=headers, json=body)
+             try:
+                 r.raise_for_status()
+             except httpx.HTTPStatusError as e:
+                 raise RuntimeError(
+                     f"Embeddings request failed ({e.response.status_code}): {e.response.text}"
+                 ) from e
+             return parse(r.json())
+
+         return await self._retry.run(_call)
+
+     async def _embed_azure(
+         self,
+         texts: Sequence[str],
+         *,
+         model: str,
+         **kw: Any,
+     ) -> list[list[float]]:
+         if not self.azure_deployment:
+             raise RuntimeError(
+                 "Azure embeddings requires AZURE_OPENAI_DEPLOYMENT (azure_deployment)"
+             )
+
+         assert self._client is not None
+
+         azure_api_version = kw.get("azure_api_version") or "2024-08-01-preview"
+         extra_body: dict[str, Any] = kw.get("extra_body") or {}
+
+         url = (
+             f"{self.base_url}/openai/deployments/"
+             f"{self.azure_deployment}/embeddings?api-version={azure_api_version}"
+         )
+         headers = {"api-key": self.api_key or "", "Content-Type": "application/json"}
+         body: dict[str, Any] = {"input": list(texts)}
+         # Some Azure flavors accept model/dimensions; keep flexible
+         if model:
+             body["model"] = model
+         body.update(extra_body)
+
+         def parse(data: dict[str, Any]) -> list[list[float]]:
+             items = data.get("data", []) or []
+             embs = [d.get("embedding") for d in items]
+             if len(embs) != len(texts) or any(e is None for e in embs):
+                 raise RuntimeError(
+                     f"Azure embeddings response shape mismatch: got {len(embs)} items for {len(texts)} inputs"
+                 )
+             return embs  # type: ignore[return-value]
+
+         async def _call():
+             r = await self._client.post(url, headers=headers, json=body)
+             try:
+                 r.raise_for_status()
+             except httpx.HTTPStatusError as e:
+                 raise RuntimeError(
+                     f"Embeddings request failed ({e.response.status_code}): {e.response.text}"
+                 ) from e
+             return parse(r.json())
+
+         return await self._retry.run(_call)
+
+     async def _embed_google(
+         self,
+         texts: Sequence[str],
+         *,
+         model: str,
+         **kw: Any,
+     ) -> list[list[float]]:
+         assert self._client is not None
+         base = self.base_url.rstrip("/")
+         api_key = self.api_key or os.getenv("GOOGLE_API_KEY") or ""
+         headers = {"Content-Type": "application/json"}
+
+         # v1 and v1beta endpoints
+         batch_url_v1 = f"{base}/v1/models/{model}:batchEmbedContents?key={api_key}"
+         embed_url_v1 = f"{base}/v1/models/{model}:embedContent?key={api_key}"
+         batch_url_v1beta = f"{base}/v1beta/models/{model}:batchEmbedContents?key={api_key}"
+         embed_url_v1beta = f"{base}/v1beta/models/{model}:embedContent?key={api_key}"
+
+         def parse_single(data: dict[str, Any]) -> list[float]:
+             return (data.get("embedding") or {}).get("values") or []
+
+         def parse_batch(data: dict[str, Any]) -> list[list[float]]:
+             embs: list[list[float]] = []
+             for e in data.get("embeddings") or []:
+                 embs.append((e or {}).get("values") or [])
+             if len(embs) != len(texts):
+                 raise RuntimeError(
+                     f"Gemini batch embeddings mismatch: got {len(embs)} for {len(texts)}"
+                 )
+             return embs
+
+         async def try_batch(url: str) -> list[list[float]] | None:
+             body = {"requests": [{"content": {"parts": [{"text": t}]}} for t in texts]}
+             r = await self._client.post(url, headers=headers, json=body)
+             if r.status_code in (400, 404):
+                 return None
+             try:
+                 r.raise_for_status()
+             except httpx.HTTPStatusError as e:
+                 raise RuntimeError(
+                     f"Gemini batchEmbedContents failed ({e.response.status_code}): {e.response.text}"
+                 ) from e
+             return parse_batch(r.json())
+
+         async def call_single(url: str) -> list[list[float]]:
+             out: list[list[float]] = []
+             for t in texts:
+                 r = await self._client.post(
+                     url, headers=headers, json={"content": {"parts": [{"text": t}]}}
+                 )
+                 try:
+                     r.raise_for_status()
+                 except httpx.HTTPStatusError as e:
+                     raise RuntimeError(
+                         f"Gemini embedContent failed ({e.response.status_code}): {e.response.text}"
+                     ) from e
+                 out.append(parse_single(r.json()))
+             if len(out) != len(texts):
+                 raise RuntimeError(f"Gemini embeddings mismatch: got {len(out)} for {len(texts)}")
+             return out
+
+         async def _call():
+             # try v1 batch, then v1beta batch, then single
+             res = await try_batch(batch_url_v1)
+             if res is not None:
+                 return res
+             res = await try_batch(batch_url_v1beta)
+             if res is not None:
+                 return res
+             try:
+                 return await call_single(embed_url_v1)
+             except RuntimeError:
+                 return await call_single(embed_url_v1beta)
+
+         return await self._retry.run(_call)
+
+     async def _embed_dummy(
+         self,
+         texts: Sequence[str],
+         *,
+         model: str,
+         **kw: Any,
+     ) -> list[list[float]]:
+         """
+         Dummy provider for tests: returns [len(text)] as a 1D "embedding".
+         """
+         return [[float(len(t))] for t in texts]
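A minimal usage sketch for the new client, assuming it is importable from the generic_embed_client.py path in the file table above (the header comment inside the file says embedding_client.py, so the module path here follows the manifest, not the comment). The "dummy" provider needs no network access or API key, and calling asyncio.run() twice exercises the per-event-loop rebinding that _ensure_client describes.

import asyncio

from aethergraph.services.llm.generic_embed_client import GenericEmbeddingClient

# Module-level client so the same instance spans both event loops below.
client = GenericEmbeddingClient(provider="dummy")

async def demo() -> None:
    vecs = await client.embed(["hello", "world!"])
    print(vecs)                          # [[5.0], [6.0]] -- dummy returns [len(text)]
    print(await client.embed_one("hi"))  # [2.0]

# Two separate event loops reuse the same client object; _ensure_client
# rebinds the underlying httpx.AsyncClient to the current loop rather than
# reusing (or trying to close) one created on a now-dead loop.
asyncio.run(demo())
asyncio.run(demo())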
aethergraph/services/llm/types.py
@@ -1,7 +1,9 @@
  from dataclasses import dataclass
  from typing import Any, Literal

- ChatOutputFormat = Literal["text", "json_object", "json_schema"]
+ ChatOutputFormat = Literal[
+     "text", "json_object", "json_schema", "raw"
+ ]  # text: plain text; json_object: dict; json_schema: validate against schema; raw: provider-specific raw response

  ImageFormat = Literal["png", "jpeg", "webp"]
  ImageResponseFormat = Literal["b64_json", "url"]  # url only for dall-e models typically
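The widened Literal is a plain typing alias, so the new "raw" member is immediately valid anywhere a ChatOutputFormat is expected. A quick illustration (the function name is hypothetical, not part of the package):

from aethergraph.services.llm.types import ChatOutputFormat

def pick_format(structured: bool) -> ChatOutputFormat:
    # "json_object" requests a JSON dict; "raw" (new in 0.1.0a4) passes the
    # provider-specific response through untouched.
    return "json_object" if structured else "raw"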
aethergraph/services/memory/distillers/llm_long_term.py
@@ -5,32 +5,13 @@ import json
  from typing import Any

  from aethergraph.contracts.services.llm import LLMClientProtocol
- from aethergraph.contracts.services.memory import Distiller, Event, HotLog, Indices, Persistence
+ from aethergraph.contracts.services.memory import Distiller, Event, HotLog
  from aethergraph.contracts.storage.doc_store import DocStore
-
- # metering
- from aethergraph.core.runtime.runtime_metering import current_meter_context, current_metering
- from aethergraph.services.memory.facade.utils import now_iso, stable_event_id
+ from aethergraph.services.memory.facade.utils import now_iso
  from aethergraph.services.memory.utils import _summary_doc_id


  class LLMLongTermSummarizer(Distiller):
-     """
-     LLM-based long-term summarizer.
-
-     Flow:
-     1) Pull recent events from HotLog.
-     2) Filter by kind/tag/signal.
-     3) Build a prompt that shows the most important events as a transcript.
-     4) Call LLM to generate a structured summary.
-     5) Save summary JSON via Persistence.save_json(uri).
-     6) Emit a long_term_summary Event pointing to summary_uri.
-
-     This is complementary to RAG:
-     - LLM distiller compresses sequences into a digest.
-     - RAG uses many such digests + raw docs for retrieval.
-     """
-
      def __init__(
          self,
          *,
@@ -50,7 +31,7 @@ class LLMLongTermSummarizer(Distiller):
          self.include_tags = include_tags
          self.max_events = max_events
          self.min_signal = min_signal
-         self.model = model  # optional model override
+         self.model = model

      def _filter_events(self, events: Iterable[Event]) -> list[Event]:
          out: list[Event] = []
@@ -61,9 +42,8 @@
              if kinds is not None and e.kind not in kinds:
                  continue
              if tags is not None:
-                 if not e.tags:
-                     continue
-                 if not tags.issubset(set(e.tags)):
+                 et = set(e.tags or [])
+                 if not tags.issubset(et):  # AND semantics
                      continue
              if (e.signal or 0.0) < self.min_signal:
                  continue
@@ -71,17 +51,24 @@
          return out

      def _build_prompt(self, events: list[Event]) -> list[dict[str, str]]:
-         """
-         Convert events into a chat-style context for summarization.
-
-         We keep it model-agnostic: a list of {role, content} messages.
-         """
          lines: list[str] = []

          for e in events:
              role = e.stage or e.kind or "event"
-             if e.text:
-                 lines.append(f"[{role}] {e.text}")
+
+             # Prefer text, but fall back to compact JSON of data when needed
+             content = (e.text or "").strip()
+             if not content and getattr(e, "data", None) is not None:
+                 try:
+                     content = json.dumps(e.data, ensure_ascii=False)
+                 except Exception:
+                     content = str(e.data)
+
+             if content:
+                 # keep prompts bounded
+                 if len(content) > 500:
+                     content = content[:500] + "…"
+                 lines.append(f"[{role}] {content}")

          transcript = "\n".join(lines)

@@ -97,54 +84,65 @@
              "Return a JSON object with keys: "
              "`summary` (string), "
              "`key_facts` (list of strings), "
-             "`open_loops` (list of strings)."
-             "Do not use markdown or include explanations or context outside the JSON."
+             "`open_loops` (list of strings). "
+             "Do not use markdown or include explanations outside the JSON."
          )

-         return [
-             {"role": "system", "content": system},
-             {"role": "user", "content": user},
-         ]
+         return [{"role": "system", "content": system}, {"role": "user", "content": user}]

      async def distill(
          self,
          run_id: str,
          timeline_id: str,
-         scope_id: str = None,
+         scope_id: str | None = None,
          *,
          hotlog: HotLog,
-         persistence: Persistence,
-         indices: Indices,
          docs: DocStore,
          **kw: Any,
      ) -> dict[str, Any]:
-         # 1) fetch more events than needed, then filter
-         raw = await hotlog.recent(timeline_id, kinds=None, limit=self.max_events * 2)
+         # Over-fetch strategy:
+         # - if include_tags is present, filtering can be very selective, so over-fetch more
+         # - also pass include_kinds to HotLog to reduce noise
+         base_mult = 2
+         if self.include_tags:
+             base_mult = 8  # safer default for thread/session tags
+
+         # cap so we don't go crazy (HotLog may cap internally anyway)
+         fetch_limit = max(self.max_events * base_mult, 200)
+
+         raw = await hotlog.recent(
+             timeline_id,
+             kinds=self.include_kinds,  # narrow early when possible
+             limit=fetch_limit,
+         )
          kept = self._filter_events(raw)
+
          if not kept:
              return {}

+         # Keep only the most recent max_events (chronological, newest last)
          kept = kept[-self.max_events :]
+
          first_ts = kept[0].ts
          last_ts = kept[-1].ts

-         # 2) Build prompt and call LLM
          messages = self._build_prompt(kept)

-         # LLMClientProtocol: assume chat(...) returns (text, usage)
-         summary_json_str, usage = await self.llm.chat(
-             messages,
-         )
+         # Respect model override if the client supports it
+         try:
+             if self.model:
+                 summary_json_str, usage = await self.llm.chat(messages, model=self.model)  # type: ignore[arg-type]
+             else:
+                 summary_json_str, usage = await self.llm.chat(messages)
+         except TypeError:
+             # Client doesn't accept model=...
+             summary_json_str, usage = await self.llm.chat(messages)

-         # 3) Parse LLM JSON response
          try:
              payload = json.loads(summary_json_str)
          except Exception:
-             payload = {
-                 "summary": summary_json_str,
-                 "key_facts": [],
-                 "open_loops": [],
-             }
+             payload = {"summary": summary_json_str, "key_facts": [], "open_loops": []}
+
          ts = now_iso()

          summary_obj = {
@@ -161,74 +159,27 @@ class LLMLongTermSummarizer(Distiller):
              "key_facts": payload.get("key_facts", []),
              "open_loops": payload.get("open_loops", []),
              "llm_usage": usage,
-             "llm_model": self.llm.model if hasattr(self.llm, "model") else None,
+             "llm_model": getattr(self.llm, "model", None),
+             "llm_model_override": self.model,
+             "include_kinds": self.include_kinds,
+             "include_tags": self.include_tags,
+             "min_signal": self.min_signal,
+             "fetch_limit": fetch_limit,
          }

          scope = scope_id or run_id
          doc_id = _summary_doc_id(scope, self.summary_tag, ts)
          await docs.put(doc_id, summary_obj)

-         # 4) Emit summary Event with preview + uri in data
          text = summary_obj["summary"] or ""
          preview = text[:2000] + (" …[truncated]" if len(text) > 2000 else "")

-         evt = Event(
-             event_id="",
-             ts=ts,
-             run_id=run_id,
-             scope_id=scope,
-             kind=self.summary_kind,
-             stage="summary_llm",
-             text=preview,
-             tags=["summary", "llm", self.summary_tag],
-             data={
-                 "summary_doc_id": doc_id,
-                 "summary_tag": self.summary_tag,
-                 "time_window": summary_obj["time_window"],
-                 "num_events": len(kept),
-             },
-             metrics={"num_events": len(kept)},
-             severity=2,
-             signal=0.7,
-         )
-
-         evt.event_id = stable_event_id(
-             {
-                 "ts": ts,
-                 "run_id": run_id,
-                 "kind": self.summary_kind,
-                 "summary_tag": self.summary_tag,
-                 "preview": preview[:200],
-             }
-         )
-
-         await hotlog.append(timeline_id, evt, ttl_s=7 * 24 * 3600, limit=1000)
-         await persistence.append_event(timeline_id, evt)
-
-         # Metering: record summary event
-         try:
-             meter = current_metering()
-             ctx = current_meter_context.get()
-             user_id = ctx.get("user_id")
-             org_id = ctx.get("org_id")
-
-             await meter.record_event(
-                 user_id=user_id,
-                 org_id=org_id,
-                 run_id=run_id,
-                 scope_id=scope,
-                 kind=f"memory.{self.summary_kind}",  # e.g. "memory.long_term_summary"
-             )
-         except Exception:
-             import logging
-
-             logger = logging.getLogger("aethergraph.services.memory.distillers.llm_long_term")
-             logger.error("Failed to record metering event for long_term_summary")
-
          return {
              "summary_doc_id": doc_id,
              "summary_kind": self.summary_kind,
              "summary_tag": self.summary_tag,
              "time_window": summary_obj["time_window"],
              "num_events": len(kept),
+             "preview": preview,
+             "ts": ts,
          }
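For context, a sketch of how the slimmed-down distill() signature is called after this change: persistence= and indices= are gone, scope_id is now Optional, and the return dict gains "preview" and "ts". The summarizer/hotlog/docs objects are assumed to be an LLMLongTermSummarizer plus implementations of the HotLog and DocStore protocols imported above; their construction is not shown in this diff.

from typing import Any

async def summarize_timeline(summarizer, hotlog, docs) -> dict[str, Any]:
    result = await summarizer.distill(
        run_id="run-123",
        timeline_id="tl-123",
        scope_id=None,   # falls back to run_id when resolving the summary doc scope
        hotlog=hotlog,
        docs=docs,
    )
    # result: summary_doc_id, summary_kind, summary_tag, time_window,
    # num_events, plus the new "preview" and "ts" keys.
    return result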