aethergraph 0.1.0a2__py3-none-any.whl → 0.1.0a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aethergraph/__main__.py +3 -0
- aethergraph/api/v1/artifacts.py +23 -4
- aethergraph/api/v1/schemas.py +7 -0
- aethergraph/api/v1/session.py +123 -4
- aethergraph/config/config.py +2 -0
- aethergraph/config/search.py +49 -0
- aethergraph/contracts/services/channel.py +18 -1
- aethergraph/contracts/services/execution.py +58 -0
- aethergraph/contracts/services/llm.py +26 -0
- aethergraph/contracts/services/memory.py +10 -4
- aethergraph/contracts/services/planning.py +53 -0
- aethergraph/contracts/storage/event_log.py +8 -0
- aethergraph/contracts/storage/search_backend.py +47 -0
- aethergraph/contracts/storage/vector_index.py +73 -0
- aethergraph/core/graph/action_spec.py +76 -0
- aethergraph/core/graph/graph_fn.py +75 -2
- aethergraph/core/graph/graphify.py +74 -2
- aethergraph/core/runtime/graph_runner.py +2 -1
- aethergraph/core/runtime/node_context.py +66 -3
- aethergraph/core/runtime/node_services.py +8 -0
- aethergraph/core/runtime/run_manager.py +263 -271
- aethergraph/core/runtime/run_types.py +54 -1
- aethergraph/core/runtime/runtime_env.py +35 -14
- aethergraph/core/runtime/runtime_services.py +308 -18
- aethergraph/plugins/agents/default_chat_agent.py +266 -74
- aethergraph/plugins/agents/default_chat_agent_v2.py +487 -0
- aethergraph/plugins/channel/adapters/webui.py +69 -21
- aethergraph/plugins/channel/routes/webui_routes.py +8 -48
- aethergraph/runtime/__init__.py +12 -0
- aethergraph/server/app_factory.py +10 -1
- aethergraph/server/ui_static/assets/index-CFktGdbW.js +4913 -0
- aethergraph/server/ui_static/assets/index-DcfkFlTA.css +1 -0
- aethergraph/server/ui_static/index.html +2 -2
- aethergraph/services/artifacts/facade.py +157 -21
- aethergraph/services/artifacts/types.py +35 -0
- aethergraph/services/artifacts/utils.py +42 -0
- aethergraph/services/channel/channel_bus.py +3 -1
- aethergraph/services/channel/event_hub copy.py +55 -0
- aethergraph/services/channel/event_hub.py +81 -0
- aethergraph/services/channel/factory.py +3 -2
- aethergraph/services/channel/session.py +709 -74
- aethergraph/services/container/default_container.py +69 -7
- aethergraph/services/execution/__init__.py +0 -0
- aethergraph/services/execution/local_python.py +118 -0
- aethergraph/services/indices/__init__.py +0 -0
- aethergraph/services/indices/global_indices.py +21 -0
- aethergraph/services/indices/scoped_indices.py +292 -0
- aethergraph/services/llm/generic_client.py +342 -46
- aethergraph/services/llm/generic_embed_client.py +359 -0
- aethergraph/services/llm/types.py +3 -1
- aethergraph/services/memory/distillers/llm_long_term.py +60 -109
- aethergraph/services/memory/distillers/llm_long_term_v1.py +180 -0
- aethergraph/services/memory/distillers/llm_meta_summary.py +57 -266
- aethergraph/services/memory/distillers/llm_meta_summary_v1.py +342 -0
- aethergraph/services/memory/distillers/long_term.py +48 -131
- aethergraph/services/memory/distillers/long_term_v1.py +170 -0
- aethergraph/services/memory/facade/chat.py +18 -8
- aethergraph/services/memory/facade/core.py +159 -19
- aethergraph/services/memory/facade/distillation.py +86 -31
- aethergraph/services/memory/facade/retrieval.py +100 -1
- aethergraph/services/memory/factory.py +4 -1
- aethergraph/services/planning/__init__.py +0 -0
- aethergraph/services/planning/action_catalog.py +271 -0
- aethergraph/services/planning/bindings.py +56 -0
- aethergraph/services/planning/dependency_index.py +65 -0
- aethergraph/services/planning/flow_validator.py +263 -0
- aethergraph/services/planning/graph_io_adapter.py +150 -0
- aethergraph/services/planning/input_parser.py +312 -0
- aethergraph/services/planning/missing_inputs.py +28 -0
- aethergraph/services/planning/node_planner.py +613 -0
- aethergraph/services/planning/orchestrator.py +112 -0
- aethergraph/services/planning/plan_executor.py +506 -0
- aethergraph/services/planning/plan_types.py +321 -0
- aethergraph/services/planning/planner.py +617 -0
- aethergraph/services/planning/planner_service.py +369 -0
- aethergraph/services/planning/planning_context_builder.py +43 -0
- aethergraph/services/planning/quick_actions.py +29 -0
- aethergraph/services/planning/routers/__init__.py +0 -0
- aethergraph/services/planning/routers/simple_router.py +26 -0
- aethergraph/services/rag/facade.py +0 -3
- aethergraph/services/scope/scope.py +30 -30
- aethergraph/services/scope/scope_factory.py +15 -7
- aethergraph/services/skills/__init__.py +0 -0
- aethergraph/services/skills/skill_registry.py +465 -0
- aethergraph/services/skills/skills.py +220 -0
- aethergraph/services/skills/utils.py +194 -0
- aethergraph/storage/artifacts/artifact_index_jsonl.py +16 -10
- aethergraph/storage/artifacts/artifact_index_sqlite.py +12 -2
- aethergraph/storage/docstore/sqlite_doc_sync.py +1 -1
- aethergraph/storage/memory/event_persist.py +42 -2
- aethergraph/storage/memory/fs_persist.py +32 -2
- aethergraph/storage/search_backend/__init__.py +0 -0
- aethergraph/storage/search_backend/generic_vector_backend.py +230 -0
- aethergraph/storage/search_backend/null_backend.py +34 -0
- aethergraph/storage/search_backend/sqlite_lexical_backend.py +387 -0
- aethergraph/storage/search_backend/utils.py +31 -0
- aethergraph/storage/search_factory.py +75 -0
- aethergraph/storage/vector_index/faiss_index.py +72 -4
- aethergraph/storage/vector_index/sqlite_index.py +521 -52
- aethergraph/storage/vector_index/sqlite_index_vanila.py +311 -0
- aethergraph/storage/vector_index/utils.py +22 -0
- {aethergraph-0.1.0a2.dist-info → aethergraph-0.1.0a4.dist-info}/METADATA +1 -1
- {aethergraph-0.1.0a2.dist-info → aethergraph-0.1.0a4.dist-info}/RECORD +108 -64
- {aethergraph-0.1.0a2.dist-info → aethergraph-0.1.0a4.dist-info}/WHEEL +1 -1
- aethergraph/plugins/agents/default_chat_agent copy.py +0 -90
- aethergraph/server/ui_static/assets/index-BR5GtXcZ.css +0 -1
- aethergraph/server/ui_static/assets/index-CQ0HZZ83.js +0 -400
- aethergraph/services/eventhub/event_hub.py +0 -76
- aethergraph/services/llm/generic_client copy.py +0 -691
- aethergraph/services/prompts/file_store.py +0 -41
- {aethergraph-0.1.0a2.dist-info → aethergraph-0.1.0a4.dist-info}/entry_points.txt +0 -0
- {aethergraph-0.1.0a2.dist-info → aethergraph-0.1.0a4.dist-info}/licenses/LICENSE +0 -0
- {aethergraph-0.1.0a2.dist-info → aethergraph-0.1.0a4.dist-info}/licenses/NOTICE +0 -0
- {aethergraph-0.1.0a2.dist-info → aethergraph-0.1.0a4.dist-info}/top_level.txt +0 -0
aethergraph/services/llm/generic_embed_client.py (new file)
@@ -0,0 +1,359 @@
+# aethergraph/services/llm/embedding_client.py
+from __future__ import annotations
+
+import asyncio
+from collections.abc import Sequence
+from dataclasses import dataclass
+import os
+from typing import Any
+
+import httpx
+
+from aethergraph.contracts.services.llm import EmbeddingClientProtocol
+from aethergraph.services.llm.generic_client import _Retry
+from aethergraph.services.metering.eventlog_metering import MeteringService
+
+
+@dataclass
+class GenericEmbeddingClient(EmbeddingClientProtocol):
+    """
+    Provider-agnostic embedding client.
+
+    provider: one of {"openai","azure","anthropic","google","openrouter","lmstudio","ollama","dummy"}
+
+    Configuration (env defaults, but can be passed directly):
+
+    - OPENAI_API_KEY / OPENAI_BASE_URL
+    - AZURE_OPENAI_KEY / AZURE_OPENAI_ENDPOINT / AZURE_OPENAI_DEPLOYMENT
+    - ANTHROPIC_API_KEY
+    - GOOGLE_API_KEY
+    - OPENROUTER_API_KEY
+    - LMSTUDIO_BASE_URL (default http://localhost:1234/v1)
+    - OLLAMA_BASE_URL (default http://localhost:11434/v1)
+    """
+
+    provider: str | None = None
+    model: str | None = None
+    base_url: str | None = None
+    api_key: str | None = None
+    azure_deployment: str | None = None
+    timeout: float = 60.0
+
+    # metering (optional, can be None)
+    metering: MeteringService | None = None
+
+    def __post_init__(self) -> None:
+        self.provider = (
+            self.provider or os.getenv("EMBED_PROVIDER") or os.getenv("LLM_PROVIDER") or "openai"
+        ).lower()  # type: ignore[assignment]
+        self.model = (
+            self.model
+            or os.getenv("EMBED_MODEL")
+            or os.getenv("LLM_EMBED_MODEL")
+            or "text-embedding-3-small"
+        )
+
+        # Pick an API key from provider-specific envs (or explicit api_key)
+        if self.api_key is None:
+            self.api_key = (
+                os.getenv("OPENAI_API_KEY")
+                or os.getenv("AZURE_OPENAI_KEY")
+                or os.getenv("ANTHROPIC_API_KEY")
+                or os.getenv("GOOGLE_API_KEY")
+                or os.getenv("OPENROUTER_API_KEY")
+            )
+
+        # Base URL defaults per provider
+        if self.base_url is None:
+            self.base_url = {
+                "openai": os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"),
+                "azure": os.getenv("AZURE_OPENAI_ENDPOINT", "").rstrip("/"),
+                "anthropic": "https://api.anthropic.com",
+                "google": "https://generativelanguage.googleapis.com",
+                "openrouter": "https://openrouter.ai/api/v1",
+                "lmstudio": os.getenv("LMSTUDIO_BASE_URL", "http://localhost:1234/v1"),
+                "ollama": os.getenv("OLLAMA_BASE_URL", "http://localhost:11434/v1"),
+                "dummy": "http://localhost:8745",  # for tests
+            }[self.provider]
+
+        # Azure deployment (for /deployments/{name}/embeddings)
+        if self.provider == "azure" and self.azure_deployment is None:
+            self.azure_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT")
+
+        self._retry = _Retry()
+        self._client: httpx.AsyncClient | None = None
+
+    # ------------ client management -----------------
+
+    async def _ensure_client(self) -> None:
+        """
+        Ensure we have an httpx.AsyncClient bound to the *current* event loop.
+
+        IMPORTANT: We do NOT try to aclose() a client created on a different loop,
+        because httpx/anyio expects it to be closed on the same loop it was created on.
+        """
+        loop = asyncio.get_running_loop()
+
+        if self._client is None:
+            # first-time init
+            self._client = httpx.AsyncClient(timeout=self.timeout)
+            self._bound_loop = loop
+            return
+
+        if self._bound_loop is not loop:
+            # We're now in a different loop -> do not reuse the old client.
+            # We also do NOT call aclose() here, because that tends to explode
+            # if the old loop is already closed.
+            self._client = httpx.AsyncClient(timeout=self.timeout)
+            self._bound_loop = loop
+
+    # ------------ public API ------------------------
+
+    async def embed(
+        self,
+        texts: Sequence[str],
+        *,
+        model: str | None = None,
+        **kw: Any,
+    ) -> list[list[float]]:
+        """
+        Provider-agnostic batch embedding.
+        """
+        await self._ensure_client()
+        assert self._client is not None
+
+        if not isinstance(texts, Sequence) or any(not isinstance(t, str) for t in texts):
+            raise TypeError("embed(texts) expects Sequence[str]")
+        if len(texts) == 0:
+            return []
+
+        # Resolve model (override > configured)
+        model = model or self.model or "text-embedding-3-small"
+
+        # Dispatch by provider
+        if self.provider in {"openai", "openrouter", "lmstudio", "ollama"}:
+            embs = await self._embed_openai_like(texts, model=model, **kw)
+        elif self.provider == "azure":
+            embs = await self._embed_azure(texts, model=model, **kw)
+        elif self.provider == "google":
+            embs = await self._embed_google(texts, model=model, **kw)
+        elif self.provider == "anthropic":
+            raise NotImplementedError("Embeddings not supported for anthropic")
+        elif self.provider == "dummy":
+            embs = await self._embed_dummy(texts, model=model, **kw)
+        else:  # pragma: no cover
+            raise NotImplementedError(f"Unknown embedding provider: {self.provider}")
+
+        # ---- metering hook (placeholder) ----
+        if self.metering is not None:
+            try:
+                # TODO: compute token estimates or bytes; for now just count inputs
+                await self.metering.record_embedding(
+                    provider=self.provider,
+                    model=model,
+                    num_texts=len(texts),
+                    # tokens=estimated_tokens,
+                )
+            except Exception:
+                # best-effort; never break main path
+                import logging

+                logger = logging.getLogger(__name__)
+                logger.exception("Error recording embedding metering")
+                pass
+
+        return embs
+
+    async def embed_one(
+        self,
+        text: str,
+        *,
+        model: str | None = None,
+        **kw: Any,
+    ) -> list[float]:
+        res = await self.embed([text], model=model, **kw)
+        return res[0]
+
+    # ------------ provider-specific helpers ------------------------
+
+    def _headers_openai_like(self) -> dict[str, str]:
+        headers = {"Content-Type": "application/json"}
+        if self.api_key:
+            headers["Authorization"] = f"Bearer {self.api_key}"
+        return headers
+
+    async def _embed_openai_like(
+        self,
+        texts: Sequence[str],
+        *,
+        model: str,
+        **kw: Any,
+    ) -> list[list[float]]:
+        assert self._client is not None
+        url = f"{self.base_url}/embeddings"
+        headers = self._headers_openai_like()
+        extra_body: dict[str, Any] = kw.get("extra_body") or {}
+
+        body: dict[str, Any] = {
+            "model": model,
+            "input": list(texts),
+        }
+        body.update(extra_body)
+
+        def parse(data: dict[str, Any]) -> list[list[float]]:
+            items = data.get("data", []) or []
+            embs = [d.get("embedding") for d in items]
+            if len(embs) != len(texts) or any(e is None for e in embs):
+                raise RuntimeError(
+                    f"Embeddings response shape mismatch: got {len(embs)} items for {len(texts)} inputs"
+                )
+            return embs  # type: ignore[return-value]
+
+        async def _call():
+            r = await self._client.post(url, headers=headers, json=body)
+            try:
+                r.raise_for_status()
+            except httpx.HTTPStatusError as e:
+                raise RuntimeError(
+                    f"Embeddings request failed ({e.response.status_code}): {e.response.text}"
+                ) from e
+            return parse(r.json())
+
+        return await self._retry.run(_call)
+
+    async def _embed_azure(
+        self,
+        texts: Sequence[str],
+        *,
+        model: str,
+        **kw: Any,
+    ) -> list[list[float]]:
+        if not self.azure_deployment:
+            raise RuntimeError(
+                "Azure embeddings requires AZURE_OPENAI_DEPLOYMENT (azure_deployment)"
+            )
+
+        assert self._client is not None
+
+        azure_api_version = kw.get("azure_api_version") or "2024-08-01-preview"
+        extra_body: dict[str, Any] = kw.get("extra_body") or {}
+
+        url = (
+            f"{self.base_url}/openai/deployments/"
+            f"{self.azure_deployment}/embeddings?api-version={azure_api_version}"
+        )
+        headers = {"api-key": self.api_key or "", "Content-Type": "application/json"}
+        body: dict[str, Any] = {"input": list(texts)}
+        # Some Azure flavors accept model/dimensions; keep flexible
+        if model:
+            body["model"] = model
+        body.update(extra_body)
+
+        def parse(data: dict[str, Any]) -> list[list[float]]:
+            items = data.get("data", []) or []
+            embs = [d.get("embedding") for d in items]
+            if len(embs) != len(texts) or any(e is None for e in embs):
+                raise RuntimeError(
+                    f"Azure embeddings response shape mismatch: got {len(embs)} items for {len(texts)} inputs"
+                )
+            return embs  # type: ignore[return-value]
+
+        async def _call():
+            r = await self._client.post(url, headers=headers, json=body)
+            try:
+                r.raise_for_status()
+            except httpx.HTTPStatusError as e:
+                raise RuntimeError(
+                    f"Embeddings request failed ({e.response.status_code}): {e.response.text}"
+                ) from e
+            return parse(r.json())
+
+        return await self._retry.run(_call)
+
+    async def _embed_google(
+        self,
+        texts: Sequence[str],
+        *,
+        model: str,
+        **kw: Any,
+    ) -> list[list[float]]:
+        assert self._client is not None
+        base = self.base_url.rstrip("/")
+        api_key = self.api_key or os.getenv("GOOGLE_API_KEY") or ""
+        headers = {"Content-Type": "application/json"}
+
+        # v1 and v1beta endpoints
+        batch_url_v1 = f"{base}/v1/models/{model}:batchEmbedContents?key={api_key}"
+        embed_url_v1 = f"{base}/v1/models/{model}:embedContent?key={api_key}"
+        batch_url_v1beta = f"{base}/v1beta/models/{model}:batchEmbedContents?key={api_key}"
+        embed_url_v1beta = f"{base}/v1beta/models/{model}:embedContent?key={api_key}"
+
+        def parse_single(data: dict[str, Any]) -> list[float]:
+            return (data.get("embedding") or {}).get("values") or []
+
+        def parse_batch(data: dict[str, Any]) -> list[list[float]]:
+            embs: list[list[float]] = []
+            for e in data.get("embeddings") or []:
+                embs.append((e or {}).get("values") or [])
+            if len(embs) != len(texts):
+                raise RuntimeError(
+                    f"Gemini batch embeddings mismatch: got {len(embs)} for {len(texts)}"
+                )
+            return embs
+
+        async def try_batch(url: str) -> list[list[float]] | None:
+            body = {"requests": [{"content": {"parts": [{"text": t}]}} for t in texts]}
+            r = await self._client.post(url, headers=headers, json=body)
+            if r.status_code in (400, 404):
+                return None
+            try:
+                r.raise_for_status()
+            except httpx.HTTPStatusError as e:
+                raise RuntimeError(
+                    f"Gemini batchEmbedContents failed ({e.response.status_code}): {e.response.text}"
+                ) from e
+            return parse_batch(r.json())
+
+        async def call_single(url: str) -> list[list[float]]:
+            out: list[list[float]] = []
+            for t in texts:
+                r = await self._client.post(
+                    url, headers=headers, json={"content": {"parts": [{"text": t}]}}
+                )
+                try:
+                    r.raise_for_status()
+                except httpx.HTTPStatusError as e:
+                    raise RuntimeError(
+                        f"Gemini embedContent failed ({e.response.status_code}): {e.response.text}"
+                    ) from e
+                out.append(parse_single(r.json()))
+            if len(out) != len(texts):
+                raise RuntimeError(f"Gemini embeddings mismatch: got {len(out)} for {len(texts)}")
+            return out
+
+        async def _call():
+            # try v1 batch, then v1beta batch, then single
+            res = await try_batch(batch_url_v1)
+            if res is not None:
+                return res
+            res = await try_batch(batch_url_v1beta)
+            if res is not None:
+                return res
+            try:
+                return await call_single(embed_url_v1)
+            except RuntimeError:
+                return await call_single(embed_url_v1beta)
+
+        return await self._retry.run(_call)
+
+    async def _embed_dummy(
+        self,
+        texts: Sequence[str],
+        *,
+        model: str,
+        **kw: Any,
+    ) -> list[list[float]]:
+        """
+        Dummy provider for tests: returns [len(text)] as a 1D "embedding".
+        """
+        return [[float(len(t))] for t in texts]
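For orientation, here is a minimal usage sketch of the new client. This is not from the package: the module path is taken from the file list above (the in-file header comment still says embedding_client.py), and the "dummy" provider is used so no API key or network call is needed.

import asyncio

from aethergraph.services.llm.generic_embed_client import GenericEmbeddingClient


async def main() -> None:
    # "dummy" resolves to a local placeholder base_url and never issues a request;
    # _embed_dummy returns one float per input: its character length.
    client = GenericEmbeddingClient(provider="dummy")
    print(await client.embed(["hello", "world!"]))  # [[5.0], [6.0]]
    print(await client.embed_one("hello"))          # [5.0]


asyncio.run(main())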
aethergraph/services/llm/types.py
@@ -1,7 +1,9 @@
 from dataclasses import dataclass
 from typing import Any, Literal
 
-ChatOutputFormat = Literal[
+ChatOutputFormat = Literal[
+    "text", "json_object", "json_schema", "raw"
+]  # text: plain text; json_object: dict; json_schema: validate against schema; raw: provider-specific raw response
 
 ImageFormat = Literal["png", "jpeg", "webp"]
 ImageResponseFormat = Literal["b64_json", "url"]  # url only for dall-e models typically
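A small sketch of how a caller might branch on the widened ChatOutputFormat. The coerce helper below is hypothetical; only the four literal values come from the diff.

import json

from aethergraph.services.llm.types import ChatOutputFormat


def coerce(raw: str, fmt: ChatOutputFormat) -> object:
    # Hypothetical helper: map a provider response string onto the declared format.
    if fmt == "text":
        return raw  # plain text passthrough
    if fmt in ("json_object", "json_schema"):
        return json.loads(raw)  # dict; "json_schema" would additionally validate
    return raw  # "raw": provider-specific payload, left untouched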
aethergraph/services/memory/distillers/llm_long_term.py
@@ -5,32 +5,13 @@ import json
 from typing import Any
 
 from aethergraph.contracts.services.llm import LLMClientProtocol
-from aethergraph.contracts.services.memory import Distiller, Event, HotLog
+from aethergraph.contracts.services.memory import Distiller, Event, HotLog
 from aethergraph.contracts.storage.doc_store import DocStore
-
-# metering
-from aethergraph.core.runtime.runtime_metering import current_meter_context, current_metering
-from aethergraph.services.memory.facade.utils import now_iso, stable_event_id
+from aethergraph.services.memory.facade.utils import now_iso
 from aethergraph.services.memory.utils import _summary_doc_id
 
 
 class LLMLongTermSummarizer(Distiller):
-    """
-    LLM-based long-term summarizer.
-
-    Flow:
-    1) Pull recent events from HotLog.
-    2) Filter by kind/tag/signal.
-    3) Build a prompt that shows the most important events as a transcript.
-    4) Call LLM to generate a structured summary.
-    5) Save summary JSON via Persistence.save_json(uri).
-    6) Emit a long_term_summary Event pointing to summary_uri.
-
-    This is complementary to RAG:
-    - LLM distiller compresses sequences into a digest.
-    - RAG uses many such digests + raw docs for retrieval.
-    """
-
     def __init__(
         self,
         *,
@@ -50,7 +31,7 @@ class LLMLongTermSummarizer(Distiller):
         self.include_tags = include_tags
         self.max_events = max_events
         self.min_signal = min_signal
-        self.model = model
+        self.model = model
 
     def _filter_events(self, events: Iterable[Event]) -> list[Event]:
         out: list[Event] = []
@@ -61,9 +42,8 @@ class LLMLongTermSummarizer(Distiller):
             if kinds is not None and e.kind not in kinds:
                 continue
             if tags is not None:
-
-
-                if not tags.issubset(set(e.tags)):
+                et = set(e.tags or [])
+                if not tags.issubset(et):  # AND semantics
                     continue
             if (e.signal or 0.0) < self.min_signal:
                 continue
@@ -71,17 +51,24 @@ class LLMLongTermSummarizer(Distiller):
         return out
 
     def _build_prompt(self, events: list[Event]) -> list[dict[str, str]]:
-        """
-        Convert events into a chat-style context for summarization.
-
-        We keep it model-agnostic: a list of {role, content} messages.
-        """
         lines: list[str] = []
 
        for e in events:
             role = e.stage or e.kind or "event"
-
-
+
+            # Prefer text, but fall back to compact JSON of data when needed
+            content = (e.text or "").strip()
+            if not content and getattr(e, "data", None) is not None:
+                try:
+                    content = json.dumps(e.data, ensure_ascii=False)
+                except Exception:
+                    content = str(e.data)
+
+            if content:
+                # keep prompts bounded
+                if len(content) > 500:
+                    content = content[:500] + "…"
+                lines.append(f"[{role}] {content}")
 
         transcript = "\n".join(lines)
 
@@ -97,54 +84,65 @@ class LLMLongTermSummarizer(Distiller):
             "Return a JSON object with keys: "
             "`summary` (string), "
             "`key_facts` (list of strings), "
-            "`open_loops` (list of strings)."
-            "Do not use markdown or include explanations
+            "`open_loops` (list of strings). "
+            "Do not use markdown or include explanations outside the JSON."
         )
 
-        return [
-            {"role": "system", "content": system},
-            {"role": "user", "content": user},
-        ]
+        return [{"role": "system", "content": system}, {"role": "user", "content": user}]
 
     async def distill(
         self,
         run_id: str,
         timeline_id: str,
-        scope_id: str = None,
+        scope_id: str | None = None,
         *,
         hotlog: HotLog,
-        persistence: Persistence,
-        indices: Indices,
         docs: DocStore,
         **kw: Any,
     ) -> dict[str, Any]:
-        #
-
+        # Over-fetch strategy:
+        # - if include_tags is present, filtering can be very selective, so over-fetch more
+        # - also pass include_kinds to HotLog to reduce noise
+        base_mult = 2
+        if self.include_tags:
+            base_mult = 8  # safer default for thread/session tags
+
+        # cap so we don't go crazy (HotLog may cap internally anyway)
+        fetch_limit = max(self.max_events * base_mult, 200)
+
+        raw = await hotlog.recent(
+            timeline_id,
+            kinds=self.include_kinds,  # narrow early when possible
+            limit=fetch_limit,
+        )
         kept = self._filter_events(raw)
+
         if not kept:
             return {}
 
+        # Keep only the most recent max_events (chronological, newest last)
         kept = kept[-self.max_events :]
+
         first_ts = kept[0].ts
         last_ts = kept[-1].ts
 
-        # 2) Build prompt and call LLM
         messages = self._build_prompt(kept)
 
-        #
-
-
-
+        # Respect model override if the client supports it
+        try:
+            if self.model:
+                summary_json_str, usage = await self.llm.chat(messages, model=self.model)  # type: ignore[arg-type]
+            else:
+                summary_json_str, usage = await self.llm.chat(messages)
+        except TypeError:
+            # Client doesn't accept model=...
+            summary_json_str, usage = await self.llm.chat(messages)
 
-        # 3) Parse LLM JSON response
         try:
             payload = json.loads(summary_json_str)
         except Exception:
-            payload = {
-
-                "key_facts": [],
-                "open_loops": [],
-            }
+            payload = {"summary": summary_json_str, "key_facts": [], "open_loops": []}
+
         ts = now_iso()
 
         summary_obj = {
@@ -161,74 +159,27 @@ class LLMLongTermSummarizer(Distiller):
             "key_facts": payload.get("key_facts", []),
             "open_loops": payload.get("open_loops", []),
             "llm_usage": usage,
-            "llm_model":
+            "llm_model": getattr(self.llm, "model", None),
+            "llm_model_override": self.model,
+            "include_kinds": self.include_kinds,
+            "include_tags": self.include_tags,
+            "min_signal": self.min_signal,
+            "fetch_limit": fetch_limit,
         }
 
         scope = scope_id or run_id
         doc_id = _summary_doc_id(scope, self.summary_tag, ts)
         await docs.put(doc_id, summary_obj)
 
-        # 4) Emit summary Event with preview + uri in data
         text = summary_obj["summary"] or ""
         preview = text[:2000] + (" …[truncated]" if len(text) > 2000 else "")
 
-        evt = Event(
-            event_id="",
-            ts=ts,
-            run_id=run_id,
-            scope_id=scope,
-            kind=self.summary_kind,
-            stage="summary_llm",
-            text=preview,
-            tags=["summary", "llm", self.summary_tag],
-            data={
-                "summary_doc_id": doc_id,
-                "summary_tag": self.summary_tag,
-                "time_window": summary_obj["time_window"],
-                "num_events": len(kept),
-            },
-            metrics={"num_events": len(kept)},
-            severity=2,
-            signal=0.7,
-        )
-
-        evt.event_id = stable_event_id(
-            {
-                "ts": ts,
-                "run_id": run_id,
-                "kind": self.summary_kind,
-                "summary_tag": self.summary_tag,
-                "preview": preview[:200],
-            }
-        )
-
-        await hotlog.append(timeline_id, evt, ttl_s=7 * 24 * 3600, limit=1000)
-        await persistence.append_event(timeline_id, evt)
-
-        # Metering: record summary event
-        try:
-            meter = current_metering()
-            ctx = current_meter_context.get()
-            user_id = ctx.get("user_id")
-            org_id = ctx.get("org_id")
-
-            await meter.record_event(
-                user_id=user_id,
-                org_id=org_id,
-                run_id=run_id,
-                scope_id=scope,
-                kind=f"memory.{self.summary_kind}",  # e.g. "memory.long_term_summary"
-            )
-        except Exception:
-            import logging
-
-            logger = logging.getLogger("aethergraph.services.memory.distillers.llm_long_term")
-            logger.error("Failed to record metering event for long_term_summary")
-
         return {
             "summary_doc_id": doc_id,
             "summary_kind": self.summary_kind,
             "summary_tag": self.summary_tag,
             "time_window": summary_obj["time_window"],
             "num_events": len(kept),
+            "preview": preview,
+            "ts": ts,
         }
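To make the new control flow concrete, a rough call-shape sketch for the slimmed-down distill. This is not from the package: the constructor keywords are inferred from attributes the diff references, and llm, hotlog, and docs stand for any objects satisfying the protocols imported above.

async def summarize_thread(llm, hotlog, docs) -> None:
    summarizer = LLMLongTermSummarizer(
        llm=llm,                     # needs an async chat(messages) -> (text, usage)
        include_tags={"thread:42"},  # AND-matched against event.tags; triggers the 8x over-fetch
        max_events=50,
    )
    result = await summarizer.distill(
        run_id="run-1",
        timeline_id="tl-1",
        hotlog=hotlog,  # only recent(timeline_id, kinds=..., limit=...) is used now
        docs=docs,      # the summary JSON is stored via docs.put(doc_id, summary_obj)
    )
    print(result.get("summary_doc_id"), result.get("preview", "")[:80])

Note the shape change: the distiller no longer emits a summary Event into the hotlog, appends to persistence, or records metering; it only writes the summary document and returns its metadata (now including preview and ts) to the caller.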