aethergraph 0.1.0a3__py3-none-any.whl → 0.1.0a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. aethergraph/api/v1/artifacts.py +23 -4
  2. aethergraph/api/v1/schemas.py +7 -0
  3. aethergraph/api/v1/session.py +123 -4
  4. aethergraph/config/config.py +2 -0
  5. aethergraph/config/search.py +49 -0
  6. aethergraph/contracts/services/channel.py +18 -1
  7. aethergraph/contracts/services/execution.py +58 -0
  8. aethergraph/contracts/services/llm.py +26 -0
  9. aethergraph/contracts/services/memory.py +10 -4
  10. aethergraph/contracts/services/planning.py +53 -0
  11. aethergraph/contracts/storage/event_log.py +8 -0
  12. aethergraph/contracts/storage/search_backend.py +47 -0
  13. aethergraph/contracts/storage/vector_index.py +73 -0
  14. aethergraph/core/graph/action_spec.py +76 -0
  15. aethergraph/core/graph/graph_fn.py +75 -2
  16. aethergraph/core/graph/graphify.py +74 -2
  17. aethergraph/core/runtime/graph_runner.py +2 -1
  18. aethergraph/core/runtime/node_context.py +66 -3
  19. aethergraph/core/runtime/node_services.py +8 -0
  20. aethergraph/core/runtime/run_manager.py +263 -271
  21. aethergraph/core/runtime/run_types.py +54 -1
  22. aethergraph/core/runtime/runtime_env.py +35 -14
  23. aethergraph/core/runtime/runtime_services.py +308 -18
  24. aethergraph/plugins/agents/default_chat_agent.py +266 -74
  25. aethergraph/plugins/agents/default_chat_agent_v2.py +487 -0
  26. aethergraph/plugins/channel/adapters/webui.py +69 -21
  27. aethergraph/plugins/channel/routes/webui_routes.py +8 -48
  28. aethergraph/runtime/__init__.py +12 -0
  29. aethergraph/server/app_factory.py +3 -0
  30. aethergraph/server/ui_static/assets/index-CFktGdbW.js +4913 -0
  31. aethergraph/server/ui_static/assets/index-DcfkFlTA.css +1 -0
  32. aethergraph/server/ui_static/index.html +2 -2
  33. aethergraph/services/artifacts/facade.py +157 -21
  34. aethergraph/services/artifacts/types.py +35 -0
  35. aethergraph/services/artifacts/utils.py +42 -0
  36. aethergraph/services/channel/channel_bus.py +3 -1
  37. aethergraph/services/channel/event_hub copy.py +55 -0
  38. aethergraph/services/channel/event_hub.py +81 -0
  39. aethergraph/services/channel/factory.py +3 -2
  40. aethergraph/services/channel/session.py +709 -74
  41. aethergraph/services/container/default_container.py +69 -7
  42. aethergraph/services/execution/__init__.py +0 -0
  43. aethergraph/services/execution/local_python.py +118 -0
  44. aethergraph/services/indices/__init__.py +0 -0
  45. aethergraph/services/indices/global_indices.py +21 -0
  46. aethergraph/services/indices/scoped_indices.py +292 -0
  47. aethergraph/services/llm/generic_client.py +342 -46
  48. aethergraph/services/llm/generic_embed_client.py +359 -0
  49. aethergraph/services/llm/types.py +3 -1
  50. aethergraph/services/memory/distillers/llm_long_term.py +60 -109
  51. aethergraph/services/memory/distillers/llm_long_term_v1.py +180 -0
  52. aethergraph/services/memory/distillers/llm_meta_summary.py +57 -266
  53. aethergraph/services/memory/distillers/llm_meta_summary_v1.py +342 -0
  54. aethergraph/services/memory/distillers/long_term.py +48 -131
  55. aethergraph/services/memory/distillers/long_term_v1.py +170 -0
  56. aethergraph/services/memory/facade/chat.py +18 -8
  57. aethergraph/services/memory/facade/core.py +159 -19
  58. aethergraph/services/memory/facade/distillation.py +86 -31
  59. aethergraph/services/memory/facade/retrieval.py +100 -1
  60. aethergraph/services/memory/factory.py +4 -1
  61. aethergraph/services/planning/__init__.py +0 -0
  62. aethergraph/services/planning/action_catalog.py +271 -0
  63. aethergraph/services/planning/bindings.py +56 -0
  64. aethergraph/services/planning/dependency_index.py +65 -0
  65. aethergraph/services/planning/flow_validator.py +263 -0
  66. aethergraph/services/planning/graph_io_adapter.py +150 -0
  67. aethergraph/services/planning/input_parser.py +312 -0
  68. aethergraph/services/planning/missing_inputs.py +28 -0
  69. aethergraph/services/planning/node_planner.py +613 -0
  70. aethergraph/services/planning/orchestrator.py +112 -0
  71. aethergraph/services/planning/plan_executor.py +506 -0
  72. aethergraph/services/planning/plan_types.py +321 -0
  73. aethergraph/services/planning/planner.py +617 -0
  74. aethergraph/services/planning/planner_service.py +369 -0
  75. aethergraph/services/planning/planning_context_builder.py +43 -0
  76. aethergraph/services/planning/quick_actions.py +29 -0
  77. aethergraph/services/planning/routers/__init__.py +0 -0
  78. aethergraph/services/planning/routers/simple_router.py +26 -0
  79. aethergraph/services/rag/facade.py +0 -3
  80. aethergraph/services/scope/scope.py +30 -30
  81. aethergraph/services/scope/scope_factory.py +15 -7
  82. aethergraph/services/skills/__init__.py +0 -0
  83. aethergraph/services/skills/skill_registry.py +465 -0
  84. aethergraph/services/skills/skills.py +220 -0
  85. aethergraph/services/skills/utils.py +194 -0
  86. aethergraph/storage/artifacts/artifact_index_jsonl.py +16 -10
  87. aethergraph/storage/artifacts/artifact_index_sqlite.py +12 -2
  88. aethergraph/storage/docstore/sqlite_doc_sync.py +1 -1
  89. aethergraph/storage/memory/event_persist.py +42 -2
  90. aethergraph/storage/memory/fs_persist.py +32 -2
  91. aethergraph/storage/search_backend/__init__.py +0 -0
  92. aethergraph/storage/search_backend/generic_vector_backend.py +230 -0
  93. aethergraph/storage/search_backend/null_backend.py +34 -0
  94. aethergraph/storage/search_backend/sqlite_lexical_backend.py +387 -0
  95. aethergraph/storage/search_backend/utils.py +31 -0
  96. aethergraph/storage/search_factory.py +75 -0
  97. aethergraph/storage/vector_index/faiss_index.py +72 -4
  98. aethergraph/storage/vector_index/sqlite_index.py +521 -52
  99. aethergraph/storage/vector_index/sqlite_index_vanila.py +311 -0
  100. aethergraph/storage/vector_index/utils.py +22 -0
  101. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/METADATA +1 -1
  102. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/RECORD +107 -63
  103. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/WHEEL +1 -1
  104. aethergraph/plugins/agents/default_chat_agent copy.py +0 -90
  105. aethergraph/server/ui_static/assets/index-BR5GtXcZ.css +0 -1
  106. aethergraph/server/ui_static/assets/index-CQ0HZZ83.js +0 -400
  107. aethergraph/services/eventhub/event_hub.py +0 -76
  108. aethergraph/services/llm/generic_client copy.py +0 -691
  109. aethergraph/services/prompts/file_store.py +0 -41
  110. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/entry_points.txt +0 -0
  111. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/licenses/LICENSE +0 -0
  112. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/licenses/NOTICE +0 -0
  113. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/top_level.txt +0 -0
aethergraph/services/llm/generic_embed_client.py
@@ -0,0 +1,359 @@
+ # aethergraph/services/llm/embedding_client.py
+ from __future__ import annotations
+
+ import asyncio
+ from collections.abc import Sequence
+ from dataclasses import dataclass
+ import os
+ from typing import Any
+
+ import httpx
+
+ from aethergraph.contracts.services.llm import EmbeddingClientProtocol
+ from aethergraph.services.llm.generic_client import _Retry
+ from aethergraph.services.metering.eventlog_metering import MeteringService
+
+
+ @dataclass
+ class GenericEmbeddingClient(EmbeddingClientProtocol):
+     """
+     Provider-agnostic embedding client.
+
+     provider: one of {"openai","azure","anthropic","google","openrouter","lmstudio","ollama","dummy"}
+
+     Configuration (env defaults, but can be passed directly):
+
+     - OPENAI_API_KEY / OPENAI_BASE_URL
+     - AZURE_OPENAI_KEY / AZURE_OPENAI_ENDPOINT / AZURE_OPENAI_DEPLOYMENT
+     - ANTHROPIC_API_KEY
+     - GOOGLE_API_KEY
+     - OPENROUTER_API_KEY
+     - LMSTUDIO_BASE_URL (default http://localhost:1234/v1)
+     - OLLAMA_BASE_URL (default http://localhost:11434/v1)
+     """
+
+     provider: str | None = None
+     model: str | None = None
+     base_url: str | None = None
+     api_key: str | None = None
+     azure_deployment: str | None = None
+     timeout: float = 60.0
+
+     # metering (optional, can be None)
+     metering: MeteringService | None = None
+
+     def __post_init__(self) -> None:
+         self.provider = (
+             self.provider or os.getenv("EMBED_PROVIDER") or os.getenv("LLM_PROVIDER") or "openai"
+         ).lower()  # type: ignore[assignment]
+         self.model = (
+             self.model
+             or os.getenv("EMBED_MODEL")
+             or os.getenv("LLM_EMBED_MODEL")
+             or "text-embedding-3-small"
+         )
+
+         # Pick an API key from provider-specific envs (or explicit api_key)
+         if self.api_key is None:
+             self.api_key = (
+                 os.getenv("OPENAI_API_KEY")
+                 or os.getenv("AZURE_OPENAI_KEY")
+                 or os.getenv("ANTHROPIC_API_KEY")
+                 or os.getenv("GOOGLE_API_KEY")
+                 or os.getenv("OPENROUTER_API_KEY")
+             )
+
+         # Base URL defaults per provider
+         if self.base_url is None:
+             self.base_url = {
+                 "openai": os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"),
+                 "azure": os.getenv("AZURE_OPENAI_ENDPOINT", "").rstrip("/"),
+                 "anthropic": "https://api.anthropic.com",
+                 "google": "https://generativelanguage.googleapis.com",
+                 "openrouter": "https://openrouter.ai/api/v1",
+                 "lmstudio": os.getenv("LMSTUDIO_BASE_URL", "http://localhost:1234/v1"),
+                 "ollama": os.getenv("OLLAMA_BASE_URL", "http://localhost:11434/v1"),
+                 "dummy": "http://localhost:8745",  # for tests
+             }[self.provider]
+
+         # Azure deployment (for /deployments/{name}/embeddings)
+         if self.provider == "azure" and self.azure_deployment is None:
+             self.azure_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT")
+
+         self._retry = _Retry()
+         self._client: httpx.AsyncClient | None = None
+
+     # ------------ client management -----------------
+
+     async def _ensure_client(self) -> None:
+         """
+         Ensure we have an httpx.AsyncClient bound to the *current* event loop.
+
+         IMPORTANT: We do NOT try to aclose() a client created on a different loop,
+         because httpx/anyio expects it to be closed on the same loop it was created on.
+         """
+         loop = asyncio.get_running_loop()
+
+         if self._client is None:
+             # first-time init
+             self._client = httpx.AsyncClient(timeout=self.timeout)
+             self._bound_loop = loop
+             return
+
+         if self._bound_loop is not loop:
+             # We're now in a different loop -> do not reuse the old client.
+             # We also do NOT call aclose() here, because that tends to explode
+             # if the old loop is already closed.
+             self._client = httpx.AsyncClient(timeout=self.timeout)
+             self._bound_loop = loop
+
+     # ------------ public API ------------------------
+
+     async def embed(
+         self,
+         texts: Sequence[str],
+         *,
+         model: str | None = None,
+         **kw: Any,
+     ) -> list[list[float]]:
+         """
+         Provider-agnostic batch embedding.
+         """
+         await self._ensure_client()
+         assert self._client is not None
+
+         if not isinstance(texts, Sequence) or any(not isinstance(t, str) for t in texts):
+             raise TypeError("embed(texts) expects Sequence[str]")
+         if len(texts) == 0:
+             return []
+
+         # Resolve model (override > configured)
+         model = model or self.model or "text-embedding-3-small"
+
+         # Dispatch by provider
+         if self.provider in {"openai", "openrouter", "lmstudio", "ollama"}:
+             embs = await self._embed_openai_like(texts, model=model, **kw)
+         elif self.provider == "azure":
+             embs = await self._embed_azure(texts, model=model, **kw)
+         elif self.provider == "google":
+             embs = await self._embed_google(texts, model=model, **kw)
+         elif self.provider == "anthropic":
+             raise NotImplementedError("Embeddings not supported for anthropic")
+         elif self.provider == "dummy":
+             embs = await self._embed_dummy(texts, model=model, **kw)
+         else:  # pragma: no cover
+             raise NotImplementedError(f"Unknown embedding provider: {self.provider}")
+
+         # ---- metering hook (placeholder) ----
+         if self.metering is not None:
+             try:
+                 # TODO: compute token estimates or bytes; for now just count inputs
+                 await self.metering.record_embedding(
+                     provider=self.provider,
+                     model=model,
+                     num_texts=len(texts),
+                     # tokens=estimated_tokens,
+                 )
+             except Exception:
+                 # best-effort; never break main path
+                 import logging
+
+                 logger = logging.getLogger(__name__)
+                 logger.exception("Error recording embedding metering")
+                 pass
+
+         return embs
+
+     async def embed_one(
+         self,
+         text: str,
+         *,
+         model: str | None = None,
+         **kw: Any,
+     ) -> list[float]:
+         res = await self.embed([text], model=model, **kw)
+         return res[0]
+
+     # ------------ provider-specific helpers ------------------------
+
+     def _headers_openai_like(self) -> dict[str, str]:
+         headers = {"Content-Type": "application/json"}
+         if self.api_key:
+             headers["Authorization"] = f"Bearer {self.api_key}"
+         return headers
+
+     async def _embed_openai_like(
+         self,
+         texts: Sequence[str],
+         *,
+         model: str,
+         **kw: Any,
+     ) -> list[list[float]]:
+         assert self._client is not None
+         url = f"{self.base_url}/embeddings"
+         headers = self._headers_openai_like()
+         extra_body: dict[str, Any] = kw.get("extra_body") or {}
+
+         body: dict[str, Any] = {
+             "model": model,
+             "input": list(texts),
+         }
+         body.update(extra_body)
+
+         def parse(data: dict[str, Any]) -> list[list[float]]:
+             items = data.get("data", []) or []
+             embs = [d.get("embedding") for d in items]
+             if len(embs) != len(texts) or any(e is None for e in embs):
+                 raise RuntimeError(
+                     f"Embeddings response shape mismatch: got {len(embs)} items for {len(texts)} inputs"
+                 )
+             return embs  # type: ignore[return-value]
+
+         async def _call():
+             r = await self._client.post(url, headers=headers, json=body)
+             try:
+                 r.raise_for_status()
+             except httpx.HTTPStatusError as e:
+                 raise RuntimeError(
+                     f"Embeddings request failed ({e.response.status_code}): {e.response.text}"
+                 ) from e
+             return parse(r.json())
+
+         return await self._retry.run(_call)
+
+     async def _embed_azure(
+         self,
+         texts: Sequence[str],
+         *,
+         model: str,
+         **kw: Any,
+     ) -> list[list[float]]:
+         if not self.azure_deployment:
+             raise RuntimeError(
+                 "Azure embeddings requires AZURE_OPENAI_DEPLOYMENT (azure_deployment)"
+             )
+
+         assert self._client is not None
+
+         azure_api_version = kw.get("azure_api_version") or "2024-08-01-preview"
+         extra_body: dict[str, Any] = kw.get("extra_body") or {}
+
+         url = (
+             f"{self.base_url}/openai/deployments/"
+             f"{self.azure_deployment}/embeddings?api-version={azure_api_version}"
+         )
+         headers = {"api-key": self.api_key or "", "Content-Type": "application/json"}
+         body: dict[str, Any] = {"input": list(texts)}
+         # Some Azure flavors accept model/dimensions; keep flexible
+         if model:
+             body["model"] = model
+         body.update(extra_body)
+
+         def parse(data: dict[str, Any]) -> list[list[float]]:
+             items = data.get("data", []) or []
+             embs = [d.get("embedding") for d in items]
+             if len(embs) != len(texts) or any(e is None for e in embs):
+                 raise RuntimeError(
+                     f"Azure embeddings response shape mismatch: got {len(embs)} items for {len(texts)} inputs"
+                 )
+             return embs  # type: ignore[return-value]
+
+         async def _call():
+             r = await self._client.post(url, headers=headers, json=body)
+             try:
+                 r.raise_for_status()
+             except httpx.HTTPStatusError as e:
+                 raise RuntimeError(
+                     f"Embeddings request failed ({e.response.status_code}): {e.response.text}"
+                 ) from e
+             return parse(r.json())
+
+         return await self._retry.run(_call)
+
+     async def _embed_google(
+         self,
+         texts: Sequence[str],
+         *,
+         model: str,
+         **kw: Any,
+     ) -> list[list[float]]:
+         assert self._client is not None
+         base = self.base_url.rstrip("/")
+         api_key = self.api_key or os.getenv("GOOGLE_API_KEY") or ""
+         headers = {"Content-Type": "application/json"}
+
+         # v1 and v1beta endpoints
+         batch_url_v1 = f"{base}/v1/models/{model}:batchEmbedContents?key={api_key}"
+         embed_url_v1 = f"{base}/v1/models/{model}:embedContent?key={api_key}"
+         batch_url_v1beta = f"{base}/v1beta/models/{model}:batchEmbedContents?key={api_key}"
+         embed_url_v1beta = f"{base}/v1beta/models/{model}:embedContent?key={api_key}"
+
+         def parse_single(data: dict[str, Any]) -> list[float]:
+             return (data.get("embedding") or {}).get("values") or []
+
+         def parse_batch(data: dict[str, Any]) -> list[list[float]]:
+             embs: list[list[float]] = []
+             for e in data.get("embeddings") or []:
+                 embs.append((e or {}).get("values") or [])
+             if len(embs) != len(texts):
+                 raise RuntimeError(
+                     f"Gemini batch embeddings mismatch: got {len(embs)} for {len(texts)}"
+                 )
+             return embs
+
+         async def try_batch(url: str) -> list[list[float]] | None:
+             body = {"requests": [{"content": {"parts": [{"text": t}]}} for t in texts]}
+             r = await self._client.post(url, headers=headers, json=body)
+             if r.status_code in (400, 404):
+                 return None
+             try:
+                 r.raise_for_status()
+             except httpx.HTTPStatusError as e:
+                 raise RuntimeError(
+                     f"Gemini batchEmbedContents failed ({e.response.status_code}): {e.response.text}"
+                 ) from e
+             return parse_batch(r.json())
+
+         async def call_single(url: str) -> list[list[float]]:
+             out: list[list[float]] = []
+             for t in texts:
+                 r = await self._client.post(
+                     url, headers=headers, json={"content": {"parts": [{"text": t}]}}
+                 )
+                 try:
+                     r.raise_for_status()
+                 except httpx.HTTPStatusError as e:
+                     raise RuntimeError(
+                         f"Gemini embedContent failed ({e.response.status_code}): {e.response.text}"
+                     ) from e
+                 out.append(parse_single(r.json()))
+             if len(out) != len(texts):
+                 raise RuntimeError(f"Gemini embeddings mismatch: got {len(out)} for {len(texts)}")
+             return out
+
+         async def _call():
+             # try v1 batch, then v1beta batch, then single
+             res = await try_batch(batch_url_v1)
+             if res is not None:
+                 return res
+             res = await try_batch(batch_url_v1beta)
+             if res is not None:
+                 return res
+             try:
+                 return await call_single(embed_url_v1)
+             except RuntimeError:
+                 return await call_single(embed_url_v1beta)
+
+         return await self._retry.run(_call)
+
+     async def _embed_dummy(
+         self,
+         texts: Sequence[str],
+         *,
+         model: str,
+         **kw: Any,
+     ) -> list[list[float]]:
+         """
+         Dummy provider for tests: returns [len(text)] as a 1D "embedding".
+         """
+         return [[float(len(t))] for t in texts]
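A minimal usage sketch for the new client, assuming it is importable from the generic_embed_client.py path in the file table above (the header comment inside the file says embedding_client.py, so the module path here follows the manifest, not the comment). The "dummy" provider needs no network access or API key, and calling asyncio.run() twice exercises the per-event-loop rebinding that _ensure_client describes.

import asyncio

from aethergraph.services.llm.generic_embed_client import GenericEmbeddingClient

# Module-level client so the same instance spans both event loops below.
client = GenericEmbeddingClient(provider="dummy")

async def demo() -> None:
    vecs = await client.embed(["hello", "world!"])
    print(vecs)                          # [[5.0], [6.0]] -- dummy returns [len(text)]
    print(await client.embed_one("hi"))  # [2.0]

# Two separate event loops reuse the same client object; _ensure_client
# rebinds the underlying httpx.AsyncClient to the current loop rather than
# reusing (or trying to close) one created on a now-dead loop.
asyncio.run(demo())
asyncio.run(demo())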
aethergraph/services/llm/types.py
@@ -1,7 +1,9 @@
  from dataclasses import dataclass
  from typing import Any, Literal

- ChatOutputFormat = Literal["text", "json_object", "json_schema"]
+ ChatOutputFormat = Literal[
+     "text", "json_object", "json_schema", "raw"
+ ]  # text: plain text; json_object: dict; json_schema: validate against schema; raw: provider-specific raw response

  ImageFormat = Literal["png", "jpeg", "webp"]
  ImageResponseFormat = Literal["b64_json", "url"]  # url only for dall-e models typically
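The widened Literal is a plain typing alias, so the new "raw" member is immediately valid anywhere a ChatOutputFormat is expected. A quick illustration (the function name is hypothetical, not part of the package):

from aethergraph.services.llm.types import ChatOutputFormat

def pick_format(structured: bool) -> ChatOutputFormat:
    # "json_object" requests a JSON dict; "raw" (new in 0.1.0a4) passes the
    # provider-specific response through untouched.
    return "json_object" if structured else "raw"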
aethergraph/services/memory/distillers/llm_long_term.py
@@ -5,32 +5,13 @@ import json
  from typing import Any

  from aethergraph.contracts.services.llm import LLMClientProtocol
- from aethergraph.contracts.services.memory import Distiller, Event, HotLog, Indices, Persistence
+ from aethergraph.contracts.services.memory import Distiller, Event, HotLog
  from aethergraph.contracts.storage.doc_store import DocStore
-
- # metering
- from aethergraph.core.runtime.runtime_metering import current_meter_context, current_metering
- from aethergraph.services.memory.facade.utils import now_iso, stable_event_id
+ from aethergraph.services.memory.facade.utils import now_iso
  from aethergraph.services.memory.utils import _summary_doc_id


  class LLMLongTermSummarizer(Distiller):
-     """
-     LLM-based long-term summarizer.
-
-     Flow:
-     1) Pull recent events from HotLog.
-     2) Filter by kind/tag/signal.
-     3) Build a prompt that shows the most important events as a transcript.
-     4) Call LLM to generate a structured summary.
-     5) Save summary JSON via Persistence.save_json(uri).
-     6) Emit a long_term_summary Event pointing to summary_uri.
-
-     This is complementary to RAG:
-     - LLM distiller compresses sequences into a digest.
-     - RAG uses many such digests + raw docs for retrieval.
-     """
-
      def __init__(
          self,
          *,
@@ -50,7 +31,7 @@ class LLMLongTermSummarizer(Distiller):
          self.include_tags = include_tags
          self.max_events = max_events
          self.min_signal = min_signal
-         self.model = model  # optional model override
+         self.model = model

      def _filter_events(self, events: Iterable[Event]) -> list[Event]:
          out: list[Event] = []
@@ -61,9 +42,8 @@
              if kinds is not None and e.kind not in kinds:
                  continue
              if tags is not None:
-                 if not e.tags:
-                     continue
-                 if not tags.issubset(set(e.tags)):
+                 et = set(e.tags or [])
+                 if not tags.issubset(et):  # AND semantics
                      continue
              if (e.signal or 0.0) < self.min_signal:
                  continue
@@ -71,17 +51,24 @@
          return out

      def _build_prompt(self, events: list[Event]) -> list[dict[str, str]]:
-         """
-         Convert events into a chat-style context for summarization.
-
-         We keep it model-agnostic: a list of {role, content} messages.
-         """
          lines: list[str] = []

          for e in events:
              role = e.stage or e.kind or "event"
-             if e.text:
-                 lines.append(f"[{role}] {e.text}")
+
+             # Prefer text, but fall back to compact JSON of data when needed
+             content = (e.text or "").strip()
+             if not content and getattr(e, "data", None) is not None:
+                 try:
+                     content = json.dumps(e.data, ensure_ascii=False)
+                 except Exception:
+                     content = str(e.data)
+
+             if content:
+                 # keep prompts bounded
+                 if len(content) > 500:
+                     content = content[:500] + "…"
+                 lines.append(f"[{role}] {content}")

          transcript = "\n".join(lines)

@@ -97,54 +84,65 @@
              "Return a JSON object with keys: "
              "`summary` (string), "
              "`key_facts` (list of strings), "
-             "`open_loops` (list of strings)."
-             "Do not use markdown or include explanations or context outside the JSON."
+             "`open_loops` (list of strings). "
+             "Do not use markdown or include explanations outside the JSON."
          )

-         return [
-             {"role": "system", "content": system},
-             {"role": "user", "content": user},
-         ]
+         return [{"role": "system", "content": system}, {"role": "user", "content": user}]

      async def distill(
          self,
          run_id: str,
          timeline_id: str,
-         scope_id: str = None,
+         scope_id: str | None = None,
          *,
          hotlog: HotLog,
-         persistence: Persistence,
-         indices: Indices,
          docs: DocStore,
          **kw: Any,
      ) -> dict[str, Any]:
-         # 1) fetch more events than needed, then filter
-         raw = await hotlog.recent(timeline_id, kinds=None, limit=self.max_events * 2)
+         # Over-fetch strategy:
+         # - if include_tags is present, filtering can be very selective, so over-fetch more
+         # - also pass include_kinds to HotLog to reduce noise
+         base_mult = 2
+         if self.include_tags:
+             base_mult = 8  # safer default for thread/session tags
+
+         # cap so we don't go crazy (HotLog may cap internally anyway)
+         fetch_limit = max(self.max_events * base_mult, 200)
+
+         raw = await hotlog.recent(
+             timeline_id,
+             kinds=self.include_kinds,  # narrow early when possible
+             limit=fetch_limit,
+         )
          kept = self._filter_events(raw)
+
          if not kept:
              return {}

+         # Keep only the most recent max_events (chronological, newest last)
          kept = kept[-self.max_events :]
+
          first_ts = kept[0].ts
          last_ts = kept[-1].ts

-         # 2) Build prompt and call LLM
          messages = self._build_prompt(kept)

-         # LLMClientProtocol: assume chat(...) returns (text, usage)
-         summary_json_str, usage = await self.llm.chat(
-             messages,
-         )
+         # Respect model override if the client supports it
+         try:
+             if self.model:
+                 summary_json_str, usage = await self.llm.chat(messages, model=self.model)  # type: ignore[arg-type]
+             else:
+                 summary_json_str, usage = await self.llm.chat(messages)
+         except TypeError:
+             # Client doesn't accept model=...
+             summary_json_str, usage = await self.llm.chat(messages)

-         # 3) Parse LLM JSON response
          try:
              payload = json.loads(summary_json_str)
          except Exception:
-             payload = {
-                 "summary": summary_json_str,
-                 "key_facts": [],
-                 "open_loops": [],
-             }
+             payload = {"summary": summary_json_str, "key_facts": [], "open_loops": []}
+
          ts = now_iso()

          summary_obj = {
@@ -161,74 +159,27 @@ class LLMLongTermSummarizer(Distiller):
              "key_facts": payload.get("key_facts", []),
              "open_loops": payload.get("open_loops", []),
              "llm_usage": usage,
-             "llm_model": self.llm.model if hasattr(self.llm, "model") else None,
+             "llm_model": getattr(self.llm, "model", None),
+             "llm_model_override": self.model,
+             "include_kinds": self.include_kinds,
+             "include_tags": self.include_tags,
+             "min_signal": self.min_signal,
+             "fetch_limit": fetch_limit,
          }

          scope = scope_id or run_id
          doc_id = _summary_doc_id(scope, self.summary_tag, ts)
          await docs.put(doc_id, summary_obj)

-         # 4) Emit summary Event with preview + uri in data
          text = summary_obj["summary"] or ""
          preview = text[:2000] + (" …[truncated]" if len(text) > 2000 else "")

-         evt = Event(
-             event_id="",
-             ts=ts,
-             run_id=run_id,
-             scope_id=scope,
-             kind=self.summary_kind,
-             stage="summary_llm",
-             text=preview,
-             tags=["summary", "llm", self.summary_tag],
-             data={
-                 "summary_doc_id": doc_id,
-                 "summary_tag": self.summary_tag,
-                 "time_window": summary_obj["time_window"],
-                 "num_events": len(kept),
-             },
-             metrics={"num_events": len(kept)},
-             severity=2,
-             signal=0.7,
-         )
-
-         evt.event_id = stable_event_id(
-             {
-                 "ts": ts,
-                 "run_id": run_id,
-                 "kind": self.summary_kind,
-                 "summary_tag": self.summary_tag,
-                 "preview": preview[:200],
-             }
-         )
-
-         await hotlog.append(timeline_id, evt, ttl_s=7 * 24 * 3600, limit=1000)
-         await persistence.append_event(timeline_id, evt)
-
-         # Metering: record summary event
-         try:
-             meter = current_metering()
-             ctx = current_meter_context.get()
-             user_id = ctx.get("user_id")
-             org_id = ctx.get("org_id")
-
-             await meter.record_event(
-                 user_id=user_id,
-                 org_id=org_id,
-                 run_id=run_id,
-                 scope_id=scope,
-                 kind=f"memory.{self.summary_kind}",  # e.g. "memory.long_term_summary"
-             )
-         except Exception:
-             import logging
-
-             logger = logging.getLogger("aethergraph.services.memory.distillers.llm_long_term")
-             logger.error("Failed to record metering event for long_term_summary")
-
          return {
              "summary_doc_id": doc_id,
              "summary_kind": self.summary_kind,
              "summary_tag": self.summary_tag,
              "time_window": summary_obj["time_window"],
              "num_events": len(kept),
+             "preview": preview,
+             "ts": ts,
          }
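For context, a sketch of how the slimmed-down distill() signature is called after this change: persistence= and indices= are gone, scope_id is now Optional, and the return dict gains "preview" and "ts". The summarizer/hotlog/docs objects are assumed to be an LLMLongTermSummarizer plus implementations of the HotLog and DocStore protocols imported above; their construction is not shown in this diff.

from typing import Any

async def summarize_timeline(summarizer, hotlog, docs) -> dict[str, Any]:
    result = await summarizer.distill(
        run_id="run-123",
        timeline_id="tl-123",
        scope_id=None,   # falls back to run_id when resolving the summary doc scope
        hotlog=hotlog,
        docs=docs,
    )
    # result: summary_doc_id, summary_kind, summary_tag, time_window,
    # num_events, plus the new "preview" and "ts" keys.
    return result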