aethergraph 0.1.0a3__py3-none-any.whl → 0.1.0a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. aethergraph/api/v1/artifacts.py +23 -4
  2. aethergraph/api/v1/schemas.py +7 -0
  3. aethergraph/api/v1/session.py +123 -4
  4. aethergraph/config/config.py +2 -0
  5. aethergraph/config/search.py +49 -0
  6. aethergraph/contracts/services/channel.py +18 -1
  7. aethergraph/contracts/services/execution.py +58 -0
  8. aethergraph/contracts/services/llm.py +26 -0
  9. aethergraph/contracts/services/memory.py +10 -4
  10. aethergraph/contracts/services/planning.py +53 -0
  11. aethergraph/contracts/storage/event_log.py +8 -0
  12. aethergraph/contracts/storage/search_backend.py +47 -0
  13. aethergraph/contracts/storage/vector_index.py +73 -0
  14. aethergraph/core/graph/action_spec.py +76 -0
  15. aethergraph/core/graph/graph_fn.py +75 -2
  16. aethergraph/core/graph/graphify.py +74 -2
  17. aethergraph/core/runtime/graph_runner.py +2 -1
  18. aethergraph/core/runtime/node_context.py +66 -3
  19. aethergraph/core/runtime/node_services.py +8 -0
  20. aethergraph/core/runtime/run_manager.py +263 -271
  21. aethergraph/core/runtime/run_types.py +54 -1
  22. aethergraph/core/runtime/runtime_env.py +35 -14
  23. aethergraph/core/runtime/runtime_services.py +308 -18
  24. aethergraph/plugins/agents/default_chat_agent.py +266 -74
  25. aethergraph/plugins/agents/default_chat_agent_v2.py +487 -0
  26. aethergraph/plugins/channel/adapters/webui.py +69 -21
  27. aethergraph/plugins/channel/routes/webui_routes.py +8 -48
  28. aethergraph/runtime/__init__.py +12 -0
  29. aethergraph/server/app_factory.py +3 -0
  30. aethergraph/server/ui_static/assets/index-CFktGdbW.js +4913 -0
  31. aethergraph/server/ui_static/assets/index-DcfkFlTA.css +1 -0
  32. aethergraph/server/ui_static/index.html +2 -2
  33. aethergraph/services/artifacts/facade.py +157 -21
  34. aethergraph/services/artifacts/types.py +35 -0
  35. aethergraph/services/artifacts/utils.py +42 -0
  36. aethergraph/services/channel/channel_bus.py +3 -1
  37. aethergraph/services/channel/event_hub copy.py +55 -0
  38. aethergraph/services/channel/event_hub.py +81 -0
  39. aethergraph/services/channel/factory.py +3 -2
  40. aethergraph/services/channel/session.py +709 -74
  41. aethergraph/services/container/default_container.py +69 -7
  42. aethergraph/services/execution/__init__.py +0 -0
  43. aethergraph/services/execution/local_python.py +118 -0
  44. aethergraph/services/indices/__init__.py +0 -0
  45. aethergraph/services/indices/global_indices.py +21 -0
  46. aethergraph/services/indices/scoped_indices.py +292 -0
  47. aethergraph/services/llm/generic_client.py +342 -46
  48. aethergraph/services/llm/generic_embed_client.py +359 -0
  49. aethergraph/services/llm/types.py +3 -1
  50. aethergraph/services/memory/distillers/llm_long_term.py +60 -109
  51. aethergraph/services/memory/distillers/llm_long_term_v1.py +180 -0
  52. aethergraph/services/memory/distillers/llm_meta_summary.py +57 -266
  53. aethergraph/services/memory/distillers/llm_meta_summary_v1.py +342 -0
  54. aethergraph/services/memory/distillers/long_term.py +48 -131
  55. aethergraph/services/memory/distillers/long_term_v1.py +170 -0
  56. aethergraph/services/memory/facade/chat.py +18 -8
  57. aethergraph/services/memory/facade/core.py +159 -19
  58. aethergraph/services/memory/facade/distillation.py +86 -31
  59. aethergraph/services/memory/facade/retrieval.py +100 -1
  60. aethergraph/services/memory/factory.py +4 -1
  61. aethergraph/services/planning/__init__.py +0 -0
  62. aethergraph/services/planning/action_catalog.py +271 -0
  63. aethergraph/services/planning/bindings.py +56 -0
  64. aethergraph/services/planning/dependency_index.py +65 -0
  65. aethergraph/services/planning/flow_validator.py +263 -0
  66. aethergraph/services/planning/graph_io_adapter.py +150 -0
  67. aethergraph/services/planning/input_parser.py +312 -0
  68. aethergraph/services/planning/missing_inputs.py +28 -0
  69. aethergraph/services/planning/node_planner.py +613 -0
  70. aethergraph/services/planning/orchestrator.py +112 -0
  71. aethergraph/services/planning/plan_executor.py +506 -0
  72. aethergraph/services/planning/plan_types.py +321 -0
  73. aethergraph/services/planning/planner.py +617 -0
  74. aethergraph/services/planning/planner_service.py +369 -0
  75. aethergraph/services/planning/planning_context_builder.py +43 -0
  76. aethergraph/services/planning/quick_actions.py +29 -0
  77. aethergraph/services/planning/routers/__init__.py +0 -0
  78. aethergraph/services/planning/routers/simple_router.py +26 -0
  79. aethergraph/services/rag/facade.py +0 -3
  80. aethergraph/services/scope/scope.py +30 -30
  81. aethergraph/services/scope/scope_factory.py +15 -7
  82. aethergraph/services/skills/__init__.py +0 -0
  83. aethergraph/services/skills/skill_registry.py +465 -0
  84. aethergraph/services/skills/skills.py +220 -0
  85. aethergraph/services/skills/utils.py +194 -0
  86. aethergraph/storage/artifacts/artifact_index_jsonl.py +16 -10
  87. aethergraph/storage/artifacts/artifact_index_sqlite.py +12 -2
  88. aethergraph/storage/docstore/sqlite_doc_sync.py +1 -1
  89. aethergraph/storage/memory/event_persist.py +42 -2
  90. aethergraph/storage/memory/fs_persist.py +32 -2
  91. aethergraph/storage/search_backend/__init__.py +0 -0
  92. aethergraph/storage/search_backend/generic_vector_backend.py +230 -0
  93. aethergraph/storage/search_backend/null_backend.py +34 -0
  94. aethergraph/storage/search_backend/sqlite_lexical_backend.py +387 -0
  95. aethergraph/storage/search_backend/utils.py +31 -0
  96. aethergraph/storage/search_factory.py +75 -0
  97. aethergraph/storage/vector_index/faiss_index.py +72 -4
  98. aethergraph/storage/vector_index/sqlite_index.py +521 -52
  99. aethergraph/storage/vector_index/sqlite_index_vanila.py +311 -0
  100. aethergraph/storage/vector_index/utils.py +22 -0
  101. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/METADATA +1 -1
  102. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/RECORD +107 -63
  103. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/WHEEL +1 -1
  104. aethergraph/plugins/agents/default_chat_agent copy.py +0 -90
  105. aethergraph/server/ui_static/assets/index-BR5GtXcZ.css +0 -1
  106. aethergraph/server/ui_static/assets/index-CQ0HZZ83.js +0 -400
  107. aethergraph/services/eventhub/event_hub.py +0 -76
  108. aethergraph/services/llm/generic_client copy.py +0 -691
  109. aethergraph/services/prompts/file_store.py +0 -41
  110. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/entry_points.txt +0 -0
  111. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/licenses/LICENSE +0 -0
  112. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/licenses/NOTICE +0 -0
  113. {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/top_level.txt +0 -0
aethergraph/storage/search_backend/sqlite_lexical_backend.py
@@ -0,0 +1,387 @@
+ from __future__ import annotations
+
+ import asyncio
+ from dataclasses import dataclass
+ import json
+ from pathlib import Path
+ import sqlite3
+ import time
+ from typing import Any
+
+ from aethergraph.contracts.storage.search_backend import ScoredItem, SearchBackend
+
+ LEXICAL_SCHEMA = """
+ CREATE TABLE IF NOT EXISTS docs (
+     corpus_id TEXT,
+     item_id TEXT,
+     text TEXT,
+     meta_json TEXT,
+     created_at_ts REAL,
+     org_id TEXT,
+     user_id TEXT,
+     scope_id TEXT,
+     client_id TEXT,
+     app_id TEXT,
+     session_id TEXT,
+     run_id TEXT,
+     graph_id TEXT,
+     node_id TEXT,
+     kind TEXT,
+     source TEXT,
+     PRIMARY KEY (corpus_id, item_id)
+ );
+
+ CREATE INDEX IF NOT EXISTS idx_docs_corpus_scope_time
+     ON docs(corpus_id, scope_id, created_at_ts DESC);
+
+ CREATE INDEX IF NOT EXISTS idx_docs_corpus_user_time
+     ON docs(corpus_id, user_id, created_at_ts DESC);
+
+ CREATE INDEX IF NOT EXISTS idx_docs_corpus_org_time
+     ON docs(corpus_id, org_id, created_at_ts DESC);
+ """
+
+
+ def _ensure_db(path: str) -> None:
+     Path(path).parent.mkdir(parents=True, exist_ok=True)
+     conn = sqlite3.connect(path, check_same_thread=False)
+     try:
+         cur = conn.cursor()
+         for stmt in LEXICAL_SCHEMA.strip().split(";\n\n"):
+             s = stmt.strip()
+             if s:
+                 cur.execute(s)
+         conn.commit()
+     finally:
+         conn.close()
+
+
+ @dataclass
+ class SQLiteLexicalSearchBackend(SearchBackend):
+     """
+     Cheap non-LLM search backend.
+
+     - Upsert: store raw text + metadata in a SQLite table.
+     - Search: use simple keyword LIKE search + identity/time filters.
+
+
+     Right now the lexical backend is a simple bag-of-words search over a SQLite table:
+     - Every upsert stores: corpus_id, item_id, raw text, full meta_json, and promoted fields (org_id, user_id, scope_id, run_id, kind, source, created_at_ts, etc.) into docs.
+     - At query time, we:
+       - Use SQL to filter by corpus, org/user/scope, and optional time window (created_at_min/max), and sort by created_at_ts DESC LIMIT N (recency bias).
+       - Pull that candidate row set into Python.
+       - Tokenize the query ("sample text JSON artifact" → ["sample", "text", "json", "artifact"]).
+       - For each candidate text, count how many tokens appear (and how often), and derive a simple score:
+         - “more distinct query words present + a tiny bump for repeats = higher score.”
+       - Discard docs that match none of the tokens, return top-k by score.
+
+     NOTE: it’s exact token match, multi-word aware, and understands time + scope, but deliberately dumb:
+     - No stemming (“run” vs “running”), no synonyms, no typo/fuzzy matching.
+     - No real IR scoring (no TF-IDF/BM25, no field weighting, no phrase queries).
+     - Quality will degrade for huge corpora because ranking is naive and all ranking happens in Python.
+     - But it’s cheap, local, deterministic, and good enough for “I remember some words from that thing I saved.”
+     """
+
+     db_path: str
+
+     def __post_init__(self) -> None:
+         _ensure_db(self.db_path)
+
+     def _connect(self) -> sqlite3.Connection:
+         return sqlite3.connect(self.db_path, check_same_thread=False)
+
+     # -------- helpers ----------------------------------------------------
+
+     @staticmethod
+     def _parse_time_window(
+         time_window: str | None,
+         created_at_min: float | None,
+         created_at_max: float | None,
+     ) -> tuple[float | None, float | None]:
+         if not time_window:
+             return created_at_min, created_at_max
+
+         if created_at_min is not None and created_at_max is not None:
+             return created_at_min, created_at_max
+
+         # very simple parser: "7d", "24h", "30m", "60s"
+         import re
+
+         m = re.match(r"^\s*(\d+)\s*([smhd])\s*$", time_window)
+         if not m:
+             return created_at_min, created_at_max
+
+         value = int(m.group(1))
+         unit = m.group(2)
+         factor = {"s": 1, "m": 60, "h": 3600, "d": 86400}[unit]
+
+         now_ts = time.time()
+         duration = value * factor
+
+         if created_at_min is None:
+             created_at_min = now_ts - duration
+         if created_at_max is None:
+             created_at_max = now_ts
+
+         return created_at_min, created_at_max
+
+     # -------- public APIs -----------------------------------------------
+
+     async def upsert(
+         self,
+         *,
+         corpus: str,
+         item_id: str,
+         text: str,
+         metadata: dict[str, Any],
+     ) -> None:
+         """
+         Store text + metadata in docs table.
+
+         We mirror common promoted fields into columns for cheap filtering.
+         """
+         if not text:
+             text = ""
+
+         # Extract promoted fields from metadata
+         org_id = metadata.get("org_id")
+         user_id = metadata.get("user_id")
+         scope_id = metadata.get("scope_id")
+         client_id = metadata.get("client_id")
+         app_id = metadata.get("app_id")
+         session_id = metadata.get("session_id")
+         run_id = metadata.get("run_id")
+         graph_id = metadata.get("graph_id")
+         node_id = metadata.get("node_id")
+         kind = metadata.get("kind")
+         source = metadata.get("source")
+         created_at_ts = metadata.get("created_at_ts")
+
+         # If no created_at_ts given, fallback to "now" (cheap and good enough)
+         if created_at_ts is None:
+             created_at_ts = time.time()
+
+         meta_json = json.dumps(metadata, ensure_ascii=False)
+
+         def _upsert_sync() -> None:
+             conn = self._connect()
+             try:
+                 cur = conn.cursor()
+                 cur.execute(
+                     """
+                     REPLACE INTO docs(
+                         corpus_id,
+                         item_id,
+                         text,
+                         meta_json,
+                         created_at_ts,
+                         org_id,
+                         user_id,
+                         scope_id,
+                         client_id,
+                         app_id,
+                         session_id,
+                         run_id,
+                         graph_id,
+                         node_id,
+                         kind,
+                         source
+                     )
+                     VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
+                     """,
+                     (
+                         corpus,
+                         item_id,
+                         text,
+                         meta_json,
+                         float(created_at_ts),
+                         org_id,
+                         user_id,
+                         scope_id,
+                         client_id,
+                         app_id,
+                         session_id,
+                         run_id,
+                         graph_id,
+                         node_id,
+                         kind,
+                         source,
+                     ),
+                 )
+                 conn.commit()
+             finally:
+                 conn.close()
+
+         await asyncio.to_thread(_upsert_sync)
+
+     async def search(
+         self,
+         *,
+         corpus: str,
+         query: str,
+         top_k: int = 10,
+         filters: dict[str, Any] | None = None,
+         time_window: str | None = None,
+         created_at_min: float | None = None,
+         created_at_max: float | None = None,
+     ) -> list[ScoredItem]:
+         if not query.strip():
+             return []
+
+         filters = filters or {}
+
+         # Compute final time bounds
+         created_at_min, created_at_max = self._parse_time_window(
+             time_window, created_at_min, created_at_max
+         )
+
+         # We’ll do a cheap LIKE search on text and apply filters in SQL where possible,
+         # remaining filters in Python.
+
+         def _search_sync() -> list[ScoredItem]:
+             conn = self._connect()
+             try:
+                 cur = conn.cursor()
+
+                 sql = """
+                     SELECT item_id, text, meta_json, created_at_ts
+                     FROM docs
+                     WHERE corpus_id = ?
+                 """
+                 params: list[Any] = [corpus]
+
+                 # subset of filters we can push into SQL
+                 promoted_cols = {
+                     "org_id",
+                     "user_id",
+                     "scope_id",
+                     "client_id",
+                     "app_id",
+                     "session_id",
+                     "run_id",
+                     "graph_id",
+                     "node_id",
+                     "kind",
+                     "source",
+                 }
+
+                 sql_filters: dict[str, Any] = {}
+                 py_filters: dict[str, Any] = {}
+
+                 for k, v in filters.items():
+                     if v is None:
+                         continue
+                     if k in promoted_cols and not isinstance(v, (list, tuple, set)):  # noqa: UP038
+                         sql_filters[k] = v
+                     else:
+                         py_filters[k] = v
+
+                 for key, val in sql_filters.items():
+                     sql += f" AND {key} = ?"
+                     params.append(val)
+
+                 # Time window
+                 if created_at_min is not None:
+                     sql += " AND created_at_ts >= ?"
+                     params.append(created_at_min)
+                 if created_at_max is not None:
+                     sql += " AND created_at_ts <= ?"
+                     params.append(created_at_max)
+
+                 # Bias toward recent, like vector backend
+                 sql += " ORDER BY created_at_ts DESC LIMIT ?"
+                 params.append(max(top_k * 50, top_k))
+
+                 cur.execute(sql, params)
+                 rows = cur.fetchall()
+             finally:
+                 conn.close()
+
+             # Build results, apply any remaining filters in Python, and
+             # assign a simple "score" (e.g., count of occurrences)
+             results: list[ScoredItem] = []
+
+             # Basic bag-of-words: split query into tokens
+             tokens = [t for t in query.lower().split() if t]
+
+             for item_id, text, meta_json, _ in rows:
+                 meta = json.loads(meta_json)
+
+                 # Python-level filters (e.g., list-valued filters)
+                 match = True
+                 for key, val in py_filters.items():
+                     if key not in meta:
+                         match = False
+                         break
+                     mv = meta[key]
+                     if not self._match_value(mv, val):
+                         match = False
+                         break
+                 if not match:
+                     continue
+
+                 text_lower = (text or "").lower()
+
+                 # Naive scoring: token-based exact matches
+                 match_tokens = 0
+                 total_hits = 0
+                 for tok in tokens:
+                     c = text_lower.count(tok)
+                     if c > 0:
+                         match_tokens += 1
+                         total_hits += c
+
+                 # If none of the tokens appear, skip
+                 if match_tokens == 0:
+                     continue
+
+                 # Score: prioritize docs that match more distinct tokens,
+                 # with a small bump for repeated occurrences.
+                 score = float(match_tokens) + 0.1 * float(total_hits)
+
+                 results.append(
+                     ScoredItem(
+                         item_id=item_id,
+                         corpus=corpus,
+                         score=score,
+                         metadata=meta,
+                     )
+                 )
+
+                 if len(results) >= top_k:
+                     break
+
+             return results
+
+         return await asyncio.to_thread(_search_sync)
+
+     @staticmethod
+     def _match_value(mv: Any, val: Any) -> bool:
+         """
+         Rich matching semantics for filters:
+         - If val is list/tuple/set:
+           - if mv is list-like too -> match if intersection is non-empty
+           - else -> match if mv is in val
+         - If val is scalar:
+           - if mv is list-like -> match if val is in mv
+           - else -> match if mv == val
+         """
+         if val is None:
+             return True
+
+         def _is_list_like(x: Any) -> bool:
+             return isinstance(x, (list, tuple, set))  # noqa: UP038
+
+         if _is_list_like(val):
+             if _is_list_like(mv):
+                 # any overlap between filter values and meta values
+                 return any(x in val for x in mv)
+             else:
+                 # meta is scalar, filter is list-like
+                 return mv in val
+
+         # val is scalar
+         if _is_list_like(mv):
+             return val in mv
+
+         return mv == val
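
For a concrete sense of the scoring rule above, here is a minimal standalone sketch (illustrative, not package code) of the same token-overlap formula the backend applies to each candidate row:

    def lexical_score(query: str, text: str) -> float:
        # Same rule as SQLiteLexicalSearchBackend.search: one point per distinct
        # query token present, plus 0.1 per total occurrence. Note that count()
        # is a substring match, so "art" would also hit inside "artifact".
        tokens = [t for t in query.lower().split() if t]
        text_lower = text.lower()
        match_tokens = 0
        total_hits = 0
        for tok in tokens:
            c = text_lower.count(tok)
            if c > 0:
                match_tokens += 1
                total_hits += c
        if match_tokens == 0:
            return 0.0  # the backend skips these candidates entirely
        return float(match_tokens) + 0.1 * float(total_hits)

    print(lexical_score("sample text json artifact", "A sample JSON artifact with sample text"))
    # 4 distinct tokens matched + 0.1 * 5 total hits -> 4.5
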
aethergraph/storage/search_backend/utils.py
@@ -0,0 +1,31 @@
+ import re
+
+ _DURATION_PATTERN = re.compile(r"^\s*(\d+)\s*([smhd])\s*$")
+
+
+ def _parse_time_window(window: str) -> float:
+     """
+     Parse a simple duration string like:
+     - "30s" (seconds)
+     - "15m" (minutes)
+     - "2h" (hours)
+     - "7d" (days)
+
+     Returns duration in seconds.
+     Raises ValueError on invalid format.
+     """
+     m = _DURATION_PATTERN.match(window)
+     if not m:
+         raise ValueError(f"Invalid time_window format: {window!r}")
+     value = int(m.group(1))
+     unit = m.group(2)
+
+     if unit == "s":
+         return float(value)
+     if unit == "m":
+         return float(value) * 60.0
+     if unit == "h":
+         return float(value) * 3600.0
+     if unit == "d":
+         return float(value) * 86400.0
+     raise ValueError(f"Unknown time unit in time_window: {window!r}")
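
Assuming this helper ships as aethergraph/storage/search_backend/utils.py (the +31 entry in the file list matches this hunk exactly), usage looks like:

    from aethergraph.storage.search_backend.utils import _parse_time_window

    assert _parse_time_window("30s") == 30.0
    assert _parse_time_window("15m") == 900.0      # 15 * 60
    assert _parse_time_window("2h") == 7200.0      # 2 * 3600
    assert _parse_time_window("7d") == 604800.0    # 7 * 86400

    # Anything outside <int><s|m|h|d> raises, unlike the tolerant inline parser
    # in SQLiteLexicalSearchBackend._parse_time_window, which silently leaves
    # the time bounds unchanged on a malformed window.
    try:
        _parse_time_window("two weeks")
    except ValueError as e:
        print(e)  # Invalid time_window format: 'two weeks'
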
aethergraph/storage/search_factory.py
@@ -0,0 +1,75 @@
+ # search_factory.py
+
+ from __future__ import annotations
+
+ import os
+
+ from aethergraph.config.config import AppSettings
+ from aethergraph.config.search import SearchBackendSettings
+ from aethergraph.contracts.services.llm import EmbeddingClientProtocol
+ from aethergraph.contracts.storage.search_backend import SearchBackend
+ from aethergraph.contracts.storage.vector_index import VectorIndex
+ from aethergraph.storage.search_backend.generic_vector_backend import GenericVectorSearchBackend
+ from aethergraph.storage.search_backend.null_backend import NullSearchBackend
+ from aethergraph.storage.search_backend.sqlite_lexical_backend import SQLiteLexicalSearchBackend
+ from aethergraph.storage.vector_index.faiss_index import FAISSVectorIndex
+ from aethergraph.storage.vector_index.sqlite_index import SQLiteVectorIndex
+
+
+ def build_vector_index_for_search(root: str, cfg: SearchBackendSettings) -> VectorIndex:
+     """
+     Helper to build a VectorIndex specifically for search, based on cfg.search.backend.
+     This is intentionally separate from storage.vector_index (legacy RAG index).
+     """
+     if cfg.backend == "sqlite_vector":
+         s = cfg.sqlite_vector
+         index_root = os.path.join(root, s.dir)
+         return SQLiteVectorIndex(root=index_root)
+
+     if cfg.backend == "faiss_vector":
+         s = cfg.faiss_vector
+         index_root = os.path.join(root, s.dir)
+         return FAISSVectorIndex(root=index_root, dim=s.dim)
+
+     raise ValueError(f"build_vector_index_for_search: unsupported backend {cfg.backend!r}")
+
+
+ def build_search_backend(
+     cfg: AppSettings,
+     *,
+     embedder: EmbeddingClientProtocol | None,
+ ) -> SearchBackend:
+     """
+     Factory to build the high-level SearchBackend used by ScopedIndices.
+
+     Respects cfg.search.backend:
+     - "none"           -> NullSearchBackend
+     - "sqlite_lexical" -> SQLiteLexicalSearchBackend
+     - "sqlite_vector"  -> VectorSearchBackend + SQLiteVectorIndex
+     - "faiss_vector"   -> VectorSearchBackend + FAISSVectorIndex
+     """
+     scfg = cfg.search
+     root = os.path.abspath(cfg.root)
+
+     # 1) No search at all
+     if scfg.backend == "none":
+         return NullSearchBackend()
+
+     # 2) Pure lexical, no LLM / embeddings
+     if scfg.backend == "sqlite_lexical":
+         lcfg = scfg.sqlite_lexical
+         db_path = os.path.join(root, lcfg.dir, lcfg.filename)
+         return SQLiteLexicalSearchBackend(db_path=db_path)
+
+     # 3) Vector search backends (sqlite or faiss)
+     if scfg.backend in ("sqlite_vector", "faiss_vector"):
+         if embedder is None:
+             raise RuntimeError(
+                 f"Search backend {scfg.backend!r} requires an embedding client. "
+                 "Pass an EmbeddingClientProtocol instance into build_search_backend()."
+             )
+
+         index = build_vector_index_for_search(root, scfg)
+         return GenericVectorSearchBackend(index=index, embedder=embedder)
+     raise ValueError(f"Unknown search backend: {scfg.backend!r}")
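
A hedged usage sketch of the factory: selecting the pure-lexical backend, which needs no embedding client. The field names mirror what the factory reads (cfg.root, cfg.search.backend, cfg.search.sqlite_lexical.dir / .filename); constructing AppSettings with defaults and mutating it like this is an assumption, not documented API:

    from aethergraph.config.config import AppSettings
    from aethergraph.storage.search_factory import build_search_backend

    cfg = AppSettings()                    # assumed: default-constructible settings
    cfg.search.backend = "sqlite_lexical"  # routes to SQLiteLexicalSearchBackend
    backend = build_search_backend(cfg, embedder=None)  # lexical path never touches the embedder

    # With backend="sqlite_vector" or "faiss_vector", embedder=None would instead
    # raise the RuntimeError above before any index is built.
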
aethergraph/storage/vector_index/faiss_index.py
@@ -148,10 +148,29 @@ class FAISSVectorIndex(VectorIndex):
          corpus_id: str,
          query_vec: list[float],
          k: int,
+         where: dict[str, Any] | None = None,
+         max_candidates: int | None = None,
+         created_at_min: float | None = None,
+         created_at_max: float | None = None,
      ) -> list[dict[str, Any]]:
+         """
+         FAISS-backed search with compatibility to SQLiteVectorIndex:
+
+         - where: equality filters on metadata (e.g., org_id, user_id, scope_id, etc.)
+         - created_at_min / created_at_max: numeric UNIX timestamps for time-range filtering.
+         - max_candidates: how many FAISS hits to retrieve before filtering.
+
+         Since FAISS doesn't support filtering natively, we:
+         1) Search across all vectors (or up to max_candidates).
+         2) Manually filter results by `where` and time bounds.
+         """
+
          if faiss is None:
              raise RuntimeError("FAISS not installed")
 
+         where = where or {}
+
+         # Normalize query vector for cosine similarity
          q = np.asarray([query_vec], dtype=np.float32)
          q = q / (np.linalg.norm(q, axis=1, keepdims=True) + 1e-9)
 
@@ -159,21 +178,70 @@ class FAISSVectorIndex(VectorIndex):
              index, metas = self._load_sync(corpus_id)
              if index is None or not metas:
                  return []
-             D, I = index.search(q, k)  # noqa: E741
-             out: list[dict[str, Any]] = []
+
+             n = len(metas)
+             if n == 0:
+                 return []
+
+             # How many neighbors to ask FAISS for:
+             # - k here is "raw_k" from SearchBackend (e.g., top_k * 3)
+             # - max_candidates is an outer cap (e.g., top_k * 50)
+             search_k = min(
+                 n,
+                 max_candidates or n,
+             )
+             if search_k <= 0:
+                 return []
+
+             # Ask FAISS for the top search_k neighbors
+             D, I = index.search(q, search_k)  # noqa: E741
              scores = D[0].tolist()
              idxs = I[0].tolist()
+
+             out: list[dict[str, Any]] = []
+
              for score, idx in zip(scores, idxs, strict=True):
                  if idx < 0 or idx >= len(metas):
                      continue
-                 m = metas[idx]
+
+                 m = metas[idx]  # {"chunk_id": ..., "meta": {...}}
+                 meta = dict(m.get("meta") or {})
+
+                 # --- Apply `where` equality filters ----------------------
+                 match = True
+                 for key, val in where.items():
+                     if val is None:
+                         continue
+                     if meta.get(key) != val:
+                         match = False
+                         break
+                 if not match:
+                     continue
+
+                 # --- Apply time-window filters ---------------------------
+                 cat = meta.get("created_at_ts")
+                 # If we have a time bound but no created_at_ts, we treat as non-match
+                 if created_at_min is not None and (
+                     cat is None or float(cat) < float(created_at_min)
+                 ):
+                     continue
+                 if created_at_max is not None and (
+                     cat is None or float(cat) > float(created_at_max)
+                 ):
+                     continue
+
                  out.append(
                      {
                          "chunk_id": m["chunk_id"],
                          "score": float(score),
-                         "meta": m["meta"],
+                         "meta": meta,
                      }
                  )
+
+                 # Stop once we've collected k matches
+                 if len(out) >= k:
+                     break
+
              return out
 
          return await asyncio.to_thread(_search_sync)
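
Since FAISS has no native metadata filtering, the pattern above is search-then-filter: over-fetch up to max_candidates, drop hits that fail the equality or time predicates, and stop at k survivors. A self-contained sketch of just that post-filter step (illustrative names, not package code):

    from typing import Any

    def post_filter(
        scores: list[float],
        idxs: list[int],
        metas: list[dict[str, Any]],
        where: dict[str, Any],
        k: int,
        created_at_min: float | None = None,
    ) -> list[dict[str, Any]]:
        out: list[dict[str, Any]] = []
        for score, idx in zip(scores, idxs):
            if idx < 0 or idx >= len(metas):
                continue  # FAISS pads missing neighbors with -1
            meta = metas[idx]
            # Equality filters; None values mean "no constraint"
            if any(v is not None and meta.get(key) != v for key, v in where.items()):
                continue
            # Time bound; a missing created_at_ts fails a time-bounded query
            cat = meta.get("created_at_ts")
            if created_at_min is not None and (cat is None or float(cat) < created_at_min):
                continue
            out.append({"score": float(score), "meta": meta})
            if len(out) >= k:
                break
        return out

    metas = [{"scope_id": "s1", "created_at_ts": 100.0},
             {"scope_id": "s2", "created_at_ts": 200.0}]
    print(post_filter([0.9, 0.8], [0, 1], metas, {"scope_id": "s2"}, k=1))
    # [{'score': 0.8, 'meta': {'scope_id': 's2', 'created_at_ts': 200.0}}]

The over-fetch matters: with selective filters, asking FAISS for only k neighbors can leave zero survivors, which is why the diff sizes search_k from max_candidates rather than from k alone.
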