PyPI - code-context-engine - Versions diffs - 0.4.0__py3-none-any.whl - Mend

code-context-engine 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

code_context_engine-0.4.0.dist-info/METADATA +389 -0
code_context_engine-0.4.0.dist-info/RECORD +63 -0
code_context_engine-0.4.0.dist-info/WHEEL +5 -0
code_context_engine-0.4.0.dist-info/entry_points.txt +4 -0
code_context_engine-0.4.0.dist-info/licenses/LICENSE +21 -0
code_context_engine-0.4.0.dist-info/top_level.txt +1 -0
context_engine/__init__.py +3 -0
context_engine/cli.py +2848 -0
context_engine/cli_style.py +66 -0
context_engine/compression/__init__.py +0 -0
context_engine/compression/compressor.py +144 -0
context_engine/compression/ollama_client.py +33 -0
context_engine/compression/output_rules.py +77 -0
context_engine/compression/prompts.py +9 -0
context_engine/compression/quality.py +37 -0
context_engine/config.py +198 -0
context_engine/dashboard/__init__.py +0 -0
context_engine/dashboard/_page.py +1548 -0
context_engine/dashboard/server.py +429 -0
context_engine/editors.py +265 -0
context_engine/event_bus.py +24 -0
context_engine/indexer/__init__.py +0 -0
context_engine/indexer/chunker.py +147 -0
context_engine/indexer/embedder.py +154 -0
context_engine/indexer/embedding_cache.py +168 -0
context_engine/indexer/git_hooks.py +73 -0
context_engine/indexer/git_indexer.py +136 -0
context_engine/indexer/ignorefile.py +96 -0
context_engine/indexer/manifest.py +78 -0
context_engine/indexer/pipeline.py +624 -0
context_engine/indexer/secrets.py +332 -0
context_engine/indexer/watcher.py +109 -0
context_engine/integration/__init__.py +0 -0
context_engine/integration/bootstrap.py +76 -0
context_engine/integration/git_context.py +132 -0
context_engine/integration/mcp_server.py +1825 -0
context_engine/integration/session_capture.py +306 -0
context_engine/memory/__init__.py +6 -0
context_engine/memory/compressor.py +344 -0
context_engine/memory/db.py +922 -0
context_engine/memory/extractive.py +106 -0
context_engine/memory/grammar.py +419 -0
context_engine/memory/hook_installer.py +258 -0
context_engine/memory/hook_server.py +83 -0
context_engine/memory/hooks.py +327 -0
context_engine/memory/migrate.py +268 -0
context_engine/models.py +96 -0
context_engine/pricing.py +104 -0
context_engine/project_commands.py +296 -0
context_engine/retrieval/__init__.py +0 -0
context_engine/retrieval/confidence.py +47 -0
context_engine/retrieval/query_parser.py +105 -0
context_engine/retrieval/retriever.py +199 -0
context_engine/serve_http.py +208 -0
context_engine/services.py +252 -0
context_engine/storage/__init__.py +0 -0
context_engine/storage/backend.py +39 -0
context_engine/storage/fts_store.py +112 -0
context_engine/storage/graph_store.py +219 -0
context_engine/storage/local_backend.py +109 -0
context_engine/storage/remote_backend.py +117 -0
context_engine/storage/vector_store.py +357 -0
context_engine/utils.py +72 -0

context_engine/serve_http.py ADDED Viewed

@@ -0,0 +1,208 @@
+"""HTTP API server for remote context engine — exposes storage + compression endpoints.
+Security model:
+- Default bind is 127.0.0.1. Use --host 0.0.0.0 explicitly to expose on LAN.
+- When bound to a non-loopback host, a bearer token is required. Set via the
+  CCE_API_TOKEN env var; requests without a matching `Authorization: Bearer <token>`
+  header get 401. Loopback requests skip auth for local development.
+"""
+import hmac
+import os
+from pathlib import Path
+from context_engine.config import load_config, PROJECT_CONFIG_NAME
+from context_engine.storage.local_backend import LocalBackend
+from context_engine.indexer.embedder import Embedder
+from context_engine.compression.compressor import Compressor
+from context_engine.models import Chunk, ChunkType, GraphNode, GraphEdge, NodeType, EdgeType
+try:
+    from aiohttp import web
+except ImportError as e:
+    raise ImportError(
+        "aiohttp is required for HTTP serve mode. "
+        "Install with: pip install 'code-context-engine[http]'"
+    ) from e
+_MAX_REQUEST_BYTES = 10 * 1024 * 1024  # 10 MB — generous for bulk ingest, not unbounded
+_LOOPBACK_HOSTS = {"127.0.0.1", "::1", "localhost"}
+class ContextEngineHTTP:
+    def __init__(self, backend: LocalBackend, embedder: Embedder, compressor: Compressor):
+        self.backend = backend
+        self.embedder = embedder
+        self.compressor = compressor
+    async def handle_vector_search(self, request: web.Request) -> web.Response:
+        data = await _read_json(request)
+        embedding = data["embedding"]
+        top_k = data.get("top_k", 10)
+        results = await self.backend.vector_search(embedding, top_k=top_k)
+        return web.json_response({"results": [self._chunk_to_dict(c) for c in results]})
+    async def handle_fts_search(self, request: web.Request) -> web.Response:
+        data = await _read_json(request)
+        query = data["query"]
+        top_k = data.get("top_k", 30)
+        results = await self.backend.fts_search(query, top_k=top_k)
+        return web.json_response({"results": [{"id": i, "score": s} for i, s in results]})
+    async def handle_chunks_by_ids(self, request: web.Request) -> web.Response:
+        data = await _read_json(request)
+        ids = data.get("ids", [])
+        if not isinstance(ids, list):
+            return web.json_response({"error": "ids must be a list"}, status=400)
+        chunks = await self.backend.get_chunks_by_ids(ids)
+        return web.json_response({"results": [self._chunk_to_dict(c) for c in chunks]})
+    async def handle_graph_neighbors(self, request: web.Request) -> web.Response:
+        data = await _read_json(request)
+        node_id = data["node_id"]
+        edge_type = EdgeType(data["edge_type"]) if data.get("edge_type") else None
+        results = await self.backend.graph_neighbors(node_id, edge_type=edge_type)
+        return web.json_response({"results": [self._node_to_dict(n) for n in results]})
+    async def handle_ingest(self, request: web.Request) -> web.Response:
+        data = await _read_json(request)
+        chunks = [self._dict_to_chunk(d) for d in data.get("chunks", [])]
+        nodes = [self._dict_to_node(d) for d in data.get("nodes", [])]
+        edges = [self._dict_to_edge(d) for d in data.get("edges", [])]
+        await self.backend.ingest(chunks, nodes, edges)
+        return web.json_response({"ok": True})
+    async def handle_get_chunk(self, request: web.Request) -> web.Response:
+        chunk_id = request.match_info["chunk_id"]
+        chunk = await self.backend.get_chunk_by_id(chunk_id)
+        if chunk is None:
+            return web.json_response({"error": "not found"}, status=404)
+        return web.json_response(self._chunk_to_dict(chunk))
+    async def handle_delete_file(self, request: web.Request) -> web.Response:
+        file_path = request.match_info["file_path"]
+        # Reject absolute paths and traversal — delete_by_file is SQL-only today,
+        # but treating file_path as a relative project path is a safer contract.
+        if file_path.startswith("/") or ".." in Path(file_path).parts:
+            return web.json_response({"error": "invalid file_path"}, status=400)
+        await self.backend.delete_by_file(file_path)
+        return web.json_response({"ok": True})
+    async def handle_health(self, request: web.Request) -> web.Response:
+        return web.json_response({"status": "ok"})
+    def _chunk_to_dict(self, chunk):
+        return {"id": chunk.id, "content": chunk.content, "chunk_type": chunk.chunk_type.value,
+                "file_path": chunk.file_path, "start_line": chunk.start_line, "end_line": chunk.end_line,
+                "language": chunk.language, "embedding": chunk.embedding, "metadata": chunk.metadata}
+    def _dict_to_chunk(self, d):
+        return Chunk(id=d["id"], content=d["content"], chunk_type=ChunkType(d["chunk_type"]),
+                     file_path=d["file_path"], start_line=d["start_line"], end_line=d["end_line"],
+                     language=d["language"], embedding=d.get("embedding"), metadata=d.get("metadata", {}))
+    def _node_to_dict(self, node):
+        return {"id": node.id, "node_type": node.node_type.value, "name": node.name, "file_path": node.file_path}
+    def _dict_to_node(self, d):
+        return GraphNode(id=d["id"], node_type=NodeType(d["node_type"]), name=d["name"], file_path=d["file_path"])
+    def _dict_to_edge(self, d):
+        return GraphEdge(source_id=d["source_id"], target_id=d["target_id"], edge_type=EdgeType(d["edge_type"]))
+async def _read_json(request: web.Request) -> dict:
+    try:
+        return await request.json()
+    except Exception as e:
+        raise web.HTTPBadRequest(
+            text=f'{{"error": "invalid JSON: {type(e).__name__}"}}',
+            content_type="application/json",
+        )
+@web.middleware
+async def _error_middleware(request, handler):
+    try:
+        return await handler(request)
+    except web.HTTPException:
+        raise
+    except KeyError as e:
+        return web.json_response({"error": f"missing field: {e.args[0]}"}, status=400)
+    except ValueError as e:
+        return web.json_response({"error": str(e)}, status=400)
+def _make_auth_middleware(expected_token: str | None):
+    @web.middleware
+    async def _auth(request, handler):
+        # Health check is always open — used by liveness probes.
+        if request.path == "/health":
+            return await handler(request)
+        remote = request.remote or ""
+        # Loopback requests skip auth regardless of token setting — local dev UX.
+        if remote in _LOOPBACK_HOSTS:
+            return await handler(request)
+        if not expected_token:
+            # Bound to non-loopback but no token configured: refuse. Prevents
+            # accidentally exposing an unauthenticated server to a network.
+            return web.json_response(
+                {"error": "server is not configured for non-loopback access; set CCE_API_TOKEN"},
+                status=503,
+            )
+        auth_header = request.headers.get("Authorization", "")
+        if not auth_header.startswith("Bearer "):
+            return web.json_response({"error": "missing bearer token"}, status=401)
+        presented = auth_header[len("Bearer "):]
+        if not hmac.compare_digest(presented, expected_token):
+            return web.json_response({"error": "invalid token"}, status=401)
+        return await handler(request)
+    return _auth
+def create_app(backend, embedder, compressor, *, api_token: str | None = None) -> web.Application:
+    handler = ContextEngineHTTP(backend, embedder, compressor)
+    app = web.Application(
+        client_max_size=_MAX_REQUEST_BYTES,
+        middlewares=[_make_auth_middleware(api_token), _error_middleware],
+    )
+    app.router.add_get("/health", handler.handle_health)
+    app.router.add_post("/vector_search", handler.handle_vector_search)
+    app.router.add_post("/fts_search", handler.handle_fts_search)
+    app.router.add_post("/chunks_by_ids", handler.handle_chunks_by_ids)
+    app.router.add_post("/graph_neighbors", handler.handle_graph_neighbors)
+    app.router.add_post("/ingest", handler.handle_ingest)
+    app.router.add_get("/chunk/{chunk_id}", handler.handle_get_chunk)
+    app.router.add_delete("/file/{file_path:.*}", handler.handle_delete_file)
+    return app
+def run_http_server(config=None, host: str = "127.0.0.1", port: int = 8765) -> None:
+    if config is None:
+        project_path = Path.cwd() / PROJECT_CONFIG_NAME
+        config = load_config(project_path=project_path if project_path.exists() else None)
+    project_name = Path.cwd().name
+    storage_base = Path(config.storage_path) / project_name
+    storage_base.mkdir(parents=True, exist_ok=True)
+    backend = LocalBackend(base_path=str(storage_base))
+    embedder = Embedder(model_name=config.embedding_model)
+    compressor = Compressor(model=config.compression_model, cache=backend)
+    api_token = os.environ.get("CCE_API_TOKEN") or None
+    if host not in _LOOPBACK_HOSTS and not api_token:
+        raise SystemExit(
+            f"Refusing to bind {host}:{port} without CCE_API_TOKEN set. "
+            "Either bind --host 127.0.0.1 or export CCE_API_TOKEN=<secret>."
+        )
+    app = create_app(backend, embedder, compressor, api_token=api_token)
+    print(f"Context engine HTTP server starting on {host}:{port}")
+    if api_token:
+        print("Auth: bearer token required for non-loopback requests")
+    web.run_app(app, host=host, port=port, print=None)

context_engine/services.py ADDED Viewed

@@ -0,0 +1,252 @@
+"""Service management for CCE — Ollama and Dashboard start/stop/status.
+PID files live in <storage_base>/pids/ where storage_base is resolved
+from config.yaml (defaults to ~/.cce):
+  ollama.pid       PID of the ollama process CCE started
+  dashboard.pid    PID of the dashboard process CCE started
+  dashboard.port   Port the dashboard is running on
+"""
+from __future__ import annotations
+import logging
+import os
+import signal
+import socket
+import subprocess
+from pathlib import Path
+log = logging.getLogger(__name__)
+_DASHBOARD_DEFAULT_PORT = 8080
+def _storage_base() -> Path:
+    """Resolve storage base from config, falling back to default."""
+    try:
+        from context_engine.config import load_config
+        config = load_config()
+        return Path(config.storage_path).parent
+    except Exception as exc:
+        log.debug("Could not load config for storage base, using default: %s", exc)
+        from context_engine.config import _CCE_HOME
+        return _CCE_HOME
+def _pid_dir() -> Path:
+    d = _storage_base() / "pids"
+    d.mkdir(parents=True, exist_ok=True)
+    return d
+def _read_pid(name: str) -> int | None:
+    p = _pid_dir() / f"{name}.pid"
+    try:
+        return int(p.read_text().strip())
+    except (FileNotFoundError, ValueError):
+        return None
+def _write_pid(name: str, pid: int) -> None:
+    (_pid_dir() / f"{name}.pid").write_text(str(pid))
+def _remove_pid(name: str) -> None:
+    p = _pid_dir() / f"{name}.pid"
+    p.unlink(missing_ok=True)
+def _process_alive(pid: int) -> bool:
+    try:
+        os.kill(pid, 0)
+        return True
+    except ProcessLookupError:
+        return False
+    except PermissionError:
+        # Process exists but owned by another user
+        return True
+def _check_port_open(port: int, host: str = "127.0.0.1") -> bool:
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        s.settimeout(0.5)
+        return s.connect_ex((host, port)) == 0
+def _ollama_running() -> bool:
+    """Check if Ollama is responding on its default port."""
+    try:
+        import httpx
+        resp = httpx.get("http://localhost:11434/api/tags", timeout=2.0)
+        return resp.status_code == 200
+    except Exception:
+        return False
+def _mcp_running() -> bool:
+    """Check if a cce serve process is running via pgrep (or ps fallback)."""
+    try:
+        result = subprocess.run(
+            ["pgrep", "-f", "cce serve"],
+            capture_output=True, text=True, timeout=3,
+        )
+        if result.returncode == 0:
+            return True
+        # returncode 1 = no matches (normal). Any other code or stderr
+        # suggests pgrep itself failed — fall through to ps fallback.
+        if result.returncode == 1 and not result.stderr.strip():
+            return False
+    except FileNotFoundError:
+        pass
+    except Exception:
+        pass
+    # Fallback: ps with grep exclusion
+    try:
+        result = subprocess.run(
+            ["ps", "aux"], capture_output=True, text=True, timeout=3,
+        )
+        for line in result.stdout.splitlines():
+            if "cce serve" in line and "grep" not in line:
+                return True
+    except Exception:
+        pass
+    return False
+# ── Public status API ─────────────────────────────────────────────────────────
+def get_ollama_status() -> dict:
+    running = _ollama_running()
+    managed_pid = _read_pid("ollama")
+    managed = managed_pid is not None and _process_alive(managed_pid)
+    detail = ""
+    if running:
+        detail = "localhost:11434"
+        if not managed:
+            detail += " (external)"
+    return {
+        "name": "ollama",
+        "running": running,
+        "managed": managed,
+        "detail": detail,
+    }
+def get_dashboard_status() -> dict:
+    port_file = _pid_dir() / "dashboard.port"
+    try:
+        port = int(port_file.read_text().strip())
+    except (FileNotFoundError, ValueError):
+        port = None
+    managed_pid = _read_pid("dashboard")
+    managed = managed_pid is not None and _process_alive(managed_pid)
+    running = False
+    detail = ""
+    if port and _check_port_open(port):
+        running = True
+        detail = f"http://localhost:{port}"
+    elif managed:
+        # PID alive but port not answering yet (starting up)
+        running = True
+        detail = "starting..."
+    return {
+        "name": "dashboard",
+        "running": running,
+        "managed": managed,
+        "port": port,
+        "detail": detail,
+    }
+def get_mcp_status() -> dict:
+    running = _mcp_running()
+    return {
+        "name": "mcp",
+        "running": running,
+        "managed": False,  # always managed by Claude Code
+        "detail": "managed by Claude Code" if running else "",
+    }
+# ── Start/stop ────────────────────────────────────────────────────────────────
+def start_ollama() -> tuple[bool, str]:
+    """Start ollama serve in the background. Returns (success, message)."""
+    if _ollama_running():
+        return False, "Ollama is already running."
+    try:
+        proc = subprocess.Popen(
+            ["ollama", "serve"],
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+            # Detach from CCE's process group so SIGINT to the CLI doesn't
+            # kill the background daemon. Works on both Linux and macOS.
+            start_new_session=True,
+        )
+        _write_pid("ollama", proc.pid)
+        return True, f"Ollama started (PID {proc.pid})"
+    except FileNotFoundError:
+        return False, "ollama not found. Install: https://ollama.com (or `brew install ollama` on macOS)"
+    except Exception as exc:
+        return False, f"Failed to start Ollama: {exc}"
+def stop_ollama() -> tuple[bool, str]:
+    """Stop the Ollama process CCE started."""
+    pid = _read_pid("ollama")
+    if pid is None:
+        if _ollama_running():
+            return False, "Ollama is running but was not started by CCE (external process)."
+        return False, "Ollama is not running."
+    if not _process_alive(pid):
+        _remove_pid("ollama")
+        return False, "Ollama process already stopped."
+    try:
+        os.kill(pid, signal.SIGTERM)
+        _remove_pid("ollama")
+        return True, f"Ollama stopped (PID {pid})"
+    except Exception as exc:
+        return False, f"Failed to stop Ollama: {exc}"
+def start_dashboard(port: int = _DASHBOARD_DEFAULT_PORT) -> tuple[bool, str]:
+    """Start CCE dashboard as a background process."""
+    status = get_dashboard_status()
+    if status["running"]:
+        return False, f"Dashboard is already running at {status['detail']}"
+    try:
+        from context_engine.utils import resolve_cce_binary
+        cce_bin = resolve_cce_binary()
+        proc = subprocess.Popen(
+            [cce_bin, "dashboard", "--no-browser", "--port", str(port)],
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+            start_new_session=True,
+        )
+        _write_pid("dashboard", proc.pid)
+        (_pid_dir() / "dashboard.port").write_text(str(port))
+        return True, f"Dashboard started at http://localhost:{port} (PID {proc.pid})"
+    except Exception as exc:
+        return False, f"Failed to start dashboard: {exc}"
+def stop_dashboard() -> tuple[bool, str]:
+    """Stop the CCE dashboard process."""
+    pid = _read_pid("dashboard")
+    if pid is None:
+        return False, "Dashboard is not running (no PID on record)."
+    if not _process_alive(pid):
+        _remove_pid("dashboard")
+        (_pid_dir() / "dashboard.port").unlink(missing_ok=True)
+        return False, "Dashboard process already stopped."
+    try:
+        os.kill(pid, signal.SIGTERM)
+        _remove_pid("dashboard")
+        (_pid_dir() / "dashboard.port").unlink(missing_ok=True)
+        return True, f"Dashboard stopped (PID {pid})"
+    except Exception as exc:
+        return False, f"Failed to stop dashboard: {exc}"

context_engine/storage/__init__.py ADDED Viewed

File without changes

context_engine/storage/backend.py ADDED Viewed

@@ -0,0 +1,39 @@
+"""Storage backend protocol — implemented by local and remote backends."""
+from typing import Protocol, runtime_checkable
+from context_engine.models import Chunk, GraphNode, GraphEdge, NodeType, EdgeType
+@runtime_checkable
+class StorageBackend(Protocol):
+    async def ingest(
+        self,
+        chunks: list[Chunk],
+        nodes: list[GraphNode],
+        edges: list[GraphEdge],
+    ) -> None: ...
+    async def vector_search(
+        self,
+        query_embedding: list[float],
+        top_k: int = 10,
+        filters: dict | None = None,
+    ) -> list[Chunk]: ...
+    async def graph_neighbors(
+        self,
+        node_id: str,
+        edge_type: EdgeType | None = None,
+    ) -> list[GraphNode]: ...
+    async def get_chunk_by_id(self, chunk_id: str) -> Chunk | None: ...
+    async def delete_by_file(self, file_path: str) -> None: ...
+    async def fts_search(
+        self,
+        query: str,
+        top_k: int = 30,
+    ) -> list[tuple[str, float]]: ...
+    async def get_chunks_by_ids(self, chunk_ids: list[str]) -> list[Chunk]: ...

context_engine/storage/fts_store.py ADDED Viewed

@@ -0,0 +1,112 @@
+"""SQLite FTS5 full-text search store."""
+import asyncio
+import logging
+import os
+import sqlite3
+from threading import RLock
+from context_engine.models import Chunk
+log = logging.getLogger(__name__)
+_MAX_CONTENT_CHARS = 5_000
+def _escape_fts5(query: str) -> str:
+    """Wrap user input as an FTS5 phrase to avoid operator injection."""
+    return '"' + query.replace('"', '""') + '"'
+class FTSStore:
+    """Single-connection SQLite FTS store, serialised with an RLock.
+    `check_same_thread=False` only disables thread ownership checks; concurrent
+    operations on one sqlite3 connection are still unsafe. Mirrors VectorStore's
+    locking pattern so dashboard/MCP/reindex calls running through asyncio
+    .to_thread don't interleave on the connection.
+    """
+    def __init__(self, db_path: str) -> None:
+        os.makedirs(db_path, exist_ok=True)
+        self._lock = RLock()
+        self._conn = sqlite3.connect(
+            os.path.join(db_path, "fts.db"), check_same_thread=False
+        )
+        with self._lock:
+            self._conn.execute(
+                "CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts "
+                "USING fts5(id UNINDEXED, content, file_path, language, chunk_type)"
+            )
+            self._conn.commit()
+    def _ingest_sync(self, chunks: list[Chunk]) -> None:
+        # executemany packs all rows into one prepared-statement batch — about
+        # 30-50% faster than the per-row INSERT loop on 1000+ chunks.
+        rows = [
+            (
+                chunk.id,
+                chunk.content[:_MAX_CONTENT_CHARS] if len(chunk.content) > _MAX_CONTENT_CHARS else chunk.content,
+                chunk.file_path,
+                chunk.language,
+                chunk.chunk_type.value,
+            )
+            for chunk in chunks
+        ]
+        with self._lock:
+            self._conn.executemany(
+                "INSERT OR REPLACE INTO chunks_fts(id, content, file_path, language, chunk_type) "
+                "VALUES (?, ?, ?, ?, ?)",
+                rows,
+            )
+            self._conn.commit()
+    def _search_sync(self, escaped_query: str, top_k: int) -> list[tuple[str, float]]:
+        with self._lock:
+            cursor = self._conn.execute(
+                "SELECT id, rank FROM chunks_fts WHERE chunks_fts MATCH ? "
+                "ORDER BY rank LIMIT ?",
+                (escaped_query, top_k),
+            )
+            return [(row[0], float(row[1])) for row in cursor.fetchall()]
+    def _delete_sync(self, file_path: str) -> None:
+        with self._lock:
+            self._conn.execute(
+                "DELETE FROM chunks_fts WHERE file_path = ?", (file_path,)
+            )
+            self._conn.commit()
+    def _delete_files_sync(self, file_paths: list[str]) -> None:
+        if not file_paths:
+            return
+        from context_engine.utils import batched_params
+        with self._lock:
+            for batch in batched_params(file_paths):
+                placeholders = ",".join("?" * len(batch))
+                self._conn.execute(
+                    f"DELETE FROM chunks_fts WHERE file_path IN ({placeholders})",
+                    batch,
+                )
+            self._conn.commit()
+    async def ingest(self, chunks: list[Chunk]) -> None:
+        if not chunks:
+            return
+        await asyncio.to_thread(self._ingest_sync, chunks)
+    async def search(self, query: str, top_k: int = 30) -> list[tuple[str, float]]:
+        if not query.strip():
+            return []
+        return await asyncio.to_thread(self._search_sync, _escape_fts5(query), top_k)
+    def clear(self) -> None:
+        with self._lock:
+            self._conn.execute("DELETE FROM chunks_fts")
+            self._conn.commit()
+    async def delete_by_file(self, file_path: str) -> None:
+        await asyncio.to_thread(self._delete_sync, file_path)
+    async def delete_by_files(self, file_paths: list[str]) -> None:
+        await asyncio.to_thread(self._delete_files_sync, file_paths)