PyPI - code-context-engine - Versions diffs - 0.4.0__py3-none-any.whl - Mend

code-context-engine 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

code_context_engine-0.4.0.dist-info/METADATA +389 -0
code_context_engine-0.4.0.dist-info/RECORD +63 -0
code_context_engine-0.4.0.dist-info/WHEEL +5 -0
code_context_engine-0.4.0.dist-info/entry_points.txt +4 -0
code_context_engine-0.4.0.dist-info/licenses/LICENSE +21 -0
code_context_engine-0.4.0.dist-info/top_level.txt +1 -0
context_engine/__init__.py +3 -0
context_engine/cli.py +2848 -0
context_engine/cli_style.py +66 -0
context_engine/compression/__init__.py +0 -0
context_engine/compression/compressor.py +144 -0
context_engine/compression/ollama_client.py +33 -0
context_engine/compression/output_rules.py +77 -0
context_engine/compression/prompts.py +9 -0
context_engine/compression/quality.py +37 -0
context_engine/config.py +198 -0
context_engine/dashboard/__init__.py +0 -0
context_engine/dashboard/_page.py +1548 -0
context_engine/dashboard/server.py +429 -0
context_engine/editors.py +265 -0
context_engine/event_bus.py +24 -0
context_engine/indexer/__init__.py +0 -0
context_engine/indexer/chunker.py +147 -0
context_engine/indexer/embedder.py +154 -0
context_engine/indexer/embedding_cache.py +168 -0
context_engine/indexer/git_hooks.py +73 -0
context_engine/indexer/git_indexer.py +136 -0
context_engine/indexer/ignorefile.py +96 -0
context_engine/indexer/manifest.py +78 -0
context_engine/indexer/pipeline.py +624 -0
context_engine/indexer/secrets.py +332 -0
context_engine/indexer/watcher.py +109 -0
context_engine/integration/__init__.py +0 -0
context_engine/integration/bootstrap.py +76 -0
context_engine/integration/git_context.py +132 -0
context_engine/integration/mcp_server.py +1825 -0
context_engine/integration/session_capture.py +306 -0
context_engine/memory/__init__.py +6 -0
context_engine/memory/compressor.py +344 -0
context_engine/memory/db.py +922 -0
context_engine/memory/extractive.py +106 -0
context_engine/memory/grammar.py +419 -0
context_engine/memory/hook_installer.py +258 -0
context_engine/memory/hook_server.py +83 -0
context_engine/memory/hooks.py +327 -0
context_engine/memory/migrate.py +268 -0
context_engine/models.py +96 -0
context_engine/pricing.py +104 -0
context_engine/project_commands.py +296 -0
context_engine/retrieval/__init__.py +0 -0
context_engine/retrieval/confidence.py +47 -0
context_engine/retrieval/query_parser.py +105 -0
context_engine/retrieval/retriever.py +199 -0
context_engine/serve_http.py +208 -0
context_engine/services.py +252 -0
context_engine/storage/__init__.py +0 -0
context_engine/storage/backend.py +39 -0
context_engine/storage/fts_store.py +112 -0
context_engine/storage/graph_store.py +219 -0
context_engine/storage/local_backend.py +109 -0
context_engine/storage/remote_backend.py +117 -0
context_engine/storage/vector_store.py +357 -0
context_engine/utils.py +72 -0

context_engine/integration/session_capture.py ADDED Viewed

@@ -0,0 +1,306 @@
+"""Session history capture — records decisions, code areas, and Q&A for future recall."""
+import json
+import logging
+import threading
+import time
+import uuid
+from pathlib import Path
+from context_engine.utils import atomic_write_text as _atomic_write_text
+log = logging.getLogger(__name__)
+# Once a project accumulates more session JSONs than this, the oldest are
+# consolidated into decisions_log.json (decisions only — the durable signal)
+# and the source files are removed. The most recent _PRUNE_KEEP files are
+# always preserved verbatim.
+_PRUNE_THRESHOLD = 100
+_PRUNE_KEEP = 50
+# File name (inside the sessions dir) of the consolidated decisions archive.
+# It is excluded whenever the per-session JSON files are listed.
+_DECISIONS_LOG_NAME = "decisions_log.json"
+class SessionCapture:
+    """Thread-safe session log. All `_active` access goes through `_lock` so
+    concurrent MCP tool calls (e.g. record_decision while end_session flushes)
+    can't interleave a half-mutation."""
+    def __init__(self, sessions_dir: str) -> None:
+        self._sessions_dir = sessions_dir
+        Path(sessions_dir).mkdir(parents=True, exist_ok=True)
+        self._active: dict[str, dict] = {}
+        self._lock = threading.RLock()
+    def start_session(self, project_name: str) -> str:
+        session_id = uuid.uuid4().hex[:12]
+        with self._lock:
+            self._active[session_id] = {
+                "id": session_id, "project": project_name, "started_at": time.time(),
+                "decisions": [], "code_areas": [], "questions": [],
+                # touched_files: per-file count of how many times the chunk was
+                # surfaced or opened during the session. Auto-captured by the
+                # MCP server so even sessions where Claude never explicitly
+                # calls `record_code_area` leave a useful breadcrumb.
+                "touched_files": {},
+            }
+        return session_id
+    def record_decision(self, session_id, decision, reason):
+        with self._lock:
+            session = self._active.get(session_id)
+            if session:
+                session["decisions"].append({"decision": decision, "reason": reason, "timestamp": time.time()})
+    def record_code_area(self, session_id, file_path, description):
+        with self._lock:
+            session = self._active.get(session_id)
+            if session:
+                session["code_areas"].append({"file_path": file_path, "description": description, "timestamp": time.time()})
+    def touch_files(self, session_id, file_paths) -> None:
+        """Bump the touched-files counter for each path. Auto-called by the
+        MCP server whenever a result references a file or a chunk is opened.
+        Cheap (in-memory dict update); persisted on the next flush."""
+        if not file_paths:
+            return
+        with self._lock:
+            session = self._active.get(session_id)
+            if not session:
+                return
+            counts = session.setdefault("touched_files", {})
+            for fp in file_paths:
+                if not fp or fp.startswith("git:"):
+                    continue
+                counts[fp] = counts.get(fp, 0) + 1
+    def get_session_snapshot(self, session_id) -> dict | None:
+        """Return a shallow copy of the active session for safe inspection.
+        Returns None if the session_id isn't in _active."""
+        with self._lock:
+            session = self._active.get(session_id)
+            if session is None:
+                return None
+            return dict(session)
+    def get_decisions(self, session_id):
+        with self._lock:
+            session = self._active.get(session_id)
+            # Defensive copy so the caller can iterate without holding the lock.
+            return list(session["decisions"]) if session else []
+    def get_code_areas(self, session_id):
+        with self._lock:
+            session = self._active.get(session_id)
+            return list(session["code_areas"]) if session else []
+    def end_session(self, session_id):
+        with self._lock:
+            session = self._active.pop(session_id, None)
+        if session:
+            session["ended_at"] = time.time()
+            file_path = Path(self._sessions_dir) / f"{session_id}.json"
+            _atomic_write_text(file_path, json.dumps(session, indent=2))
+    def load_recent_sessions(self, limit=5):
+        sessions_path = Path(self._sessions_dir)
+        files = [
+            f for f in sessions_path.glob("*.json")
+            # decisions_log.json is the consolidated archive, not a session.
+            if f.name != _DECISIONS_LOG_NAME
+        ]
+        files.sort(key=lambda p: p.stat().st_mtime, reverse=True)
+        sessions = []
+        for f in files[:limit]:
+            try:
+                with open(f) as fp:
+                    sessions.append(json.load(fp))
+            except (json.JSONDecodeError, OSError):
+                # Skip corrupt session files; don't blow up recall.
+                continue
+        return sessions
+    def prune_old_sessions(
+        self,
+        threshold: int = _PRUNE_THRESHOLD,
+        keep: int = _PRUNE_KEEP,
+    ) -> dict:
+        """Consolidate old session JSONs into decisions_log.json + delete them.
+        Triggered automatically at server start when there are more than
+        `threshold` session files; can also be run from the CLI as
+        `cce sessions prune`. Returns a summary dict so the caller can report.
+        Only the *decisions* (and their reasons + timestamps + originating
+        session id) survive consolidation. code_areas and questions in old
+        sessions are dropped — they were heuristic auto-captures and the
+        signal-to-noise drops fast as they age.
+        Uses an fcntl advisory lock on `.prune.lock` in the sessions dir so
+        two processes can't race the read-append-write on decisions_log.json
+        (last-write-wins would clobber one process's appended decisions).
+        On Windows fcntl is unavailable; we fall through without a lock and
+        accept the rare race — Windows isn't a supported deploy target today.
+        """
+        sessions_path = Path(self._sessions_dir)
+        sessions_path.mkdir(parents=True, exist_ok=True)
+        lock_path = sessions_path / ".prune.lock"
+        # Acquire an exclusive flock; fall back to no-op on platforms where
+        # fcntl isn't available so the prune still runs (just unlocked).
+        lock_fh = None
+        try:
+            import fcntl  # POSIX only; ImportError on Windows
+            lock_fh = open(lock_path, "w")
+            try:
+                fcntl.flock(lock_fh.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+            except BlockingIOError:
+                # Another process is pruning right now — let it finish.
+                lock_fh.close()
+                return {"pruned": 0, "kept": -1, "reason": "another prune in progress"}
+        except ImportError:
+            lock_fh = None
+        try:
+            return self._prune_locked(sessions_path, threshold, keep)
+        finally:
+            if lock_fh is not None:
+                try:
+                    import fcntl
+                    fcntl.flock(lock_fh.fileno(), fcntl.LOCK_UN)
+                except Exception:
+                    pass
+                lock_fh.close()
+    def _prune_locked(
+        self,
+        sessions_path: Path,
+        threshold: int,
+        keep: int,
+    ) -> dict:
+        """The actual prune work. Caller holds the cross-process flock."""
+        files = sorted(
+            (f for f in sessions_path.glob("*.json") if f.name != _DECISIONS_LOG_NAME),
+            key=lambda p: p.stat().st_mtime,
+            reverse=True,
+        )
+        if len(files) <= threshold:
+            return {"pruned": 0, "kept": len(files), "reason": "below threshold"}
+        keep_files = files[:keep]
+        old_files = files[keep:]
+        log_path = sessions_path / _DECISIONS_LOG_NAME
+        existing: list[dict] = []
+        if log_path.exists():
+            try:
+                existing = json.loads(log_path.read_text())
+                if not isinstance(existing, list):
+                    existing = []
+            except (json.JSONDecodeError, OSError):
+                existing = []
+        appended = 0
+        for f in old_files:
+            if f == log_path:
+                continue
+            try:
+                data = json.loads(f.read_text())
+            except (json.JSONDecodeError, OSError) as exc:
+                log.warning("Skipping unreadable session file %s: %s", f, exc)
+                continue
+            for d in data.get("decisions", []):
+                existing.append({
+                    "decision": d.get("decision", ""),
+                    "reason": d.get("reason", ""),
+                    "timestamp": d.get("timestamp", 0.0),
+                    "session_id": data.get("id", ""),
+                })
+                appended += 1
+        try:
+            _atomic_write_text(log_path, json.dumps(existing, indent=2))
+        except OSError as exc:
+            log.warning("Failed to write decisions_log: %s", exc)
+            return {"pruned": 0, "kept": len(files), "reason": f"write failed: {exc}"}
+        deleted = 0
+        for f in old_files:
+            if f == log_path:
+                continue
+            try:
+                f.unlink()
+                deleted += 1
+            except OSError as exc:
+                log.warning("Failed to remove old session %s: %s", f, exc)
+        return {
+            "pruned": deleted,
+            "kept": len(keep_files),
+            "decisions_appended": appended,
+            "decisions_log": str(log_path),
+        }
+    def _load_consolidated_decisions(self) -> list[dict]:
+        """Read decisions_log.json (the consolidated archive). Returns []
+        when absent or unreadable — never raises."""
+        log_path = Path(self._sessions_dir) / _DECISIONS_LOG_NAME
+        if not log_path.exists():
+            return []
+        try:
+            data = json.loads(log_path.read_text())
+            return data if isinstance(data, list) else []
+        except (json.JSONDecodeError, OSError):
+            return []
+    def get_recent_decisions(self, limit: int = 10, session_limit: int = 50) -> list[str]:
+        """Return the most-recent decision strings across recent sessions.
+        Used by the bootstrap prompt to inject prior decisions at session
+        start without relying on a topic-grep that often returns nothing.
+        Includes any decisions in the currently active in-memory session.
+        Order: newest first by recorded timestamp.
+        """
+        decisions: list[tuple[float, str]] = []
+        # Active in-memory sessions first (may not yet be flushed to disk).
+        # Snapshot under the lock so a concurrent record_decision can't mutate
+        # the list while we're iterating it.
+        import copy
+        with self._lock:
+            active_snapshot = copy.deepcopy(list(self._active.values()))
+        for session in active_snapshot:
+            for d in session.get("decisions", []):
+                ts = d.get("timestamp", 0.0)
+                text = (
+                    f"[decision] {d.get('decision', '')} — {d.get('reason', '')}"
+                )
+                decisions.append((ts, text))
+        for session in self.load_recent_sessions(limit=session_limit):
+            for d in session.get("decisions", []):
+                ts = d.get("timestamp", 0.0)
+                text = (
+                    f"[decision] {d.get('decision', '')} — {d.get('reason', '')}"
+                )
+                decisions.append((ts, text))
+        # Pull from the consolidated archive as well — `prune_old_sessions`
+        # writes decisions there before deleting the source files, so without
+        # this step a recall on a long-lived project would forget anything
+        # past the most-recent session_limit files.
+        for d in self._load_consolidated_decisions():
+            ts = d.get("timestamp", 0.0)
+            text = (
+                f"[decision] {d.get('decision', '')} — {d.get('reason', '')}"
+            )
+            decisions.append((ts, text))
+        # Dedup keeping the newest occurrence of each text.
+        seen: set[str] = set()
+        ordered: list[str] = []
+        for _, text in sorted(decisions, key=lambda pair: pair[0], reverse=True):
+            if text in seen:
+                continue
+            seen.add(text)
+            ordered.append(text)
+            if len(ordered) >= limit:
+                break
+        return ordered

context_engine/memory/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Per-project memory store — SQLite tables backing cross-session recall.
+This package introduces the new memory.db storage. The legacy JSON-per-session
+capture path in `context_engine.integration.session_capture` continues to work
+unchanged; it is retired in a follow-up PR once hooks land.
+"""

context_engine/memory/compressor.py ADDED Viewed

@@ -0,0 +1,344 @@
+"""Background compression worker for the memory store.
+Drains `pending_compressions` rows on a fixed interval, calls the extractive
+summariser for each, writes the result to `turn_summaries` (or
+`sessions.rollup_summary` for kind='session_rollup'), and removes the queue
+row. Failures bump the row's `attempts` and log; the row remains queued for
+retry on the next pass.
+Designed to run as an asyncio task inside `cce serve`. Single-flight by
+construction — only one worker drains at a time.
+"""
+from __future__ import annotations
+import asyncio
+import json
+import logging
+import sqlite3
+import time
+from context_engine.memory import db as memory_db
+from context_engine.memory.extractive import extractive_summary, truncation_summary
+from context_engine.memory.grammar import (
+    compress as _grammar_compress,
+    compress_with_counts as _grammar_compress_counted,
+    DEFAULT_LEVEL as _GRAMMAR_LEVEL,
+)
+def _approx_tokens(text: str) -> int:
+    """Cheap heuristic — chars // 4. Matches mcp_server._count_tokens so
+    bucket totals across writers stay comparable.
+    """
+    return max(1, len(text) // 4) if text else 0
+# Module-level logger shared by the compression helpers and the worker loop.
+log = logging.getLogger(__name__)
+# `top_k` passed to the summariser for a single turn (compress_turn).
+_DEFAULT_TURN_TOP_K = 3
+# `top_k` passed to the summariser for a whole-session rollup.
+_DEFAULT_ROLLUP_TOP_K = 5
+# Default pause of compression_loop when the queue is empty or an iteration
+# crashed.
+_DEFAULT_INTERVAL_SECONDS = 5.0
+_TOOL_OUTPUT_CHAR_CAP = 1500  # avoid embedding multi-MB tool outputs
+_TOOL_INPUT_CHAR_CAP = 4000  # skip JSON parsing for huge tool inputs (e.g. patches)
+def compress_turn(
+    conn: sqlite3.Connection,
+    *,
+    session_id: str,
+    prompt_number: int,
+    embedder,
+) -> str:
+    """Compute and persist a turn summary. Returns the summary text.
+    Two compression passes apply:
+      1. Extractive: pick the top-K most central sentences from the turn
+         (the existing `_summarise` step).
+      2. Grammar: drop articles/fillers from prose tokens; structured
+         tokens (paths, identifiers, code) survive byte-for-byte.
+    The returned text is the post-grammar form (what the model will see
+    after expand() on the read side).
+    """
+    text = _build_turn_text(conn, session_id=session_id, prompt_number=prompt_number)
+    raw_tokens = _approx_tokens(text)
+    summary, tier = _summarise(text, embedder=embedder, top_k=_DEFAULT_TURN_TOP_K)
+    extractive_tokens = _approx_tokens(summary)
+    if summary:
+        # Scrub PII before grammar compression — emails / IPs / SSNs that
+        # leaked into a turn (the user pasted a real value into a prompt
+        # or tool input) shouldn't end up indexed in turn_summaries.
+        summary = memory_db.scrub_pii(summary)
+        summary, gram_raw, gram_comp = _grammar_compress_counted(
+            summary, level=_GRAMMAR_LEVEL,
+        )
+        memory_db.record_savings(
+            conn, bucket="grammar", baseline=gram_raw, served=gram_comp,
+        )
+    # Turn-summarization savings: raw turn text (prompt + tool inputs/outputs)
+    # vs the extractive summary that ends up in turn_summaries.
+    if raw_tokens > 0 and extractive_tokens > 0:
+        memory_db.record_savings(
+            conn, bucket="turn_summarization",
+            baseline=raw_tokens, served=extractive_tokens,
+            meta={"kind": "turn", "tier": tier},
+        )
+    epoch = int(time.time())
+    cur = conn.execute(
+        "INSERT OR REPLACE INTO turn_summaries "
+        "(session_id, prompt_number, summary, tier, created_at_epoch) "
+        "VALUES (?, ?, ?, ?, ?)",
+        (session_id, prompt_number, summary, tier, epoch),
+    )
+    if summary:
+        memory_db.record_turn_summary_vec(
+            conn, embedder, turn_id=cur.lastrowid, summary=summary,
+        )
+    return summary
+def compress_session_rollup(
+    conn: sqlite3.Connection,
+    *,
+    session_id: str,
+    embedder,
+) -> str:
+    """Compute the session rollup summary from existing turn summaries.
+    If a session has no turn_summaries yet (e.g. SessionEnd fired before the
+    worker drained any turns), we fall through to an empty rollup; the
+    session row is still updated so the timeline view shows it as completed.
+    """
+    rows = list(conn.execute(
+        "SELECT summary FROM turn_summaries WHERE session_id = ? "
+        "ORDER BY prompt_number ASC",
+        (session_id,),
+    ))
+    text = "\n".join(r["summary"] for r in rows if r["summary"])
+    raw_tokens = _approx_tokens(text)
+    if not text:
+        rollup = ""
+        tier = "empty"
+    else:
+        rollup, tier = _summarise(text, embedder=embedder, top_k=_DEFAULT_ROLLUP_TOP_K)
+        extractive_tokens = _approx_tokens(rollup)
+        # Belt-and-braces PII scrub on the rollup. Each turn summary
+        # already went through scrub_pii in compress_turn(), but the
+        # rollup is the long-lived "canonical history" view of a
+        # session — worth re-scrubbing in case a turn slipped through.
+        rollup = memory_db.scrub_pii(rollup)
+        # Re-pass through grammar — turn summaries are already compressed,
+        # so this is mostly idempotent, but extractive may concatenate
+        # sentences with newlines that re-introduce articles via the join
+        # mechanics. Cheap, makes the on-disk form consistent.
+        rollup, gram_raw, gram_comp = _grammar_compress_counted(
+            rollup, level=_GRAMMAR_LEVEL,
+        )
+        memory_db.record_savings(
+            conn, bucket="grammar", baseline=gram_raw, served=gram_comp,
+        )
+        if raw_tokens > 0 and extractive_tokens > 0:
+            memory_db.record_savings(
+                conn, bucket="turn_summarization",
+                baseline=raw_tokens, served=extractive_tokens,
+                meta={"kind": "session_rollup", "tier": tier},
+            )
+    epoch = int(time.time())
+    conn.execute(
+        "UPDATE sessions SET rollup_summary = ?, rollup_summary_at_epoch = ? "
+        "WHERE id = ?",
+        (rollup, epoch, session_id),
+    )
+    log.debug("session rollup tier=%s len=%d", tier, len(rollup))
+    return rollup
+def _build_turn_text(
+    conn: sqlite3.Connection,
+    *,
+    session_id: str,
+    prompt_number: int,
+) -> str:
+    """Concatenate prompt + tool inputs/outputs into one big text blob."""
+    parts: list[str] = []
+    prompt = conn.execute(
+        "SELECT prompt_text FROM prompts WHERE session_id = ? AND prompt_number = ?",
+        (session_id, prompt_number),
+    ).fetchone()
+    if prompt and prompt["prompt_text"]:
+        parts.append(f"User: {prompt['prompt_text']}")
+    events = conn.execute(
+        "SELECT te.tool_name, p.raw_input, p.raw_output FROM tool_events te "
+        "LEFT JOIN tool_event_payloads p ON p.id = te.payload_id "
+        "WHERE te.session_id = ? AND te.prompt_number = ? "
+        "ORDER BY te.id ASC",
+        (session_id, prompt_number),
+    ).fetchall()
+    for ev in events:
+        descriptor = _describe_input(ev["tool_name"], ev["raw_input"] or "")
+        parts.append(descriptor)
+        out = (ev["raw_output"] or "").strip()
+        if out:
+            if len(out) > _TOOL_OUTPUT_CHAR_CAP:
+                out = out[:_TOOL_OUTPUT_CHAR_CAP] + "…"
+            parts.append(out)
+    return "\n".join(parts)
+def _describe_input(tool_name: str, raw_input: str) -> str:
+    """One-line descriptor of a tool invocation for the summary candidates."""
+    if not raw_input:
+        return tool_name
+    # Skip JSON parsing on oversize payloads (patches, large file contents) —
+    # the compression worker runs on the asyncio thread and we don't want it
+    # spending tens of ms parsing megabytes just to format a one-liner.
+    if len(raw_input) > _TOOL_INPUT_CHAR_CAP:
+        return f"{tool_name}: {raw_input[:120]}"
+    try:
+        data = json.loads(raw_input)
+    except (json.JSONDecodeError, ValueError):
+        return f"{tool_name}: {raw_input[:120]}"
+    if not isinstance(data, dict):
+        return f"{tool_name}: {raw_input[:120]}"
+    # Surface common high-signal fields explicitly.
+    for key in ("file_path", "command", "pattern", "path", "query"):
+        if key in data and data[key]:
+            return f"{tool_name} {key}={data[key]!r}"
+    keys = list(data.keys())[:2]
+    return f"{tool_name} {keys}"
+def _summarise(text: str, *, embedder, top_k: int) -> tuple[str, str]:
+    """Run extractive summarisation, falling back to truncation on failure."""
+    if not text.strip():
+        return "", "empty"
+    if embedder is None:
+        return truncation_summary(text), "truncation"
+    try:
+        out = extractive_summary(text, embedder=embedder, top_k=top_k)
+        return out, "extractive"
+    except Exception:
+        log.exception("extractive failed; falling back to truncation")
+        return truncation_summary(text), "truncation"
+def _drain_one_sync(conn: sqlite3.Connection, embedder) -> bool:
+    """Pop and process the oldest pending row. Pure-sync; safe for either the
+    main thread (tests) or a worker thread (production via to_thread).
+    Returns True iff work was done.
+    """
+    row = conn.execute(
+        "SELECT id, kind, session_id, prompt_number, attempts FROM pending_compressions "
+        "ORDER BY enqueued_at_epoch ASC LIMIT 1"
+    ).fetchone()
+    if row is None:
+        return False
+    try:
+        if row["kind"] == "turn":
+            compress_turn(
+                conn,
+                session_id=row["session_id"],
+                prompt_number=row["prompt_number"],
+                embedder=embedder,
+            )
+        else:
+            compress_session_rollup(
+                conn,
+                session_id=row["session_id"],
+                embedder=embedder,
+            )
+        conn.execute("DELETE FROM pending_compressions WHERE id = ?", (row["id"],))
+        conn.commit()
+    except Exception as exc:
+        log.exception("Compression failed for %s/%s/%s",
+                      row["kind"], row["session_id"], row["prompt_number"])
+        conn.execute(
+            "UPDATE pending_compressions SET attempts = attempts + 1, "
+            "last_error = ? WHERE id = ?",
+            (str(exc)[:500], row["id"]),
+        )
+        conn.commit()
+    return True
+def _drain_one_threaded(db_path) -> bool:
+    """Open a worker-local connection, drain one, close. Designed to run on a
+    thread via `asyncio.to_thread` — that's the whole point of this function:
+    every byte of work below the to_thread call lives off the asyncio loop so
+    `mcp.run_stdio()` stays responsive even under a 50-turn backlog.
+    """
+    # Importing here avoids a circular import at module load.
+    from context_engine.memory import db as _memory_db
+    conn = _memory_db.connect(db_path)
+    try:
+        # Resolve the embedder lazily so the worker thread doesn't pin a
+        # cross-thread reference; the embedder is process-global anyway.
+        from context_engine.indexer.embedder import Embedder as _EmbedderCls  # noqa: F401
+        # Embedder is held by the caller — see compression_loop's closure.
+        return _drain_one_sync(conn, _drain_one_threaded._embedder)
+    finally:
+        conn.close()
+async def _drain_one(conn: sqlite3.Connection, embedder) -> bool:
+    """Async test-only shim around `_drain_one_sync` for tests that already
+    own a connection and don't want to pay the open/close round-trip.
+    """
+    return _drain_one_sync(conn, embedder)
+# Read by compression_loop: after this many back-to-back successful drains it
+# pauses for 50 ms instead of merely yielding.
+_BACKLOG_BATCH = 5  # drain at most this many items before yielding to other tasks
+async def compression_loop(
+    db_path,
+    embedder,
+    *,
+    interval_seconds: float = _DEFAULT_INTERVAL_SECONDS,
+    stop_event: asyncio.Event | None = None,
+) -> None:
+    """Run forever, draining the queue off the asyncio thread.
+    Each iteration runs the heavy work (embed + SQLite write) on a worker
+    thread via `asyncio.to_thread`, so `mcp.run_stdio()` stays responsive
+    under backlog. We still pace with sleep(0) per item and a 50 ms breath
+    every `_BACKLOG_BATCH` items to keep CPU contention bounded.
+    `db_path` may also be a `sqlite3.Connection` for compatibility with the
+    test suite, in which case we drive `_drain_one_sync` directly.
+    """
+    legacy_conn = isinstance(db_path, sqlite3.Connection)
+    # Stash the embedder on the function for the worker thread to read; this
+    # avoids passing it through asyncio.to_thread's positional plumbing while
+    # keeping the thread closure-free (no risk of capturing the asyncio loop).
+    _drain_one_threaded._embedder = embedder
+    consecutive = 0
+    while True:
+        if stop_event is not None and stop_event.is_set():
+            return
+        try:
+            if legacy_conn:
+                did_work = _drain_one_sync(db_path, embedder)
+            else:
+                did_work = await asyncio.to_thread(
+                    _drain_one_threaded, db_path,
+                )
+            if did_work:
+                consecutive += 1
+                if consecutive >= _BACKLOG_BATCH:
+                    consecutive = 0
+                    await asyncio.sleep(0.05)
+                else:
+                    await asyncio.sleep(0)
+            else:
+                consecutive = 0
+                await asyncio.sleep(interval_seconds)
+        except asyncio.CancelledError:
+            raise
+        except Exception:
+            log.exception("compression_loop iteration crashed; backing off")
+            consecutive = 0
+            await asyncio.sleep(interval_seconds)