PyPI - deja-cli - Versions diffs - 0.1.0__py3-none-any.whl - Mend

deja-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

deja/__init__.py +0 -0
deja/config.py +127 -0
deja/core/__init__.py +0 -0
deja/core/extractor.py +135 -0
deja/core/reflection.py +364 -0
deja/core/scheduler.py +65 -0
deja/core/store.py +1413 -0
deja/ingest/__init__.py +0 -0
deja/ingest/watchers/__init__.py +0 -0
deja/ingest/watchers/base.py +143 -0
deja/ingest/watchers/claude_code.py +62 -0
deja/ingest/watchers/codex_cli.py +95 -0
deja/ingest/watchers/gemini_cli.py +96 -0
deja/interfaces/__init__.py +0 -0
deja/interfaces/cli.py +1967 -0
deja/interfaces/mcp_server.py +96 -0
deja/interfaces/web.py +104 -0
deja/interfaces/web_ui/index.html +614 -0
deja/llm/__init__.py +0 -0
deja/llm/base.py +34 -0
deja/llm/embedding.py +45 -0
deja/llm/factory.py +90 -0
deja/llm/providers/__init__.py +0 -0
deja/llm/providers/anthropic.py +21 -0
deja/llm/providers/ollama.py +30 -0
deja/main.py +4 -0
deja_cli-0.1.0.dist-info/METADATA +100 -0
deja_cli-0.1.0.dist-info/RECORD +31 -0
deja_cli-0.1.0.dist-info/WHEEL +4 -0
deja_cli-0.1.0.dist-info/entry_points.txt +3 -0
deja_cli-0.1.0.dist-info/licenses/LICENSE +21 -0

deja/core/store.py ADDED Viewed

@@ -0,0 +1,1413 @@
+from __future__ import annotations
+import math
+import struct
+import sys
+from datetime import datetime, timezone, timedelta
+from pathlib import Path
+from typing import Any, Optional
+import aiosqlite
+from ulid import ULID
+from deja.config import Config
+def _now_iso() -> str:
+    return datetime.now(timezone.utc).isoformat()
+def _token_overlap(a: str, b: str) -> float:
+    """Simple token overlap ratio between two strings."""
+    tokens_a = set(a.lower().split())
+    tokens_b = set(b.lower().split())
+    if not tokens_a or not tokens_b:
+        return 0.0
+    intersection = tokens_a & tokens_b
+    union = tokens_a | tokens_b
+    return len(intersection) / len(union)
+# ── embedding helpers ──────────────────────────────────────────────────────────
+def _emb_to_bytes(embedding: list[float]) -> bytes:
+    return struct.pack(f"{len(embedding)}f", *embedding)
+def _bytes_to_emb(data: bytes) -> list[float]:
+    n = len(data) // 4
+    return list(struct.unpack(f"{n}f", data))
+def _cosine_similarity(a: list[float], b: list[float]) -> float:
+    dot = sum(x * y for x, y in zip(a, b))
+    mag_a = math.sqrt(sum(x * x for x in a))
+    mag_b = math.sqrt(sum(x * x for x in b))
+    if mag_a == 0.0 or mag_b == 0.0:
+        return 0.0
+    return dot / (mag_a * mag_b)
+# ── activation ranking helpers ─────────────────────────────────────────────────
+def _activation_score(
+    mem: dict,
+    task_match: float,
+    project: Optional[str],
+    now_dt: datetime,
+) -> float:
+    """Score a memory for activation ranking.
+    score = task_match * 2 + confidence + recency * 0.5 + reuse_norm * 0.5 + scope_fit
+    All components are in [0, 1] except task_match which is weighted 2×
+    because retrieval relevance is the primary signal.
+    """
+    try:
+        created_str = mem.get("created_at", "")
+        created = datetime.fromisoformat(created_str)
+        if created.tzinfo is None:
+            created = created.replace(tzinfo=timezone.utc)
+        days_old = (now_dt - created).days
+    except Exception:
+        days_old = 0
+    recency = max(0.0, 1.0 - days_old / 365.0)
+    reuse_norm = min(mem.get("reuse_count", 0), 10) / 10.0
+    scope_fit = 0.1 if (project and mem.get("project") == project) else 0.0
+    confidence = mem.get("confidence", 1.0)
+    return task_match * 2.0 + confidence + recency * 0.5 + reuse_norm * 0.5 + scope_fit
+def _apply_confusability_penalty(
+    scored: list[tuple[dict, Optional[bytes], float]],
+) -> list[tuple[dict, Optional[bytes], float]]:
+    """Down-rank procedures highly similar to a higher-ranked procedure.
+    Processes in score-descending order. For each procedure, if its embedding is
+    >0.85 cosine-similar to any higher-ranked procedure, multiply its score by 0.6.
+    Only applies when the memory has a stored embedding.
+    """
+    scored_sorted = sorted(scored, key=lambda x: x[2], reverse=True)
+    claimed_embeddings: list[list[float]] = []
+    result: list[tuple[dict, Optional[bytes], float]] = []
+    for mem, emb_bytes, score in scored_sorted:
+        if mem.get("type") == "procedure" and emb_bytes:
+            mem_emb = _bytes_to_emb(emb_bytes)
+            if any(
+                _cosine_similarity(mem_emb, higher) > 0.85
+                for higher in claimed_embeddings
+            ):
+                score *= 0.6
+            else:
+                claimed_embeddings.append(mem_emb)
+        result.append((mem, emb_bytes, score))
+    return result
+def _strip_embedding(mem: dict) -> dict:
+    """Remove the binary embedding field before returning to callers."""
+    m = dict(mem)
+    m.pop("embedding", None)
+    return m
+# ── load budgeting ─────────────────────────────────────────────────────────────
+# Default per-type caps on how many memories load_budgeted() selects when the
+# caller passes no ``slots`` mapping. Types absent from the mapping get no slot
+# and are reported only through overflow hints.
+DEFAULT_LOAD_SLOTS: dict[str, int] = {
+    "preference": 5,
+    "gotcha": 5,
+    "decision": 5,
+    "pattern": 5,
+    "procedure": 3,
+    "progress": 3,  # only if updated within the last 7 days
+}
+def _parse_dt(dt_str: str) -> datetime:
+    """Parse an ISO datetime string, defaulting to UTC epoch on failure."""
+    try:
+        dt = datetime.fromisoformat(dt_str)
+        if dt.tzinfo is None:
+            dt = dt.replace(tzinfo=timezone.utc)
+        return dt
+    except Exception:
+        return datetime(1970, 1, 1, tzinfo=timezone.utc)
+# ── store ──────────────────────────────────────────────────────────────────────
+_GLOBAL_PROJECT_KEY = "__global__"
+def _project_meta_key(project: Optional[str]) -> str:
+    return project if project is not None else _GLOBAL_PROJECT_KEY
+class MemoryStore:
+    """SQLite-backed memory store with FTS5 full-text search and confidence scoring.
+    Confidence lifecycle
+    --------------------
+    Each memory has a ``confidence`` float in [0.0, 1.0] that tracks reliability:
+    - **Initial value** — set by the caller on ``save()``. Manual ``deja save`` calls
+      default to 1.0. LLM-extracted memories use the model's self-assessed confidence
+      (often 0.7–0.95 for inferred facts).
+    - **Deduplication reinforcement** — when ``save()`` finds an existing memory with
+      >80% token overlap and the same type+scope, it increments ``confidence`` by 0.05
+      (capped at 1.0) instead of inserting a duplicate. Repeated discoveries strengthen
+      rather than clutter the vault.
+    - **Load ordering** — ``load()`` and ``search()`` both order results by
+      ``confidence DESC``, so the most reliable memories appear first in context.
+    - **Decay** (Phase 2, scheduler) — memories not referenced for 2+ weeks have
+      confidence reduced by ~0.05/week. Keeps stale knowledge from dominating context.
+    - **Archival threshold** — memories whose confidence falls below 0.3 are archived
+      (``archived_at`` stamped). Archived memories are excluded from ``load``/``search``
+      but not deleted, preserving history.
+    Search (Phase 3)
+    ----------------
+    ``search()`` runs a hybrid pipeline:
+    1. BM25 via FTS5 — fast keyword matching, ordered by BM25 rank.
+    2. Embedding search — always runs when an adapter is configured (not a fallback).
+       Results merged with FTS results, deduped by ID.
+    3. Activation ranking — all candidates re-scored by:
+       ``task_match * 2 + confidence + recency * 0.5 + reuse_count_norm * 0.5 + scope_fit``
+       ``task_match`` uses cosine similarity for any result with a stored embedding;
+       falls back to normalized FTS rank (0.1–1.0) otherwise.
+    4. Confusability penalty — procedures with cosine >0.85 to a higher-ranked procedure
+       have their score multiplied by 0.6 (a 40% reduction).
+    5. ``reuse_count`` is incremented for each returned memory when ``track_usage=True``
+       (the default). Pass ``track_usage=False`` from benchmarks or batch jobs to prevent
+       cross-query accumulation artifacts.
+    """
+    def __init__(self, config: Config) -> None:
+        self._db_path = config.store.db_path
+        self._db: Optional[aiosqlite.Connection] = None
+    async def _get_db(self) -> aiosqlite.Connection:
+        if self._db is None:
+            self._db_path.parent.mkdir(parents=True, exist_ok=True)
+            self._db = await aiosqlite.connect(self._db_path)
+            self._db.row_factory = aiosqlite.Row
+            await self._db.execute("PRAGMA journal_mode=WAL")
+            await self._db.execute("PRAGMA foreign_keys=ON")
+        return self._db
+    async def init_db(self) -> None:
+        db = await self._get_db()
+        await db.executescript("""
+            CREATE TABLE IF NOT EXISTS memories (
+                id              TEXT PRIMARY KEY,
+                type            TEXT NOT NULL,
+                category        TEXT NOT NULL DEFAULT 'agent',
+                content         TEXT NOT NULL,
+                scope           TEXT NOT NULL,
+                project         TEXT,
+                source          TEXT,
+                confidence      REAL NOT NULL DEFAULT 1.0,
+                reuse_count     INTEGER NOT NULL DEFAULT 0,
+                domain          TEXT,
+                entity_graph    TEXT,
+                trigger         TEXT,
+                embedding       BLOB,
+                created_at      TEXT NOT NULL,
+                updated_at      TEXT NOT NULL,
+                last_confirmed  TEXT,
+                archived_at     TEXT,
+                invalidated_at  TEXT
+            );
+            CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts
+            USING fts5(content, type, scope, content=memories, content_rowid=rowid);
+            CREATE TRIGGER IF NOT EXISTS memories_ai AFTER INSERT ON memories BEGIN
+                INSERT INTO memories_fts(rowid, content, type, scope)
+                VALUES (new.rowid, new.content, new.type, new.scope);
+            END;
+            CREATE TRIGGER IF NOT EXISTS memories_ad AFTER DELETE ON memories BEGIN
+                INSERT INTO memories_fts(memories_fts, rowid, content, type, scope)
+                VALUES ('delete', old.rowid, old.content, old.type, old.scope);
+            END;
+            CREATE TRIGGER IF NOT EXISTS memories_au AFTER UPDATE ON memories BEGIN
+                INSERT INTO memories_fts(memories_fts, rowid, content, type, scope)
+                VALUES ('delete', old.rowid, old.content, old.type, old.scope);
+                INSERT INTO memories_fts(rowid, content, type, scope)
+                VALUES (new.rowid, new.content, new.type, new.scope);
+            END;
+            CREATE TABLE IF NOT EXISTS entity_nodes (
+                id          TEXT PRIMARY KEY,
+                project     TEXT,
+                entity      TEXT NOT NULL,
+                created_at  TEXT NOT NULL
+            );
+            CREATE TABLE IF NOT EXISTS entity_edges (
+                id                  TEXT PRIMARY KEY,
+                project             TEXT,
+                subject_entity      TEXT NOT NULL,
+                predicate           TEXT NOT NULL,
+                object_entity       TEXT NOT NULL,
+                confidence          REAL NOT NULL DEFAULT 0.5,
+                confirmations       INTEGER NOT NULL DEFAULT 1,
+                is_negation         INTEGER NOT NULL DEFAULT 0,
+                first_seen_session  TEXT,
+                valid_from          TEXT NOT NULL,
+                invalidated_at      TEXT
+            );
+            CREATE TABLE IF NOT EXISTS observations (
+                id              TEXT PRIMARY KEY,
+                project         TEXT,
+                content         TEXT NOT NULL,
+                token_estimate  INTEGER,
+                created_at      TEXT NOT NULL,
+                reflector_pass  INTEGER NOT NULL DEFAULT 0
+            );
+            CREATE TABLE IF NOT EXISTS reflection_meta (
+                project             TEXT PRIMARY KEY,
+                last_observer_at    TEXT,
+                last_reflector_at   TEXT,
+                last_decay_at       TEXT,
+                last_promote_at     TEXT,
+                last_archive_at     TEXT
+            );
+        """)
+        await db.commit()
+        # Migrations: add columns that didn't exist in earlier schema versions.
+        # ALTER TABLE ADD COLUMN is a no-op on error — we catch and ignore
+        # "duplicate column" errors so init_db() is safe to call on existing DBs.
+        migrations = [
+            "ALTER TABLE memories ADD COLUMN reuse_count INTEGER NOT NULL DEFAULT 0",
+            "ALTER TABLE memories ADD COLUMN domain TEXT",
+            "ALTER TABLE memories ADD COLUMN embedding BLOB",
+            "ALTER TABLE memories ADD COLUMN trigger TEXT",
+        ]
+        for sql in migrations:
+            try:
+                await db.execute(sql)
+            except Exception:
+                pass  # column already exists
+        await db.commit()
+    async def save(self, memory: dict, embedding: Optional[bytes] = None) -> str:
+        """Save a memory, deduplicating if an existing memory is >80% similar.
+        embedding: pre-computed embedding bytes (from EmbeddingAdapter.to_bytes()).
+        Pass None if no embedding provider is configured.
+        """
+        db = await self._get_db()
+        content = memory["content"]
+        mem_type = memory.get("type", "pattern")
+        scope = memory.get("scope", "global")
+        project = memory.get("project")
+        # Dedup check uses raw FTS5 search (no side effects on reuse_count)
+        candidates = await self._search_fts(content, project, limit=5)
+        for candidate in candidates:
+            if candidate["type"] == mem_type and candidate["scope"] == scope:
+                overlap = _token_overlap(content, candidate["content"])
+                if overlap > 0.8:
+                    # Dedup hit: agent rediscovered the same knowledge — strong confirmation.
+                    # Bump confidence and increment reuse_count.
+                    # Merge trigger phrases: union of existing + incoming, deduped.
+                    now = _now_iso()
+                    new_confidence = min(1.0, candidate["confidence"] + 0.05)
+                    new_reuse = candidate.get("reuse_count", 0) + 1
+                    existing_trigger = candidate.get("trigger") or ""
+                    incoming_trigger = memory.get("trigger") or ""
+                    merged_trigger: Optional[str] = None
+                    if existing_trigger or incoming_trigger:
+                        existing_phrases = {p.strip() for p in existing_trigger.split(",") if p.strip()}
+                        incoming_phrases = {p.strip() for p in incoming_trigger.split(",") if p.strip()}
+                        merged_trigger = ", ".join(sorted(existing_phrases | incoming_phrases))
+                    await db.execute(
+                        """
+                        UPDATE memories
+                        SET confidence = ?, reuse_count = ?, last_confirmed = ?, updated_at = ?,
+                            trigger = ?
+                        WHERE id = ?
+                        """,
+                        (new_confidence, new_reuse, now, now, merged_trigger, candidate["id"]),
+                    )
+                    await db.commit()
+                    return candidate["id"]
+        # Insert new memory
+        now = _now_iso()
+        mem_id = str(ULID())
+        await db.execute(
+            """
+            INSERT INTO memories (
+                id, type, category, content, scope, project, source,
+                confidence, reuse_count, domain, entity_graph, trigger, embedding,
+                created_at, updated_at, last_confirmed
+            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+            """,
+            (
+                mem_id,
+                mem_type,
+                memory.get("category", "agent"),
+                content,
+                scope,
+                project,
+                memory.get("source"),
+                memory.get("confidence", 1.0),
+                memory.get("reuse_count", 0),
+                memory.get("domain"),
+                memory.get("entity_graph"),
+                memory.get("trigger"),
+                embedding,
+                now,
+                now,
+                now,
+            ),
+        )
+        await db.commit()
+        return mem_id
+    async def save_embedding(self, memory_id: str, embedding: bytes) -> None:
+        """Store an embedding for an existing memory (used for backfill)."""
+        db = await self._get_db()
+        await db.execute(
+            "UPDATE memories SET embedding = ?, updated_at = ? WHERE id = ?",
+            (embedding, _now_iso(), memory_id),
+        )
+        await db.commit()
+    async def get_memories_without_embeddings(
+        self, project: Optional[str] = None
+    ) -> list[dict]:
+        """Return active memories that have no embedding yet (for backfill)."""
+        db = await self._get_db()
+        conditions = ["archived_at IS NULL", "invalidated_at IS NULL", "embedding IS NULL"]
+        params: list[Any] = []
+        if project:
+            conditions.append("(scope = 'global' OR (scope = ? AND project = ?))")
+            params.extend([f"project:{project}", project])
+        where = "WHERE " + " AND ".join(conditions)
+        async with db.execute(
+            f"SELECT id, content FROM memories {where}", params
+        ) as cursor:
+            return [dict(r) for r in await cursor.fetchall()]
+    async def load(self, project: Optional[str] = None) -> list[dict]:
+        """Load active memories for a project + global scope, ordered by confidence DESC."""
+        db = await self._get_db()
+        params: list[Any] = []
+        if project:
+            scope_filter = "(scope = 'global' OR (scope = ? AND project = ?))"
+            params = [f"project:{project}", project]
+        else:
+            scope_filter = "scope = 'global'"
+        query = f"""
+            SELECT * FROM memories
+            WHERE {scope_filter}
+              AND archived_at IS NULL
+              AND invalidated_at IS NULL
+            ORDER BY confidence DESC
+        """
+        async with db.execute(query, params) as cursor:
+            rows = await cursor.fetchall()
+            return [_strip_embedding(dict(row)) for row in rows]
+    async def load_budgeted(
+        self,
+        project: Optional[str] = None,
+        slots: Optional[dict[str, int]] = None,
+        context: Optional[str] = None,
+        embedding_adapter: Any = None,
+    ) -> dict:
+        """Load memories with type-slot allocation to bound output size.
+        Selects the top-N memories per type using type-appropriate ranking:
+        - gotcha/decision/pattern/preference: top-N by confidence DESC
+        - procedure: top-N by reuse_count DESC (most-activated first)
+        - progress: top-N by updated_at DESC, only if updated within 7 days
+        When ``context`` is provided, re-ranks within each type bucket using
+        activation scoring (task_match × 2 + confidence + recency + reuse_norm
+        + scope_fit) instead of raw confidence order. task_match is computed
+        via cosine similarity for memories with stored embeddings, falling back
+        to normalised FTS rank. Memories with no relevance signal get
+        task_match=0.0 and sort behind any context-matched result.
+        Returns a dict with:
+          memories: selected subset (list of dicts)
+          total: total active memory count
+          overflow: count of memories cut by slot limits
+          overflow_hints: list of {"type": str, "overflow": int} for each cut type
+          project: project name or "global"
+        """
+        if slots is None:
+            slots = dict(DEFAULT_LOAD_SLOTS)
+        all_memories = await self.load(project)
+        now = datetime.now(timezone.utc)
+        cutoff_7d = now - timedelta(days=7)
+        # --- context-aware relevance scoring -----------------------------------
+        task_match_by_id: dict[str, float] = {}
+        if context:
+            # FTS pass: get rank positions for keyword matches
+            fts_results = await self._search_fts(context, project, limit=len(all_memories) or 200)
+            fts_count = max(len(fts_results), 1)
+            for rank, mem in enumerate(fts_results):
+                task_match_by_id[mem["id"]] = max(0.1, 1.0 - rank / fts_count)
+            # Embedding pass: cosine similarity overrides FTS rank when available
+            if embedding_adapter is not None:
+                try:
+                    query_emb = await embedding_adapter.embed(context)
+                    for mem in all_memories:
+                        emb_bytes = mem.get("embedding")
+                        if isinstance(emb_bytes, bytes):
+                            sim = _cosine_similarity(query_emb, _bytes_to_emb(emb_bytes))
+                            task_match_by_id[mem["id"]] = sim
+                except Exception as e:
+                    print(f"[deja] load --context embedding error: {e}", file=sys.stderr)
+        # -----------------------------------------------------------------------
+        by_type: dict[str, list[dict]] = {}
+        for mem in all_memories:
+            t = mem.get("type", "pattern")
+            by_type.setdefault(t, []).append(mem)
+        selected: list[dict] = []
+        overflow_hints: list[dict] = []
+        for mem_type, limit in slots.items():
+            mems = list(by_type.get(mem_type, []))
+            if context:
+                # Re-rank by activation score using context relevance
+                mems.sort(
+                    key=lambda m: _activation_score(
+                        m, task_match_by_id.get(m["id"], 0.0), project, now
+                    ),
+                    reverse=True,
+                )
+                if mem_type == "progress":
+                    mems = [m for m in mems if _parse_dt(m.get("updated_at", "")) >= cutoff_7d]
+            elif mem_type == "progress":
+                mems = [m for m in mems if _parse_dt(m.get("updated_at", "")) >= cutoff_7d]
+                mems.sort(key=lambda m: m.get("updated_at", ""), reverse=True)
+            elif mem_type == "procedure":
+                mems.sort(
+                    key=lambda m: (m.get("reuse_count", 0), m.get("confidence", 0.0)),
+                    reverse=True,
+                )
+            else:
+                mems.sort(key=lambda m: m.get("confidence", 0.0), reverse=True)
+            chosen = mems[:limit]
+            leftover = len(mems) - len(chosen)
+            selected.extend(chosen)
+            if leftover > 0:
+                overflow_hints.append({"type": mem_type, "overflow": leftover})
+        # Any memory types not covered by slots
+        known = set(slots.keys())
+        for mem_type, mems in by_type.items():
+            if mem_type not in known and mems:
+                overflow_hints.append({"type": mem_type, "overflow": len(mems)})
+        return {
+            "memories": selected,
+            "total": len(all_memories),
+            "overflow": len(all_memories) - len(selected),
+            "overflow_hints": overflow_hints,
+            "project": project or "global",
+        }
+    async def list_all(self) -> list[dict]:
+        """List all active memories across every scope, ordered by scope then confidence."""
+        db = await self._get_db()
+        async with db.execute(
+            """
+            SELECT * FROM memories
+            WHERE archived_at IS NULL AND invalidated_at IS NULL
+            ORDER BY scope ASC, confidence DESC
+            """
+        ) as cursor:
+            rows = await cursor.fetchall()
+            return [_strip_embedding(dict(row)) for row in rows]
+    async def list_filtered(
+        self,
+        project: Optional[str] = None,
+        mem_type: Optional[str] = None,
+        status: str = "active",
+        limit: int = 200,
+        offset: int = 0,
+    ) -> tuple[list[dict], int]:
+        """List memories with optional filtering for the web viewer.
+        project: None=all, '__global__'=global scope only, else filter by project name.
+        status: 'active' | 'archived' | 'invalidated' | 'all'
+        Returns (memories, total_matching_count).
+        """
+        db = await self._get_db()
+        conditions: list[str] = []
+        params: list[Any] = []
+        if project == "__global__":
+            conditions.append("scope = 'global'")
+        elif project is not None:
+            conditions.append("project = ?")
+            params.append(project)
+        if mem_type:
+            conditions.append("type = ?")
+            params.append(mem_type)
+        if status == "active":
+            conditions.append("archived_at IS NULL AND invalidated_at IS NULL")
+        elif status == "archived":
+            conditions.append("archived_at IS NOT NULL")
+        elif status == "invalidated":
+            conditions.append("invalidated_at IS NOT NULL")
+        # "all" — no status filter
+        where = ("WHERE " + " AND ".join(conditions)) if conditions else ""
+        async with db.execute(f"SELECT COUNT(*) as n FROM memories {where}", params) as cur:
+            row = await cur.fetchone()
+            total = row["n"] if row else 0
+        async with db.execute(
+            f"SELECT * FROM memories {where} ORDER BY confidence DESC, updated_at DESC LIMIT ? OFFSET ?",
+            params + [limit, offset],
+        ) as cur:
+            rows = await cur.fetchall()
+        return [_strip_embedding(dict(r)) for r in rows], total
+    async def list_projects(self) -> list[dict]:
+        """Return distinct project names with active memory count, ordered by count desc."""
+        db = await self._get_db()
+        async with db.execute(
+            """
+            SELECT project, COUNT(*) as count FROM memories
+            WHERE project IS NOT NULL
+              AND archived_at IS NULL AND invalidated_at IS NULL
+            GROUP BY project ORDER BY count DESC
+            """
+        ) as cur:
+            rows = await cur.fetchall()
+        return [{"name": r["project"], "count": r["count"]} for r in rows]
+    async def _search_by_trigger(
+        self,
+        query: str,
+        project: Optional[str] = None,
+        mem_type: Optional[str] = None,
+    ) -> list[dict]:
+        """Trigger phrase search. Returns memories where any trigger phrase is a
+        substring of the query (case-insensitive).
+        Direction: query contains trigger phrase — not the other way around.
+        Example: query "kubectl apply -f k8s/" matches trigger "kubectl apply" ✓
+                 query "kubectl apply" does NOT match trigger "terraform apply" ✓
+        This is Pass 0 in search() — fires before BM25/embedding and surfaces memories
+        that were explicitly tagged for this command. High precision by design.
+        """
+        db = await self._get_db()
+        query_lower = query.lower()
+        scope_clause = ""
+        scope_params: list[Any] = []
+        if project:
+            scope_clause = "AND (m.scope = 'global' OR (m.scope = ? AND m.project = ?))"
+            scope_params = [f"project:{project}", project]
+        type_clause = ""
+        type_params: list[Any] = []
+        if mem_type:
+            type_clause = "AND m.type = ?"
+            type_params = [mem_type]
+        sql = f"""
+            SELECT m.* FROM memories m
+            WHERE m.trigger IS NOT NULL
+              {scope_clause}
+              {type_clause}
+              AND m.archived_at IS NULL
+              AND m.invalidated_at IS NULL
+            ORDER BY m.reuse_count DESC, m.confidence DESC
+        """
+        try:
+            async with db.execute(sql, scope_params + type_params) as cursor:
+                rows = await cursor.fetchall()
+        except Exception as e:
+            print(f"[deja] Trigger search error: {e}", file=sys.stderr)
+            return []
+        # Filter in Python: does the query contain any of this memory's trigger phrases?
+        # Split trigger on commas; check each phrase as a substring of the query.
+        matches = []
+        for row in rows:
+            mem = dict(row)
+            phrases = [p.strip().lower() for p in mem["trigger"].split(",") if p.strip()]
+            if any(phrase in query_lower for phrase in phrases):
+                matches.append(mem)
+        return matches[:5]
+    async def _search_fts(
+        self,
+        query: str,
+        project: Optional[str] = None,
+        mem_type: Optional[str] = None,
+        limit: int = 20,
+    ) -> list[dict]:
+        """Raw FTS5 keyword search. Returns full rows including embedding bytes.
+        Used internally by search() and save() (dedup check). No side effects.
+        """
+        db = await self._get_db()
+        params: list[Any] = []
+        escaped_query = query.replace('"', '""')
+        params.append(f'"{escaped_query}"')
+        extra_conditions = []
+        if project:
+            extra_conditions.append(
+                "(m.scope = 'global' OR (m.scope = ? AND m.project = ?))"
+            )
+            params.extend([f"project:{project}", project])
+        if mem_type:
+            extra_conditions.append("m.type = ?")
+            params.append(mem_type)
+        extra_clause = ("AND " + " AND ".join(extra_conditions)) if extra_conditions else ""
+        params.append(limit)
+        query_sql = f"""
+            SELECT m.* FROM memories m
+            JOIN memories_fts ON m.rowid = memories_fts.rowid
+            WHERE memories_fts MATCH ?
+              {extra_clause}
+              AND m.archived_at IS NULL
+              AND m.invalidated_at IS NULL
+            ORDER BY memories_fts.rank
+            LIMIT ?
+        """
+        try:
+            async with db.execute(query_sql, params) as cursor:
+                rows = await cursor.fetchall()
+                return [dict(row) for row in rows]
+        except Exception as e:
+            print(f"[deja] FTS search error: {e}", file=sys.stderr)
+            return []
+    async def _search_embedding(
+        self,
+        query_embedding: list[float],
+        project: Optional[str] = None,
+        mem_type: Optional[str] = None,
+        limit: int = 20,
+    ) -> list[dict]:
+        """Search by cosine similarity against stored embeddings.
+        Only returns memories that have a stored embedding. Returns full rows
+        including embedding bytes (needed for confusability penalty in search()).
+        """
+        db = await self._get_db()
+        conditions = [
+            "archived_at IS NULL",
+            "invalidated_at IS NULL",
+            "embedding IS NOT NULL",
+        ]
+        params: list[Any] = []
+        if project:
+            conditions.append("(scope = 'global' OR (scope = ? AND project = ?))")
+            params.extend([f"project:{project}", project])
+        if mem_type:
+            conditions.append("type = ?")
+            params.append(mem_type)
+        where = "WHERE " + " AND ".join(conditions)
+        async with db.execute(f"SELECT * FROM memories {where}", params) as cursor:
+            rows = [dict(r) for r in await cursor.fetchall()]
+        scored: list[tuple[float, dict]] = []
+        for row in rows:
+            try:
+                mem_emb = _bytes_to_emb(row["embedding"])
+                sim = _cosine_similarity(query_embedding, mem_emb)
+                scored.append((sim, row))
+            except Exception:
+                continue
+        scored.sort(key=lambda x: x[0], reverse=True)
+        return [row for _, row in scored[:limit]]
+    async def _increment_reuse_for_ids(self, memory_ids: list[str]) -> None:
+        """Increment reuse_count by 1 for specific memories (search activation signal)."""
+        if not memory_ids:
+            return
+        db = await self._get_db()
+        placeholders = ",".join("?" for _ in memory_ids)
+        await db.execute(
+            f"UPDATE memories SET reuse_count = reuse_count + 1, updated_at = ? "
+            f"WHERE id IN ({placeholders})",
+            [_now_iso()] + memory_ids,
+        )
+        await db.commit()
+    async def search(
+        self,
+        query: str,
+        project: Optional[str] = None,
+        mem_type: Optional[str] = None,
+        limit: int = 20,
+        embedding_adapter: Any = None,
+        track_usage: bool = True,
+    ) -> list[dict]:
+        """Hybrid search: BM25 (FTS5) keywords + embedding similarity, always both.
+        Pipeline:
+        1. FTS5 BM25 keyword search — always runs.
+        2. Embedding similarity search — always runs when embedding_adapter is provided.
+           Results merged with FTS results, deduped by ID (FTS first).
+        3. Activation ranking — all candidates re-scored by:
+           task_match * 2 + confidence + recency * 0.5 + reuse_norm * 0.5 + scope_fit
+           task_match uses cosine for results with stored embeddings; normalized FTS rank
+           (0.1–1.0) otherwise — puts both sources on a comparable scale.
+        4. Confusability penalty — procedures with cosine >0.85 to a higher-ranked
+           result are down-ranked by 0.4×.
+        5. reuse_count incremented for returned memories when track_usage=True.
+        Without embedding_adapter: FTS5 only with activation ranking (no reuse increment).
+        track_usage=False: skip reuse_count increment (use in benchmarks/batch jobs).
+        """
+        # Step 0: Trigger exact match — high-precision pass for hook-style recall.
+        # Finds memories whose trigger field contains any comma-separated phrase from
+        # the query. Fast substring match; no ranking needed — these are already precise.
+        trigger_results = await self._search_by_trigger(query, project, mem_type)
+        # Step 1: FTS5 keyword search
+        fts_results = await self._search_fts(query, project, mem_type, limit)
+        # Step 2: Embedding search — runs whenever adapter is configured.
+        # Previously this was a fallback (only when BM25 returned <3 results), but
+        # that caused the hybrid to perform worse than either component alone: BM25
+        # would return 3+ wrong results on natural language queries, blocking embedding
+        # from running at all. Now both always run and activation ranking picks the winner.
+        embedding_results: list[dict] = []
+        query_embedding: Optional[list[float]] = None
+        if embedding_adapter is not None:
+            try:
+                query_embedding = await embedding_adapter.embed(query)
+                embedding_results = await self._search_embedding(
+                    query_embedding, project, mem_type, limit
+                )
+            except Exception as e:
+                print(f"[deja] Embedding search error: {e}", file=sys.stderr)
+        # Step 3: Merge results, dedup by ID (trigger first, then FTS5, then embedding)
+        seen_ids: set[str] = set()
+        merged: list[tuple[dict, str, int]] = []  # (mem, source, rank)
+        for rank, mem in enumerate(trigger_results):
+            if mem["id"] not in seen_ids:
+                seen_ids.add(mem["id"])
+                merged.append((mem, "trigger", rank))
+        for rank, mem in enumerate(fts_results):
+            if mem["id"] not in seen_ids:
+                seen_ids.add(mem["id"])
+                merged.append((mem, "fts", rank))
+        for rank, mem in enumerate(embedding_results):
+            if mem["id"] not in seen_ids:
+                seen_ids.add(mem["id"])
+                merged.append((mem, "emb", rank))
+        if not merged:
+            return []
+        # Step 4: Activation ranking
+        now_dt = datetime.now(timezone.utc)
+        scored: list[tuple[dict, Optional[bytes], float]] = []
+        for mem, source, rank in merged:
+            emb_bytes = mem.get("embedding")
+            if not isinstance(emb_bytes, bytes):
+                emb_bytes = None
+            if source == "trigger":
+                # Trigger match is definitionally correct — pin above all scored results.
+                # Use task_match=2.0 (above max cosine of 1.0) so activation score
+                # always beats any FTS/embedding result regardless of reuse_count/recency.
+                task_match = 2.0
+            elif query_embedding is not None and emb_bytes:
+                # Use cosine similarity for any result that has a stored embedding —
+                # puts FTS and embedding results on the same scale. Without this,
+                # FTS rank 0 always gets task_match=1.0 and beats embedding results
+                # with actual cosine similarity, even when the FTS result is wrong.
+                task_match = _cosine_similarity(query_embedding, _bytes_to_emb(emb_bytes))
+            else:
+                # No embedding available: fall back to BM25 rank position (0.1–1.0).
+                task_match = max(0.1, 1.0 - rank / max(len(fts_results), 1))
+            score = _activation_score(mem, task_match, project, now_dt)
+            scored.append((mem, emb_bytes, score))
+        # Step 5: Confusability penalty for procedures
+        scored = _apply_confusability_penalty(scored)
+        # Step 6: Sort by activation score, take top limit
+        scored.sort(key=lambda x: x[2], reverse=True)
+        final = [_strip_embedding(mem) for mem, _, _ in scored[:limit]]
+        # Step 7: Increment reuse_count for returned memories (passive activation signal).
+        # Only when embedding_adapter is provided AND track_usage is True. Pass
+        # track_usage=False from benchmarks or batch jobs to avoid accumulation artifacts.
+        if embedding_adapter is not None and track_usage and final:
+            await self._increment_reuse_for_ids([m["id"] for m in final])
+        return final
+    async def archive(self, memory_id: str) -> None:
+        db = await self._get_db()
+        now = _now_iso()
+        await db.execute(
+            "UPDATE memories SET archived_at = ?, updated_at = ? WHERE id = ?",
+            (now, now, memory_id),
+        )
+        await db.commit()
+    async def update_memory(self, memory_id: str, fields: dict) -> bool:
+        """Update allowed metadata fields on an existing memory.
+        Only ``trigger`` and ``type`` can be updated this way — content changes
+        go through save() (which deduplicates). Returns True if a row was updated.
+        Trigger merge: if the memory already has a trigger, the new phrases are
+        unioned with the existing ones (same merge logic as dedup in save()).
+        """
+        allowed = {"trigger", "type"}
+        updates = {k: v for k, v in fields.items() if k in allowed and v is not None}
+        if not updates:
+            return False
+        db = await self._get_db()
+        now = _now_iso()
+        # Merge trigger phrases rather than overwrite
+        if "trigger" in updates:
+            async with db.execute(
+                "SELECT trigger FROM memories WHERE id = ?", (memory_id,)
+            ) as cur:
+                row = await cur.fetchone()
+            if row is None:
+                return False
+            existing_trigger = (row["trigger"] or "") if row else ""
+            incoming_trigger = updates["trigger"] or ""
+            existing_phrases = {p.strip() for p in existing_trigger.split(",") if p.strip()}
+            incoming_phrases = {p.strip() for p in incoming_trigger.split(",") if p.strip()}
+            updates["trigger"] = ", ".join(sorted(existing_phrases | incoming_phrases))
+        set_clauses = ", ".join(f"{k} = ?" for k in updates)
+        values = list(updates.values()) + [now, memory_id]
+        cursor = await db.execute(
+            f"UPDATE memories SET {set_clauses}, updated_at = ? WHERE id = ? "
+            f"AND archived_at IS NULL AND invalidated_at IS NULL",
+            values,
+        )
+        await db.commit()
+        return cursor.rowcount > 0
+    async def list_for_export(
+        self,
+        project: Optional[str] = None,
+        types: Optional[list[str]] = None,
+        include_archived: bool = False,
+    ) -> list[dict]:
+        """List memories for export with filtering. Embedding bytes are excluded."""
+        db = await self._get_db()
+        conditions = []
+        params: list[Any] = []
+        if project:
+            # Export only this project's memories (no global)
+            conditions.append("scope = ?")
+            params.append(f"project:{project}")
+        if types:
+            placeholders = ",".join("?" for _ in types)
+            conditions.append(f"type IN ({placeholders})")
+            params.extend(types)
+        if not include_archived:
+            conditions.append("archived_at IS NULL")
+            conditions.append("invalidated_at IS NULL")
+        where_clause = ""
+        if conditions:
+            where_clause = "WHERE " + " AND ".join(conditions)
+        query = f"SELECT * FROM memories {where_clause} ORDER BY created_at ASC"
+        async with db.execute(query, params) as cursor:
+            rows = await cursor.fetchall()
+            return [_strip_embedding(dict(row)) for row in rows]
+    async def upsert(
+        self,
+        memory: dict,
+        merge_strategy: str = "skip",
+    ) -> str:
+        """Insert or update a memory during import based on merge strategy.
+        Strategies:
+        - skip: If ID exists, do nothing.
+        - overwrite: Replace existing record wholesale.
+        - update-confidence: If ID exists and content matches, bump confidence.
+        """
+        db = await self._get_db()
+        mem_id = memory["id"]
+        existing = await self.get(mem_id)
+        if not existing:
+            # New record, just insert
+            fields = list(memory.keys())
+            placeholders = ",".join("?" for _ in fields)
+            query = f"INSERT INTO memories ({','.join(fields)}) VALUES ({placeholders})"
+            await db.execute(query, [memory[f] for f in fields])
+            await db.commit()
+            return "inserted"
+        if merge_strategy == "skip":
+            return "skipped"
+        if merge_strategy == "overwrite":
+            fields = [f for f in memory.keys() if f != "id"]
+            set_clause = ",".join(f"{f} = ?" for f in fields)
+            query = f"UPDATE memories SET {set_clause} WHERE id = ?"
+            params = [memory[f] for f in fields] + [mem_id]
+            await db.execute(query, params)
+            await db.commit()
+            return "overwritten"
+        if merge_strategy == "update-confidence":
+            if memory["content"] == existing["content"]:
+                new_confidence = min(1.0, existing["confidence"] + 0.05)
+                now = _now_iso()
+                await db.execute(
+                    """
+                    UPDATE memories
+                    SET confidence = ?, last_confirmed = ?, updated_at = ?
+                    WHERE id = ?
+                    """,
+                    (new_confidence, now, now, mem_id),
+                )
+                await db.commit()
+                return "updated"
+            else:
+                return "skipped"
+        return "skipped"
+    async def get(self, memory_id: str) -> Optional[dict]:
+        db = await self._get_db()
+        async with db.execute(
+            "SELECT * FROM memories WHERE id = ?", (memory_id,)
+        ) as cursor:
+            row = await cursor.fetchone()
+            return _strip_embedding(dict(row)) if row else None
+    async def invalidate(self, memory_id: str) -> None:
+        """Mark a memory as invalidated (superseded by newer information)."""
+        db = await self._get_db()
+        now = _now_iso()
+        await db.execute(
+            "UPDATE memories SET invalidated_at = ?, updated_at = ? WHERE id = ?",
+            (now, now, memory_id),
+        )
+        await db.commit()
+    async def save_observation(self, project: Optional[str], content: str) -> str:
+        """Save one observation to the observations table."""
+        db = await self._get_db()
+        obs_id = str(ULID())
+        now = _now_iso()
+        token_estimate = len(content.split()) * 2
+        await db.execute(
+            """
+            INSERT INTO observations (id, project, content, token_estimate, created_at, reflector_pass)
+            VALUES (?, ?, ?, ?, ?, 0)
+            """,
+            (obs_id, project, content, token_estimate, now),
+        )
+        await db.commit()
+        return obs_id
+    async def list_observations(self, project: Optional[str] = None) -> list[dict]:
+        """List all observations for a project (or all if project is None)."""
+        db = await self._get_db()
+        if project is not None:
+            async with db.execute(
+                "SELECT * FROM observations WHERE project = ? ORDER BY created_at ASC",
+                (project,),
+            ) as cursor:
+                rows = await cursor.fetchall()
+        else:
+            async with db.execute(
+                "SELECT * FROM observations ORDER BY created_at ASC"
+            ) as cursor:
+                rows = await cursor.fetchall()
+        return [dict(row) for row in rows]
+    async def replace_observations(
+        self, project: Optional[str], new_texts: list[str]
+    ) -> None:
+        """Replace the full observation log for a project with condensed versions."""
+        db = await self._get_db()
+        if project is not None:
+            await db.execute("DELETE FROM observations WHERE project = ?", (project,))
+        else:
+            await db.execute("DELETE FROM observations WHERE project IS NULL")
+        now = _now_iso()
+        for text in new_texts:
+            obs_id = str(ULID())
+            token_estimate = len(text.split()) * 2
+            await db.execute(
+                """
+                INSERT INTO observations (id, project, content, token_estimate, created_at, reflector_pass)
+                VALUES (?, ?, ?, ?, ?, 1)
+                """,
+                (obs_id, project, text, token_estimate, now),
+            )
+        await db.commit()
+    async def get_reflection_meta(self, project: Optional[str] = None) -> Optional[dict]:
+        """Get reflection metadata for a project."""
+        db = await self._get_db()
+        key = _project_meta_key(project)
+        async with db.execute(
+            "SELECT * FROM reflection_meta WHERE project = ?", (key,)
+        ) as cursor:
+            row = await cursor.fetchone()
+            return dict(row) if row else None
+    async def set_reflection_meta(self, project: Optional[str] = None, **fields) -> None:
+        """Insert or update reflection metadata fields for a project."""
+        db = await self._get_db()
+        key = _project_meta_key(project)
+        existing = await self.get_reflection_meta(project)
+        if existing is None:
+            all_fields: dict = {
+                "project": key,
+                "last_observer_at": None,
+                "last_reflector_at": None,
+                "last_decay_at": None,
+                "last_promote_at": None,
+                "last_archive_at": None,
+            }
+            all_fields.update(fields)
+            cols = ",".join(all_fields.keys())
+            placeholders = ",".join("?" for _ in all_fields)
+            await db.execute(
+                f"INSERT INTO reflection_meta ({cols}) VALUES ({placeholders})",
+                list(all_fields.values()),
+            )
+        else:
+            set_clause = ",".join(f"{f} = ?" for f in fields)
+            await db.execute(
+                f"UPDATE reflection_meta SET {set_clause} WHERE project = ?",
+                list(fields.values()) + [key],
+            )
+        await db.commit()
+    async def list_for_reflection(
+        self,
+        project: Optional[str] = None,
+        since: Optional[str] = None,
+    ) -> list[dict]:
+        """Get active memories for the Observer to process.
+        project=None returns ALL memories (global + all projects).
+        project='X' returns only scope='project:X' memories.
+        since restricts to memories updated after that ISO timestamp.
+        """
+        db = await self._get_db()
+        conditions = ["archived_at IS NULL", "invalidated_at IS NULL"]
+        params: list[Any] = []
+        if project is not None:
+            conditions.append("scope = ?")
+            params.append(f"project:{project}")
+        if since:
+            conditions.append("updated_at > ?")
+            params.append(since)
+        where = "WHERE " + " AND ".join(conditions)
+        query = f"SELECT * FROM memories {where} ORDER BY updated_at ASC"
+        async with db.execute(query, params) as cursor:
+            rows = await cursor.fetchall()
+            return [_strip_embedding(dict(row)) for row in rows]
+    async def decay_unconfirmed(
+        self,
+        days_threshold: int,
+        decay_per_week: float,
+        user_decay_per_week: float,
+    ) -> int:
+        """Reduce confidence on memories not confirmed in days_threshold days.
+        Two decay rates are applied based on memory category:
+        - category='agent' (gotcha, decision, progress, pattern): uses decay_per_week.
+          Operational knowledge goes stale; higher rate reflects that.
+        - category='user' (preferences, habits): uses user_decay_per_week.
+          Personal style preferences are stable across time; much lower rate.
+        Returns number of memories whose confidence was updated.
+        """
+        db = await self._get_db()
+        now = datetime.now(timezone.utc)
+        threshold_iso = (now - timedelta(days=days_threshold)).isoformat()
+        async with db.execute(
+            """
+            SELECT id, category, confidence, last_confirmed FROM memories
+            WHERE archived_at IS NULL
+              AND invalidated_at IS NULL
+              AND (last_confirmed IS NULL OR last_confirmed < ?)
+            """,
+            (threshold_iso,),
+        ) as cursor:
+            rows = [dict(r) for r in await cursor.fetchall()]
+        count = 0
+        for row in rows:
+            lc = row["last_confirmed"]
+            if lc:
+                weeks_since = (now - datetime.fromisoformat(lc)).days / 7.0
+            else:
+                weeks_since = days_threshold / 7.0
+            rate = user_decay_per_week if row["category"] == "user" else decay_per_week
+            new_conf = max(0.0, row["confidence"] - rate * weeks_since)
+            if abs(new_conf - row["confidence"]) > 0.001:
+                await db.execute(
+                    "UPDATE memories SET confidence = ?, updated_at = ? WHERE id = ?",
+                    (new_conf, _now_iso(), row["id"]),
+                )
+                count += 1
+        if count:
+            await db.commit()
+        return count
+    async def archive_below_threshold(self, threshold: float) -> int:
+        """Archive memories whose confidence is below threshold.
+        Returns number of memories archived.
+        """
+        db = await self._get_db()
+        now = _now_iso()
+        async with db.execute(
+            """
+            UPDATE memories SET archived_at = ?, updated_at = ?
+            WHERE confidence < ? AND archived_at IS NULL AND invalidated_at IS NULL
+            """,
+            (now, now, threshold),
+        ) as cursor:
+            count = cursor.rowcount
+        await db.commit()
+        return count
+    async def increment_reuse_count(self, project: Optional[str] = None) -> int:
+        """Increment reuse_count by 1 for all active memories (used after Reflector pass).
+        Surviving compression is a confirmation signal — memories still represented
+        in the observation log have proven worth keeping.
+        Returns number of memories updated.
+        """
+        db = await self._get_db()
+        conditions = ["archived_at IS NULL", "invalidated_at IS NULL"]
+        params: list[Any] = []
+        if project is not None:
+            conditions.append("scope = ?")
+            params.append(f"project:{project}")
+        where = "WHERE " + " AND ".join(conditions)
+        async with db.execute(
+            f"UPDATE memories SET reuse_count = reuse_count + 1, updated_at = ? {where}",
+            [_now_iso()] + params,
+        ) as cursor:
+            count = cursor.rowcount
+        await db.commit()
+        return count
+    async def promote_patterns_to_global(self, min_project_count: int) -> int:
+        """Promote pattern and procedure memories appearing in min_project_count+
+        distinct projects to global scope. Returns number promoted.
+        """
+        db = await self._get_db()
+        async with db.execute(
+            """
+            SELECT * FROM memories
+            WHERE type IN ('pattern', 'procedure')
+              AND scope != 'global'
+              AND archived_at IS NULL
+              AND invalidated_at IS NULL
+            ORDER BY created_at ASC
+            """
+        ) as cursor:
+            patterns = [dict(r) for r in await cursor.fetchall()]
+        promoted = 0
+        processed_ids: set[str] = set()
+        for i, pat in enumerate(patterns):
+            if pat["id"] in processed_ids:
+                continue
+            similar = [pat]
+            for other in patterns[i + 1:]:
+                if other["project"] != pat["project"] and other["id"] not in processed_ids:
+                    if _token_overlap(pat["content"], other["content"]) > 0.7:
+                        similar.append(other)
+            distinct_projects = {m["project"] for m in similar if m.get("project")}
+            if len(distinct_projects) >= min_project_count:
+                # Use raw FTS5 search (no side effects) to check for existing global
+                global_candidates = await self._search_fts(
+                    pat["content"], None, mem_type=pat["type"], limit=5
+                )
+                has_global = any(m["scope"] == "global" for m in global_candidates)
+                if not has_global:
+                    best = max(similar, key=lambda m: m["confidence"])
+                    now = _now_iso()
+                    new_id = str(ULID())
+                    await db.execute(
+                        """
+                        INSERT INTO memories
+                            (id, type, category, content, scope, project, source,
+                             confidence, reuse_count, domain, entity_graph, trigger,
+                             created_at, updated_at, last_confirmed)
+                        VALUES (?, ?, ?, ?, 'global', NULL, 'deja_promote',
+                                ?, ?, ?, NULL, ?, ?, ?, ?)
+                        """,
+                        (
+                            new_id,
+                            best["type"],
+                            best.get("category", "agent"),
+                            best["content"],
+                            best["confidence"],
+                            best.get("reuse_count", 0),
+                            best.get("domain"),
+                            best.get("trigger"),
+                            now, now, now,
+                        ),
+                    )
+                    promoted += 1
+                for m in similar:
+                    processed_ids.add(m["id"])
+        if promoted:
+            await db.commit()
+        return promoted
+    async def get_stats(self, project: Optional[str] = None) -> dict:
+        """Return memory statistics for a project or all memories."""
+        db = await self._get_db()
+        if project:
+            scope_filter = "scope = ?"
+            scope_params: list[Any] = [f"project:{project}"]
+        else:
+            scope_filter = "1=1"
+            scope_params = []
+        async with db.execute(
+            f"""
+            SELECT type, COUNT(*) as cnt FROM memories
+            WHERE {scope_filter} AND archived_at IS NULL AND invalidated_at IS NULL
+            GROUP BY type
+            """,
+            scope_params,
+        ) as cursor:
+            by_type = {r["type"]: r["cnt"] for r in await cursor.fetchall()}
+        async with db.execute(
+            f"SELECT COUNT(*) as n FROM memories WHERE {scope_filter} AND archived_at IS NOT NULL",
+            scope_params,
+        ) as cursor:
+            archived = (await cursor.fetchone())["n"]
+        async with db.execute(
+            f"""SELECT COUNT(*) as n FROM memories
+            WHERE {scope_filter} AND invalidated_at IS NOT NULL AND archived_at IS NULL""",
+            scope_params,
+        ) as cursor:
+            invalidated = (await cursor.fetchone())["n"]
+        async with db.execute(
+            f"""SELECT content FROM memories
+            WHERE {scope_filter} AND archived_at IS NULL AND invalidated_at IS NULL""",
+            scope_params,
+        ) as cursor:
+            token_estimate = sum(
+                len(r["content"].split()) * 2 for r in await cursor.fetchall()
+            )
+        # Count memories with embeddings
+        async with db.execute(
+            f"""SELECT COUNT(*) as n FROM memories
+            WHERE {scope_filter} AND archived_at IS NULL AND invalidated_at IS NULL
+            AND embedding IS NOT NULL""",
+            scope_params,
+        ) as cursor:
+            with_embeddings = (await cursor.fetchone())["n"]
+        if project:
+            obs_clause, obs_params = "WHERE project = ?", [project]
+        else:
+            obs_clause, obs_params = "", []
+        async with db.execute(
+            f"SELECT COUNT(*) as n FROM observations {obs_clause}", obs_params
+        ) as cursor:
+            obs_count = (await cursor.fetchone())["n"]
+        meta = await self.get_reflection_meta(project)
+        return {
+            "project": project or "global",
+            "active": sum(by_type.values()),
+            "by_type": by_type,
+            "archived": archived,
+            "invalidated": invalidated,
+            "observations": obs_count,
+            "token_estimate": token_estimate,
+            "with_embeddings": with_embeddings,
+            "last_observer_at": meta.get("last_observer_at") if meta else None,
+            "last_reflector_at": meta.get("last_reflector_at") if meta else None,
+            "last_decay_at": meta.get("last_decay_at") if meta else None,
+        }
+    async def close(self) -> None:
+        if self._db is not None:
+            await self._db.close()
+            self._db = None