npm - superlocalmemory - Versions diffs - 3.3.9 → 3.3.11 - Mend

superlocalmemory 3.3.9 → 3.3.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/package.json +1 -1
package/pyproject.toml +1 -1
package/src/superlocalmemory/core/config.py +1 -1
package/src/superlocalmemory/core/store_pipeline.py +21 -0
package/src/superlocalmemory/retrieval/entity_channel.py +163 -20

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "superlocalmemory",
-  "version": "3.3.9",
+  "version": "3.3.11",
   "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
   "keywords": [
     "ai-memory",

package/pyproject.toml CHANGED

@@ -1,6 +1,6 @@
 [project]
 name = "superlocalmemory"
-version = "3.3.9"
+version = "3.3.11"
 description = "Information-geometric agent memory with mathematical guarantees"
 readme = "README.md"
 license = {text = "MIT"}

package/src/superlocalmemory/core/config.py CHANGED

@@ -115,7 +115,7 @@ class EncodingConfig:
     # Fact extraction
     chunk_size: int = 10           # Conversation turns per extraction chunk
-    max_facts_per_chunk: int = 5   # Max facts extracted per chunk
+    max_facts_per_chunk: int = 10  # V3.3.11: increased from 5 to preserve more details
     min_fact_confidence: float = 0.3
     # Entity resolution

package/src/superlocalmemory/core/store_pipeline.py CHANGED

@@ -166,6 +166,27 @@ def run_store(
         turns=[content], session_id=session_id,
         session_date=parsed_date, speaker_a=speaker,
     )
+    # V3.3.11: Also store raw content as a verbatim fact to preserve details
+    # that fact extraction may abstract away (dates, names, specifics).
+    # This ensures BM25 and semantic search can always find the original text.
+    if content.strip() and len(content.strip()) >= 20:
+        import uuid
+        verbatim = AtomicFact(
+            fact_id=uuid.uuid4().hex[:16],
+            content=content.strip(),
+            fact_type=FactType.EPISODIC,
+            entities=[],
+            session_id=session_id,
+            observation_date=parsed_date,
+            confidence=0.9,
+            importance=0.5,
+        )
+        # Avoid duplicate if extraction already produced the exact same text
+        extracted_texts = {f.content.strip().lower() for f in facts}
+        if verbatim.content.strip().lower() not in extracted_texts:
+            facts.append(verbatim)
     if not facts:
         return []

package/src/superlocalmemory/retrieval/entity_channel.py CHANGED

@@ -72,7 +72,13 @@ def extract_query_entities(query: str) -> list[str]:
 class EntityGraphChannel:
-    """Entity-based retrieval with spreading activation (SA-RAG)."""
+    """Entity-based retrieval with spreading activation (SA-RAG).
+    V3.3.9: In-memory adjacency cache for O(1) edge lookup.
+    Replaces per-node SQLite queries (23ms each) with dict lookup (<0.001ms).
+    The cache is loaded once per profile and invalidated on store/edge changes.
+    Memory cost: ~18 MB for 232K edges. Zero quality change — same algorithm.
+    """
     def __init__(
         self, db: DatabaseManager,
@@ -85,9 +91,115 @@ class EntityGraphChannel:
         self._decay = decay
         self._threshold = activation_threshold
         self._max_hops = max_hops
+        # In-memory adjacency: {node_id -> [(neighbor_id, weight), ...]}
+        self._adj: dict[str, list[tuple[str, float]]] = {}
+        self._adj_profile: str = ""  # Track which profile is loaded
+        self._adj_edge_count: int = 0  # Track edge count for staleness detection
+    def _ensure_adjacency(self, profile_id: str) -> None:
+        """Load graph adjacency into memory for fast spreading activation.
+        Loads ALL edges for a profile into a bidirectional dict.
+        Called once per profile switch or when edge count changes (new store).
+        Cost: ~1s for 232K edges, ~18 MB RAM.
+        """
+        # Check staleness: profile changed or new edges added since last load
+        current_count = self._get_edge_count(profile_id)
+        if (self._adj_profile == profile_id
+                and self._adj
+                and self._adj_edge_count == current_count):
+            return
+        adj: dict[str, list[tuple[str, float]]] = defaultdict(list)
+        try:
+            rows = self._db.execute(
+                "SELECT source_id, target_id, weight FROM graph_edges WHERE profile_id = ?",
+                (profile_id,),
+            )
+        except Exception:
+            rows = []
+        for r in rows:
+            d = dict(r)
+            s, t, w = d["source_id"], d["target_id"], float(d["weight"])
+            adj[s].append((t, w))
+            adj[t].append((s, w))
+        self._adj = dict(adj)  # Convert defaultdict to regular dict (no accidental growth)
+        self._adj_profile = profile_id
+        self._adj_edge_count = current_count
+        # Also load entity maps (same staleness lifecycle)
+        self._load_entity_maps(profile_id)
+        logger.info(
+            "Loaded adjacency cache: %d nodes, %d edges, %d entity mappings for profile %s",
+            len(self._adj), sum(len(v) for v in self._adj.values()) // 2,
+            len(self._entity_to_facts), profile_id,
+        )
+    def _get_edge_count(self, profile_id: str) -> int:
+        """Fast edge count for staleness check (~1ms)."""
+        try:
+            rows = self._db.execute(
+                "SELECT COUNT(*) as cnt FROM graph_edges WHERE profile_id = ?",
+                (profile_id,),
+            )
+            if rows:
+                return int(dict(rows[0]).get("cnt", 0))
+        except Exception:
+            pass
+        return 0
+    def _load_entity_maps(self, profile_id: str) -> None:
+        """Pre-load entity→fact and fact→entity maps into memory.
+        Eliminates per-entity and per-fact SQL in the spreading activation loop.
+        Same data, same algorithm — zero quality change.
+        """
+        # entity_id -> [fact_id, ...]
+        self._entity_to_facts: dict[str, list[str]] = defaultdict(list)
+        # fact_id -> [entity_id, ...]
+        self._fact_to_entities: dict[str, list[str]] = defaultdict(list)
+        try:
+            rows = self._db.execute(
+                "SELECT fact_id, canonical_entities_json FROM atomic_facts "
+                "WHERE profile_id = ? AND canonical_entities_json IS NOT NULL "
+                "AND canonical_entities_json != ''",
+                (profile_id,),
+            )
+        except Exception:
+            rows = []
+        for r in rows:
+            d = dict(r)
+            fid = d["fact_id"]
+            raw = d.get("canonical_entities_json")
+            if not raw:
+                continue
+            try:
+                eids = json.loads(raw)
+                for eid in eids:
+                    self._entity_to_facts[eid].append(fid)
+                    self._fact_to_entities[fid].append(eid)
+            except (ValueError, TypeError):
+                continue
+        logger.info(
+            "Loaded entity maps: %d entities, %d facts with entities",
+            len(self._entity_to_facts), len(self._fact_to_entities),
+        )
+    def invalidate_cache(self) -> None:
+        """Clear all caches. Call after adding/removing edges or facts."""
+        self._adj.clear()
+        self._adj_profile = ""
+        self._adj_edge_count = 0
+        self._entity_to_facts = defaultdict(list)
+        self._fact_to_entities = defaultdict(list)
     def search(self, query: str, profile_id: str, top_k: int = 50) -> list[tuple[str, float]]:
-        """Search via entity graph with spreading activation."""
+        """Search via entity graph with spreading activation.
+        V3.3.9: Uses in-memory adjacency for O(1) edge lookups.
+        Same algorithm as before — zero quality change.
+        """
         raw_entities = extract_query_entities(query)
         if not raw_entities:
             return []
@@ -96,15 +208,24 @@ class EntityGraphChannel:
         if not canonical_ids:
             return []
+        # Load adjacency cache (no-op if already loaded for this profile)
+        self._ensure_adjacency(profile_id)
         # Seed activation from direct entity-linked facts
+        # Use in-memory map when available, fall back to SQL for mock/test DBs
         activation: dict[str, float] = defaultdict(float)
         visited_entities: set[str] = set(canonical_ids)
+        use_cache = bool(self._entity_to_facts)
         for eid in canonical_ids:
-            for fact in self._db.get_facts_by_entity(eid, profile_id):
-                activation[fact.fact_id] = max(activation[fact.fact_id], 1.0)
+            if use_cache:
+                for fid in self._entity_to_facts.get(eid, ()):
+                    activation[fid] = max(activation[fid], 1.0)
+            else:
+                for fact in self._db.get_facts_by_entity(eid, profile_id):
+                    activation[fact.fact_id] = max(activation[fact.fact_id], 1.0)
-        # Spreading activation through graph edges
+        # Spreading activation through graph edges (all in-memory O(1) lookups)
         frontier = set(activation.keys())
         for hop in range(1, self._max_hops):
             hop_decay = self._decay ** hop
@@ -113,21 +234,43 @@ class EntityGraphChannel:
             next_frontier: set[str] = set()
             for fid in frontier:
-                for edge in self._db.get_edges_for_node(fid, profile_id):
-                    neighbor = edge.target_id if edge.source_id == fid else edge.source_id
-                    propagated = activation[fid] * self._decay
-                    if propagated >= self._threshold and propagated > activation.get(neighbor, 0.0):
-                        activation[neighbor] = propagated
-                        next_frontier.add(neighbor)
-            # Discover new entities from activated facts -> get their facts
-            new_eids = self._discover_entities(frontier, profile_id, visited_entities)
-            for eid in new_eids:
-                visited_entities.add(eid)
-                for fact in self._db.get_facts_by_entity(eid, profile_id):
-                    if hop_decay > activation.get(fact.fact_id, 0.0):
-                        activation[fact.fact_id] = hop_decay
-                        next_frontier.add(fact.fact_id)
+                if use_cache:
+                    neighbors = self._adj.get(fid, ())
+                    for neighbor, _weight in neighbors:
+                        propagated = activation[fid] * self._decay
+                        if propagated >= self._threshold and propagated > activation.get(neighbor, 0.0):
+                            activation[neighbor] = propagated
+                            next_frontier.add(neighbor)
+                else:
+                    for edge in self._db.get_edges_for_node(fid, profile_id):
+                        neighbor = edge.target_id if edge.source_id == fid else edge.source_id
+                        propagated = activation[fid] * self._decay
+                        if propagated >= self._threshold and propagated > activation.get(neighbor, 0.0):
+                            activation[neighbor] = propagated
+                            next_frontier.add(neighbor)
+            # Discover new entities from activated facts
+            if use_cache:
+                new_eids: list[str] = []
+                for fid in frontier:
+                    for eid in self._fact_to_entities.get(fid, ()):
+                        if eid not in visited_entities:
+                            visited_entities.add(eid)
+                            new_eids.append(eid)
+                for eid in new_eids:
+                    for fid in self._entity_to_facts.get(eid, ()):
+                        if hop_decay > activation.get(fid, 0.0):
+                            activation[fid] = hop_decay
+                            next_frontier.add(fid)
+            else:
+                # SQL fallback (mock/test DBs)
+                new_eids_sql = self._discover_entities(frontier, profile_id, visited_entities)
+                for eid in new_eids_sql:
+                    visited_entities.add(eid)
+                    for fact in self._db.get_facts_by_entity(eid, profile_id):
+                        if hop_decay > activation.get(fact.fact_id, 0.0):
+                            activation[fact.fact_id] = hop_decay
+                            next_frontier.add(fact.fact_id)
             frontier = next_frontier
             if not frontier: