superlocalmemory 3.3.9 → 3.3.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "superlocalmemory",
3
- "version": "3.3.9",
3
+ "version": "3.3.11",
4
4
  "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
5
5
  "keywords": [
6
6
  "ai-memory",
package/pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "superlocalmemory"
3
- version = "3.3.9"
3
+ version = "3.3.11"
4
4
  description = "Information-geometric agent memory with mathematical guarantees"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -115,7 +115,7 @@ class EncodingConfig:
115
115
 
116
116
  # Fact extraction
117
117
  chunk_size: int = 10 # Conversation turns per extraction chunk
118
- max_facts_per_chunk: int = 5 # Max facts extracted per chunk
118
+ max_facts_per_chunk: int = 10 # V3.3.11: increased from 5 to preserve more details
119
119
  min_fact_confidence: float = 0.3
120
120
 
121
121
  # Entity resolution
@@ -166,6 +166,27 @@ def run_store(
166
166
  turns=[content], session_id=session_id,
167
167
  session_date=parsed_date, speaker_a=speaker,
168
168
  )
169
+
170
+ # V3.3.11: Also store raw content as a verbatim fact to preserve details
171
+ # that fact extraction may abstract away (dates, names, specifics).
172
+ # This ensures BM25 and semantic search can always find the original text.
173
+ if content.strip() and len(content.strip()) >= 20:
174
+ import uuid
175
+ verbatim = AtomicFact(
176
+ fact_id=uuid.uuid4().hex[:16],
177
+ content=content.strip(),
178
+ fact_type=FactType.EPISODIC,
179
+ entities=[],
180
+ session_id=session_id,
181
+ observation_date=parsed_date,
182
+ confidence=0.9,
183
+ importance=0.5,
184
+ )
185
+ # Avoid duplicate if extraction already produced the exact same text
186
+ extracted_texts = {f.content.strip().lower() for f in facts}
187
+ if verbatim.content.strip().lower() not in extracted_texts:
188
+ facts.append(verbatim)
189
+
169
190
  if not facts:
170
191
  return []
171
192
 
@@ -72,7 +72,13 @@ def extract_query_entities(query: str) -> list[str]:
72
72
 
73
73
 
74
74
  class EntityGraphChannel:
75
- """Entity-based retrieval with spreading activation (SA-RAG)."""
75
+ """Entity-based retrieval with spreading activation (SA-RAG).
76
+
77
+ V3.3.9: In-memory adjacency cache for O(1) edge lookup.
78
+ Replaces per-node SQLite queries (23ms each) with dict lookup (<0.001ms).
79
+ The cache is loaded once per profile and invalidated on store/edge changes.
80
+ Memory cost: ~18 MB for 232K edges. Zero quality change — same algorithm.
81
+ """
76
82
 
77
83
  def __init__(
78
84
  self, db: DatabaseManager,
@@ -85,9 +91,115 @@ class EntityGraphChannel:
85
91
  self._decay = decay
86
92
  self._threshold = activation_threshold
87
93
  self._max_hops = max_hops
94
+ # In-memory adjacency: {node_id -> [(neighbor_id, weight), ...]}
95
+ self._adj: dict[str, list[tuple[str, float]]] = {}
96
+ self._adj_profile: str = "" # Track which profile is loaded
97
+ self._adj_edge_count: int = 0 # Track edge count for staleness detection
98
+
99
+ def _ensure_adjacency(self, profile_id: str) -> None:
100
+ """Load graph adjacency into memory for fast spreading activation.
101
+
102
+ Loads ALL edges for a profile into a bidirectional dict.
103
+ Called once per profile switch or when edge count changes (new store).
104
+ Cost: ~1s for 232K edges, ~18 MB RAM.
105
+ """
106
+ # Check staleness: profile changed or new edges added since last load
107
+ current_count = self._get_edge_count(profile_id)
108
+ if (self._adj_profile == profile_id
109
+ and self._adj
110
+ and self._adj_edge_count == current_count):
111
+ return
112
+ adj: dict[str, list[tuple[str, float]]] = defaultdict(list)
113
+ try:
114
+ rows = self._db.execute(
115
+ "SELECT source_id, target_id, weight FROM graph_edges WHERE profile_id = ?",
116
+ (profile_id,),
117
+ )
118
+ except Exception:
119
+ rows = []
120
+ for r in rows:
121
+ d = dict(r)
122
+ s, t, w = d["source_id"], d["target_id"], float(d["weight"])
123
+ adj[s].append((t, w))
124
+ adj[t].append((s, w))
125
+ self._adj = dict(adj) # Convert defaultdict to regular dict (no accidental growth)
126
+ self._adj_profile = profile_id
127
+ self._adj_edge_count = current_count
128
+ # Also load entity maps (same staleness lifecycle)
129
+ self._load_entity_maps(profile_id)
130
+
131
+ logger.info(
132
+ "Loaded adjacency cache: %d nodes, %d edges, %d entity mappings for profile %s",
133
+ len(self._adj), sum(len(v) for v in self._adj.values()) // 2,
134
+ len(self._entity_to_facts), profile_id,
135
+ )
136
+
137
+ def _get_edge_count(self, profile_id: str) -> int:
138
+ """Fast edge count for staleness check (~1ms)."""
139
+ try:
140
+ rows = self._db.execute(
141
+ "SELECT COUNT(*) as cnt FROM graph_edges WHERE profile_id = ?",
142
+ (profile_id,),
143
+ )
144
+ if rows:
145
+ return int(dict(rows[0]).get("cnt", 0))
146
+ except Exception:
147
+ pass
148
+ return 0
149
+
150
+ def _load_entity_maps(self, profile_id: str) -> None:
151
+ """Pre-load entity→fact and fact→entity maps into memory.
152
+
153
+ Eliminates per-entity and per-fact SQL in the spreading activation loop.
154
+ Same data, same algorithm — zero quality change.
155
+ """
156
+ # entity_id -> [fact_id, ...]
157
+ self._entity_to_facts: dict[str, list[str]] = defaultdict(list)
158
+ # fact_id -> [entity_id, ...]
159
+ self._fact_to_entities: dict[str, list[str]] = defaultdict(list)
160
+
161
+ try:
162
+ rows = self._db.execute(
163
+ "SELECT fact_id, canonical_entities_json FROM atomic_facts "
164
+ "WHERE profile_id = ? AND canonical_entities_json IS NOT NULL "
165
+ "AND canonical_entities_json != ''",
166
+ (profile_id,),
167
+ )
168
+ except Exception:
169
+ rows = []
170
+ for r in rows:
171
+ d = dict(r)
172
+ fid = d["fact_id"]
173
+ raw = d.get("canonical_entities_json")
174
+ if not raw:
175
+ continue
176
+ try:
177
+ eids = json.loads(raw)
178
+ for eid in eids:
179
+ self._entity_to_facts[eid].append(fid)
180
+ self._fact_to_entities[fid].append(eid)
181
+ except (ValueError, TypeError):
182
+ continue
183
+
184
+ logger.info(
185
+ "Loaded entity maps: %d entities, %d facts with entities",
186
+ len(self._entity_to_facts), len(self._fact_to_entities),
187
+ )
188
+
189
+ def invalidate_cache(self) -> None:
190
+ """Clear all caches. Call after adding/removing edges or facts."""
191
+ self._adj.clear()
192
+ self._adj_profile = ""
193
+ self._adj_edge_count = 0
194
+ self._entity_to_facts = defaultdict(list)
195
+ self._fact_to_entities = defaultdict(list)
88
196
 
89
197
  def search(self, query: str, profile_id: str, top_k: int = 50) -> list[tuple[str, float]]:
90
- """Search via entity graph with spreading activation."""
198
+ """Search via entity graph with spreading activation.
199
+
200
+ V3.3.9: Uses in-memory adjacency for O(1) edge lookups.
201
+ Same algorithm as before — zero quality change.
202
+ """
91
203
  raw_entities = extract_query_entities(query)
92
204
  if not raw_entities:
93
205
  return []
@@ -96,15 +208,24 @@ class EntityGraphChannel:
96
208
  if not canonical_ids:
97
209
  return []
98
210
 
211
+ # Load adjacency cache (no-op if already loaded for this profile)
212
+ self._ensure_adjacency(profile_id)
213
+
99
214
  # Seed activation from direct entity-linked facts
215
+ # Use in-memory map when available, fall back to SQL for mock/test DBs
100
216
  activation: dict[str, float] = defaultdict(float)
101
217
  visited_entities: set[str] = set(canonical_ids)
102
218
 
219
+ use_cache = bool(self._entity_to_facts)
103
220
  for eid in canonical_ids:
104
- for fact in self._db.get_facts_by_entity(eid, profile_id):
105
- activation[fact.fact_id] = max(activation[fact.fact_id], 1.0)
221
+ if use_cache:
222
+ for fid in self._entity_to_facts.get(eid, ()):
223
+ activation[fid] = max(activation[fid], 1.0)
224
+ else:
225
+ for fact in self._db.get_facts_by_entity(eid, profile_id):
226
+ activation[fact.fact_id] = max(activation[fact.fact_id], 1.0)
106
227
 
107
- # Spreading activation through graph edges
228
+ # Spreading activation through graph edges (all in-memory O(1) lookups)
108
229
  frontier = set(activation.keys())
109
230
  for hop in range(1, self._max_hops):
110
231
  hop_decay = self._decay ** hop
@@ -113,21 +234,43 @@ class EntityGraphChannel:
113
234
  next_frontier: set[str] = set()
114
235
 
115
236
  for fid in frontier:
116
- for edge in self._db.get_edges_for_node(fid, profile_id):
117
- neighbor = edge.target_id if edge.source_id == fid else edge.source_id
118
- propagated = activation[fid] * self._decay
119
- if propagated >= self._threshold and propagated > activation.get(neighbor, 0.0):
120
- activation[neighbor] = propagated
121
- next_frontier.add(neighbor)
122
-
123
- # Discover new entities from activated facts -> get their facts
124
- new_eids = self._discover_entities(frontier, profile_id, visited_entities)
125
- for eid in new_eids:
126
- visited_entities.add(eid)
127
- for fact in self._db.get_facts_by_entity(eid, profile_id):
128
- if hop_decay > activation.get(fact.fact_id, 0.0):
129
- activation[fact.fact_id] = hop_decay
130
- next_frontier.add(fact.fact_id)
237
+ if use_cache:
238
+ neighbors = self._adj.get(fid, ())
239
+ for neighbor, _weight in neighbors:
240
+ propagated = activation[fid] * self._decay
241
+ if propagated >= self._threshold and propagated > activation.get(neighbor, 0.0):
242
+ activation[neighbor] = propagated
243
+ next_frontier.add(neighbor)
244
+ else:
245
+ for edge in self._db.get_edges_for_node(fid, profile_id):
246
+ neighbor = edge.target_id if edge.source_id == fid else edge.source_id
247
+ propagated = activation[fid] * self._decay
248
+ if propagated >= self._threshold and propagated > activation.get(neighbor, 0.0):
249
+ activation[neighbor] = propagated
250
+ next_frontier.add(neighbor)
251
+
252
+ # Discover new entities from activated facts
253
+ if use_cache:
254
+ new_eids: list[str] = []
255
+ for fid in frontier:
256
+ for eid in self._fact_to_entities.get(fid, ()):
257
+ if eid not in visited_entities:
258
+ visited_entities.add(eid)
259
+ new_eids.append(eid)
260
+ for eid in new_eids:
261
+ for fid in self._entity_to_facts.get(eid, ()):
262
+ if hop_decay > activation.get(fid, 0.0):
263
+ activation[fid] = hop_decay
264
+ next_frontier.add(fid)
265
+ else:
266
+ # SQL fallback (mock/test DBs)
267
+ new_eids_sql = self._discover_entities(frontier, profile_id, visited_entities)
268
+ for eid in new_eids_sql:
269
+ visited_entities.add(eid)
270
+ for fact in self._db.get_facts_by_entity(eid, profile_id):
271
+ if hop_decay > activation.get(fact.fact_id, 0.0):
272
+ activation[fact.fact_id] = hop_decay
273
+ next_frontier.add(fact.fact_id)
131
274
 
132
275
  frontier = next_frontier
133
276
  if not frontier: