superlocalmemory 2.5.1 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +86 -71
- package/api_server.py +47 -0
- package/docs/architecture-diagram.drawio +4 -4
- package/docs/plans/2026-02-13-benchmark-suite.md +1349 -0
- package/mcp_server.py +72 -17
- package/package.json +3 -3
- package/src/agent_registry.py +34 -1
- package/src/auth_middleware.py +63 -0
- package/src/cache_manager.py +1 -1
- package/src/db_connection_manager.py +16 -2
- package/src/event_bus.py +15 -0
- package/src/graph_engine.py +113 -44
- package/src/hybrid_search.py +2 -2
- package/src/memory-reset.py +17 -3
- package/src/memory_store_v2.py +80 -7
- package/src/rate_limiter.py +87 -0
- package/src/trust_scorer.py +38 -6
- package/src/webhook_dispatcher.py +17 -0
- package/ui_server.py +55 -1
package/mcp_server.py
CHANGED
|
@@ -28,7 +28,9 @@ Usage:
|
|
|
28
28
|
from mcp.server.fastmcp import FastMCP
|
|
29
29
|
from mcp.types import ToolAnnotations
|
|
30
30
|
import sys
|
|
31
|
+
import os
|
|
31
32
|
import json
|
|
33
|
+
import re
|
|
32
34
|
from pathlib import Path
|
|
33
35
|
from typing import Optional
|
|
34
36
|
|
|
@@ -54,6 +56,25 @@ try:
|
|
|
54
56
|
except ImportError:
|
|
55
57
|
PROVENANCE_AVAILABLE = False
|
|
56
58
|
|
|
59
|
+
# Trust Scorer (v2.6 — enforcement)
|
|
60
|
+
try:
|
|
61
|
+
from trust_scorer import TrustScorer
|
|
62
|
+
TRUST_AVAILABLE = True
|
|
63
|
+
except ImportError:
|
|
64
|
+
TRUST_AVAILABLE = False
|
|
65
|
+
|
|
66
|
+
def _sanitize_error(error: Exception) -> str:
|
|
67
|
+
"""Strip internal paths and structure from error messages."""
|
|
68
|
+
msg = str(error)
|
|
69
|
+
# Strip file paths containing claude-memory
|
|
70
|
+
msg = re.sub(r'/[\w./-]*claude-memory[\w./-]*', '[internal-path]', msg)
|
|
71
|
+
# Strip file paths containing SuperLocalMemory
|
|
72
|
+
msg = re.sub(r'/[\w./-]*SuperLocalMemory[\w./-]*', '[internal-path]', msg)
|
|
73
|
+
# Strip SQLite table names from error messages
|
|
74
|
+
msg = re.sub(r'table\s+\w+', 'table [redacted]', msg)
|
|
75
|
+
return msg
|
|
76
|
+
|
|
77
|
+
|
|
57
78
|
# Parse command line arguments early (needed for port in constructor)
|
|
58
79
|
import argparse as _argparse
|
|
59
80
|
_parser = _argparse.ArgumentParser(add_help=False)
|
|
@@ -131,6 +152,19 @@ def get_provenance_tracker():
|
|
|
131
152
|
return _provenance_tracker
|
|
132
153
|
|
|
133
154
|
|
|
155
|
+
_trust_scorer = None
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def get_trust_scorer():
    """Return the process-wide TrustScorer, creating it on first use (v2.6+).

    Returns:
        The cached ``TrustScorer`` obtained from
        ``TrustScorer.get_instance(DB_PATH)``, or ``None`` when the
        ``trust_scorer`` module could not be imported
        (``TRUST_AVAILABLE`` is false).
    """
    global _trust_scorer
    # Lazily build the singleton only when the optional module is present.
    if TRUST_AVAILABLE and _trust_scorer is None:
        _trust_scorer = TrustScorer.get_instance(DB_PATH)
    return _trust_scorer if TRUST_AVAILABLE else None
|
|
166
|
+
|
|
167
|
+
|
|
134
168
|
def _register_mcp_agent(agent_name: str = "mcp-client"):
|
|
135
169
|
"""Register the calling MCP agent and record activity. Non-blocking."""
|
|
136
170
|
registry = get_agent_registry()
|
|
@@ -188,6 +222,18 @@ async def remember(
|
|
|
188
222
|
# Register MCP agent (v2.5 — agent tracking)
|
|
189
223
|
_register_mcp_agent()
|
|
190
224
|
|
|
225
|
+
# Trust enforcement (v2.6) — block untrusted agents from writing
|
|
226
|
+
try:
|
|
227
|
+
trust = get_trust_scorer()
|
|
228
|
+
if trust and not trust.check_trust("mcp:mcp-client", "write"):
|
|
229
|
+
return {
|
|
230
|
+
"success": False,
|
|
231
|
+
"error": "Agent trust score too low for write operations",
|
|
232
|
+
"message": "Trust enforcement blocked this operation"
|
|
233
|
+
}
|
|
234
|
+
except Exception:
|
|
235
|
+
pass # Trust check failure should not block operations
|
|
236
|
+
|
|
191
237
|
# Use existing MemoryStoreV2 class (no duplicate logic)
|
|
192
238
|
store = get_store()
|
|
193
239
|
|
|
@@ -228,7 +274,7 @@ async def remember(
|
|
|
228
274
|
except Exception as e:
|
|
229
275
|
return {
|
|
230
276
|
"success": False,
|
|
231
|
-
"error":
|
|
277
|
+
"error": _sanitize_error(e),
|
|
232
278
|
"message": "Failed to save memory"
|
|
233
279
|
}
|
|
234
280
|
|
|
@@ -277,8 +323,17 @@ async def recall(
|
|
|
277
323
|
# Use existing MemoryStoreV2 class
|
|
278
324
|
store = get_store()
|
|
279
325
|
|
|
280
|
-
#
|
|
281
|
-
|
|
326
|
+
# Hybrid search (opt-in via env var, v2.6)
|
|
327
|
+
_use_hybrid = os.environ.get('SLM_HYBRID_SEARCH', 'false').lower() == 'true'
|
|
328
|
+
if _use_hybrid:
|
|
329
|
+
try:
|
|
330
|
+
from hybrid_search import HybridSearchEngine
|
|
331
|
+
engine = HybridSearchEngine(store=store)
|
|
332
|
+
results = engine.search(query, limit=limit)
|
|
333
|
+
except (ImportError, Exception):
|
|
334
|
+
results = store.search(query, limit=limit)
|
|
335
|
+
else:
|
|
336
|
+
results = store.search(query, limit=limit)
|
|
282
337
|
|
|
283
338
|
# Filter by minimum score
|
|
284
339
|
filtered_results = [
|
|
@@ -297,7 +352,7 @@ async def recall(
|
|
|
297
352
|
except Exception as e:
|
|
298
353
|
return {
|
|
299
354
|
"success": False,
|
|
300
|
-
"error":
|
|
355
|
+
"error": _sanitize_error(e),
|
|
301
356
|
"message": "Failed to search memories",
|
|
302
357
|
"results": [],
|
|
303
358
|
"count": 0
|
|
@@ -338,7 +393,7 @@ async def list_recent(limit: int = 10) -> dict:
|
|
|
338
393
|
except Exception as e:
|
|
339
394
|
return {
|
|
340
395
|
"success": False,
|
|
341
|
-
"error":
|
|
396
|
+
"error": _sanitize_error(e),
|
|
342
397
|
"message": "Failed to list memories",
|
|
343
398
|
"memories": [],
|
|
344
399
|
"count": 0
|
|
@@ -377,7 +432,7 @@ async def get_status() -> dict:
|
|
|
377
432
|
except Exception as e:
|
|
378
433
|
return {
|
|
379
434
|
"success": False,
|
|
380
|
-
"error":
|
|
435
|
+
"error": _sanitize_error(e),
|
|
381
436
|
"message": "Failed to get status"
|
|
382
437
|
}
|
|
383
438
|
|
|
@@ -418,7 +473,7 @@ async def build_graph() -> dict:
|
|
|
418
473
|
except Exception as e:
|
|
419
474
|
return {
|
|
420
475
|
"success": False,
|
|
421
|
-
"error":
|
|
476
|
+
"error": _sanitize_error(e),
|
|
422
477
|
"message": "Failed to build graph"
|
|
423
478
|
}
|
|
424
479
|
|
|
@@ -486,7 +541,7 @@ async def switch_profile(name: str) -> dict:
|
|
|
486
541
|
except Exception as e:
|
|
487
542
|
return {
|
|
488
543
|
"success": False,
|
|
489
|
-
"error":
|
|
544
|
+
"error": _sanitize_error(e),
|
|
490
545
|
"message": "Failed to switch profile"
|
|
491
546
|
}
|
|
492
547
|
|
|
@@ -531,7 +586,7 @@ async def backup_status() -> dict:
|
|
|
531
586
|
except Exception as e:
|
|
532
587
|
return {
|
|
533
588
|
"success": False,
|
|
534
|
-
"error":
|
|
589
|
+
"error": _sanitize_error(e),
|
|
535
590
|
"message": "Failed to get backup status"
|
|
536
591
|
}
|
|
537
592
|
|
|
@@ -583,7 +638,7 @@ async def search(query: str) -> dict:
|
|
|
583
638
|
return {"results": results}
|
|
584
639
|
|
|
585
640
|
except Exception as e:
|
|
586
|
-
return {"results": [], "error":
|
|
641
|
+
return {"results": [], "error": _sanitize_error(e)}
|
|
587
642
|
|
|
588
643
|
|
|
589
644
|
@mcp.tool(annotations=ToolAnnotations(
|
|
@@ -635,7 +690,7 @@ async def fetch(id: str) -> dict:
|
|
|
635
690
|
}
|
|
636
691
|
|
|
637
692
|
except Exception as e:
|
|
638
|
-
raise ValueError(f"Failed to fetch memory {id}: {
|
|
693
|
+
raise ValueError(f"Failed to fetch memory {id}: {_sanitize_error(e)}")
|
|
639
694
|
|
|
640
695
|
|
|
641
696
|
# ============================================================================
|
|
@@ -654,7 +709,7 @@ async def get_recent_memories_resource(limit: str) -> str:
|
|
|
654
709
|
memories = store.list_all(limit=int(limit))
|
|
655
710
|
return json.dumps(memories, indent=2)
|
|
656
711
|
except Exception as e:
|
|
657
|
-
return json.dumps({"error":
|
|
712
|
+
return json.dumps({"error": _sanitize_error(e)}, indent=2)
|
|
658
713
|
|
|
659
714
|
|
|
660
715
|
@mcp.resource("memory://stats")
|
|
@@ -669,7 +724,7 @@ async def get_stats_resource() -> str:
|
|
|
669
724
|
stats = store.get_stats()
|
|
670
725
|
return json.dumps(stats, indent=2)
|
|
671
726
|
except Exception as e:
|
|
672
|
-
return json.dumps({"error":
|
|
727
|
+
return json.dumps({"error": _sanitize_error(e)}, indent=2)
|
|
673
728
|
|
|
674
729
|
|
|
675
730
|
@mcp.resource("memory://graph/clusters")
|
|
@@ -685,7 +740,7 @@ async def get_clusters_resource() -> str:
|
|
|
685
740
|
clusters = stats.get('clusters', [])
|
|
686
741
|
return json.dumps(clusters, indent=2)
|
|
687
742
|
except Exception as e:
|
|
688
|
-
return json.dumps({"error":
|
|
743
|
+
return json.dumps({"error": _sanitize_error(e)}, indent=2)
|
|
689
744
|
|
|
690
745
|
|
|
691
746
|
@mcp.resource("memory://patterns/identity")
|
|
@@ -700,7 +755,7 @@ async def get_coding_identity_resource() -> str:
|
|
|
700
755
|
patterns = learner.get_identity_context(min_confidence=0.5)
|
|
701
756
|
return json.dumps(patterns, indent=2)
|
|
702
757
|
except Exception as e:
|
|
703
|
-
return json.dumps({"error":
|
|
758
|
+
return json.dumps({"error": _sanitize_error(e)}, indent=2)
|
|
704
759
|
|
|
705
760
|
|
|
706
761
|
# ============================================================================
|
|
@@ -742,7 +797,7 @@ async def coding_identity_prompt() -> str:
|
|
|
742
797
|
return prompt
|
|
743
798
|
|
|
744
799
|
except Exception as e:
|
|
745
|
-
return f"# Coding Identity\n\nError loading patterns: {
|
|
800
|
+
return f"# Coding Identity\n\nError loading patterns: {_sanitize_error(e)}"
|
|
746
801
|
|
|
747
802
|
|
|
748
803
|
@mcp.prompt()
|
|
@@ -780,7 +835,7 @@ async def project_context_prompt(project_name: str) -> str:
|
|
|
780
835
|
return prompt
|
|
781
836
|
|
|
782
837
|
except Exception as e:
|
|
783
|
-
return f"# Project Context: {project_name}\n\nError loading context: {
|
|
838
|
+
return f"# Project Context: {project_name}\n\nError loading context: {_sanitize_error(e)}"
|
|
784
839
|
|
|
785
840
|
|
|
786
841
|
# ============================================================================
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "superlocalmemory",
|
|
3
|
-
"version": "2.
|
|
4
|
-
"description": "Your AI Finally Remembers You - Local-first intelligent memory system for AI assistants. Works with Claude, Cursor, Windsurf, VS Code/Copilot, Codex, and
|
|
3
|
+
"version": "2.6.0",
|
|
4
|
+
"description": "Your AI Finally Remembers You - Local-first intelligent memory system for AI assistants. Works with Claude, Cursor, Windsurf, VS Code/Copilot, Codex, and 17+ AI tools. 100% local, zero cloud dependencies.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai-memory",
|
|
7
7
|
"claude-ai",
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
"type": "git",
|
|
35
35
|
"url": "https://github.com/varun369/SuperLocalMemoryV2.git"
|
|
36
36
|
},
|
|
37
|
-
"homepage": "https://
|
|
37
|
+
"homepage": "https://superlocalmemory.com",
|
|
38
38
|
"bugs": {
|
|
39
39
|
"url": "https://github.com/varun369/SuperLocalMemoryV2/issues"
|
|
40
40
|
},
|
package/src/agent_registry.py
CHANGED
|
@@ -31,7 +31,7 @@ Protocols:
|
|
|
31
31
|
cli — Command-line interface (slm command, bin/ scripts)
|
|
32
32
|
rest — REST API (api_server.py)
|
|
33
33
|
python — Direct Python import
|
|
34
|
-
a2a — Agent-to-Agent Protocol (v2.
|
|
34
|
+
a2a — Agent-to-Agent Protocol (v2.7+)
|
|
35
35
|
"""
|
|
36
36
|
|
|
37
37
|
import json
|
|
@@ -323,6 +323,39 @@ class AgentRegistry:
|
|
|
323
323
|
logger.error("Failed to list agents: %s", e)
|
|
324
324
|
return []
|
|
325
325
|
|
|
326
|
+
def list_active_agents(self, timeout_minutes: int = 5) -> List[dict]:
    """
    Return the agents whose ``last_seen`` falls inside the activity window.

    The original docstring states this is used by the dashboard to hide
    ghost/disconnected agents; by default an agent counts as active when it
    was seen within the last 5 minutes.

    Args:
        timeout_minutes: Size of the activity window, in minutes.

    Returns:
        Agent dicts (built by ``self._row_to_dict``) ordered by most
        recently seen first. An empty list is returned if anything fails;
        the failure is logged rather than raised.
    """
    # The cutoff is computed inside SQLite so it can be bound as a parameter.
    # NOTE(review): datetime('now', ...) yields UTC 'YYYY-MM-DD HH:MM:SS'
    # text; confirm last_seen is stored in a comparable format.
    active_sql = """
        SELECT agent_id, agent_name, protocol, first_seen, last_seen,
               memories_written, memories_recalled, trust_score, metadata
        FROM agent_registry
        WHERE last_seen >= datetime('now', '-' || ? || ' minutes')
        ORDER BY last_seen DESC
    """
    try:
        from db_connection_manager import DbConnectionManager

        manager = DbConnectionManager.get_instance(self.db_path)
        with manager.read_connection() as conn:
            cur = conn.cursor()
            cur.execute(active_sql, (timeout_minutes,))
            records = cur.fetchall()

        # Row conversion stays inside the try so a bad row also degrades
        # to an empty result instead of propagating.
        return list(map(self._row_to_dict, records))
    except Exception as e:
        logger.error("Failed to list active agents: %s", e)
        return []
|
|
358
|
+
|
|
326
359
|
def get_stats(self) -> dict:
|
|
327
360
|
"""Get agent registry statistics."""
|
|
328
361
|
try:
|
|
package/src/auth_middleware.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
SuperLocalMemory V2 - Optional API Key Authentication
|
|
4
|
+
Copyright (c) 2026 Varun Pratap Bhardwaj
|
|
5
|
+
Licensed under MIT License
|
|
6
|
+
|
|
7
|
+
Opt-in API key authentication for dashboard and API endpoints.
|
|
8
|
+
When ~/.claude-memory/api_key file exists, write endpoints require
|
|
9
|
+
X-SLM-API-Key header. Read endpoints remain open for backward compatibility.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import os
|
|
13
|
+
import hashlib
|
|
14
|
+
import logging
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Optional
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
MEMORY_DIR = Path.home() / ".claude-memory"
|
|
21
|
+
API_KEY_FILE = MEMORY_DIR / "api_key"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _load_api_key_hash() -> Optional[str]:
    """Read the configured API key and return its SHA-256 hex digest.

    Returns:
        The hex digest of the stripped contents of ``API_KEY_FILE``, or
        ``None`` when authentication is not configured: the file does not
        exist, is empty after stripping, or could not be read (the read
        failure is logged as a warning).
    """
    if API_KEY_FILE.exists():
        try:
            raw_key = API_KEY_FILE.read_text().strip()
            # An empty key file is treated the same as no key file.
            return hashlib.sha256(raw_key.encode()).hexdigest() if raw_key else None
        except Exception as e:
            logger.warning("Failed to load API key: %s", e)
    return None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def check_api_key(request_headers: dict, is_write: bool = False) -> bool:
    """
    Check if request is authorized.

    Returns True if:
    - No API key is configured (``_load_api_key_hash()`` returns None —
      backward compatible)
    - Request is a read operation (reads always allowed)
    - Request has valid X-SLM-API-Key header matching the key file

    Args:
        request_headers: Mapping of request header names to values. The
            header name is matched case-insensitively.
        is_write: True when the request modifies data and therefore needs
            a valid key once authentication is configured.

    Returns:
        True when the request may proceed, False otherwise.
    """
    # Function-scope import keeps this block self-contained; hmac is stdlib.
    import hmac

    key_hash = _load_api_key_hash()

    # No key configured = auth not configured = allow all (backward compat)
    if key_hash is None:
        return True

    # Read operations always allowed
    if not is_write:
        return True

    # Write operations require valid key
    provided_key = request_headers.get("x-slm-api-key", "")
    if not provided_key:
        # HTTP header names are case-insensitive; a plain dict may carry the
        # documented "X-SLM-API-Key" spelling instead of the lowercase form.
        provided_key = next(
            (
                value
                for name, value in request_headers.items()
                if isinstance(name, str) and name.lower() == "x-slm-api-key"
            ),
            "",
        )
    if not provided_key:
        return False

    provided_hash = hashlib.sha256(provided_key.encode()).hexdigest()
    # Constant-time comparison: `==` on the digests can leak how many leading
    # characters matched through response timing.
    return hmac.compare_digest(provided_hash, key_hash)
|
package/src/cache_manager.py
CHANGED (+1 -1; hunk not captured in this extract)
package/src/db_connection_manager.py
CHANGED
|
@@ -73,6 +73,7 @@ logger = logging.getLogger("superlocalmemory.db")
|
|
|
73
73
|
DEFAULT_BUSY_TIMEOUT_MS = 5000
|
|
74
74
|
DEFAULT_READ_POOL_SIZE = 4
|
|
75
75
|
WRITE_QUEUE_SENTINEL = None # Signals the writer thread to stop
|
|
76
|
+
MAX_READ_CONNECTIONS = 50 # Maximum concurrent read connections
|
|
76
77
|
|
|
77
78
|
|
|
78
79
|
class DbConnectionManager:
|
|
@@ -155,7 +156,7 @@ class DbConnectionManager:
|
|
|
155
156
|
self._read_connections_lock = threading.Lock()
|
|
156
157
|
|
|
157
158
|
# Write queue and dedicated writer thread
|
|
158
|
-
self._write_queue: Queue = Queue()
|
|
159
|
+
self._write_queue: Queue = Queue(maxsize=1000)
|
|
159
160
|
self._writer_thread = threading.Thread(
|
|
160
161
|
target=self._writer_loop,
|
|
161
162
|
name="slm-db-writer",
|
|
@@ -190,6 +191,8 @@ class DbConnectionManager:
|
|
|
190
191
|
conn.execute(f"PRAGMA busy_timeout={DEFAULT_BUSY_TIMEOUT_MS}")
|
|
191
192
|
# Sync mode NORMAL is safe with WAL and faster than FULL
|
|
192
193
|
conn.execute("PRAGMA synchronous=NORMAL")
|
|
194
|
+
# Incremental auto-vacuum reclaims space without full rebuild (v2.6)
|
|
195
|
+
conn.execute("PRAGMA auto_vacuum=INCREMENTAL")
|
|
193
196
|
conn.close()
|
|
194
197
|
except Exception:
|
|
195
198
|
conn.close()
|
|
@@ -252,7 +255,18 @@ class DbConnectionManager:
|
|
|
252
255
|
self._remove_from_pool(conn)
|
|
253
256
|
conn = None
|
|
254
257
|
|
|
255
|
-
# Create new read connection for this thread
|
|
258
|
+
# Create new read connection for this thread (with pool limit)
|
|
259
|
+
with self._read_connections_lock:
|
|
260
|
+
if len(self._read_connections) >= MAX_READ_CONNECTIONS:
|
|
261
|
+
logger.warning(
|
|
262
|
+
"Read connection pool at capacity (%d). Reusing oldest connection.",
|
|
263
|
+
MAX_READ_CONNECTIONS
|
|
264
|
+
)
|
|
265
|
+
# Reuse the least recently used connection
|
|
266
|
+
conn = self._read_connections[0]
|
|
267
|
+
self._local.read_conn = conn
|
|
268
|
+
return conn
|
|
269
|
+
|
|
256
270
|
conn = self._create_connection(readonly=True)
|
|
257
271
|
self._local.read_conn = conn
|
|
258
272
|
|
package/src/event_bus.py
CHANGED
|
@@ -131,6 +131,10 @@ class EventBus:
|
|
|
131
131
|
self._listeners: List[Callable[[dict], None]] = []
|
|
132
132
|
self._listeners_lock = threading.Lock()
|
|
133
133
|
|
|
134
|
+
# Auto-prune tracking: lightweight heuristic trigger
|
|
135
|
+
self._write_count = 0
|
|
136
|
+
self._last_prune = datetime.now()
|
|
137
|
+
|
|
134
138
|
# Initialize schema
|
|
135
139
|
self._init_schema()
|
|
136
140
|
|
|
@@ -266,6 +270,17 @@ class EventBus:
|
|
|
266
270
|
self._notify_listeners(event)
|
|
267
271
|
|
|
268
272
|
logger.debug("Event emitted: type=%s, id=%s, memory_id=%s", event_type, event_id, memory_id)
|
|
273
|
+
|
|
274
|
+
# Auto-prune every 100 events or every 24 hours, whichever comes first
|
|
275
|
+
self._write_count += 1
|
|
276
|
+
if self._write_count >= 100 or (datetime.now() - self._last_prune).total_seconds() > 86400:
|
|
277
|
+
try:
|
|
278
|
+
self.prune_events()
|
|
279
|
+
self._write_count = 0
|
|
280
|
+
self._last_prune = datetime.now()
|
|
281
|
+
except Exception:
|
|
282
|
+
pass # Don't let prune failures block event emission
|
|
283
|
+
|
|
269
284
|
return event_id
|
|
270
285
|
|
|
271
286
|
def _persist_event(self, event: dict) -> Optional[int]:
|
package/src/graph_engine.py
CHANGED
|
@@ -15,12 +15,12 @@ Implements GraphRAG with Leiden community detection to:
|
|
|
15
15
|
All processing is local - no external APIs.
|
|
16
16
|
|
|
17
17
|
LIMITS:
|
|
18
|
-
- MAX_MEMORIES_FOR_GRAPH:
|
|
18
|
+
- MAX_MEMORIES_FOR_GRAPH: 10000 (prevents O(n²) explosion)
|
|
19
19
|
- For larger datasets, use incremental updates
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
22
|
# SECURITY: Graph build limits to prevent resource exhaustion
|
|
23
|
-
MAX_MEMORIES_FOR_GRAPH =
|
|
23
|
+
MAX_MEMORIES_FOR_GRAPH = 10000
|
|
24
24
|
|
|
25
25
|
import sqlite3
|
|
26
26
|
import json
|
|
@@ -157,43 +157,82 @@ class EdgeBuilder:
|
|
|
157
157
|
logger.warning("Need at least 2 memories to build edges")
|
|
158
158
|
return 0
|
|
159
159
|
|
|
160
|
-
#
|
|
161
|
-
|
|
160
|
+
# Try HNSW-accelerated edge building first (O(n log n))
|
|
161
|
+
use_hnsw = False
|
|
162
|
+
try:
|
|
163
|
+
from hnsw_index import HNSWIndex
|
|
164
|
+
if len(memory_ids) >= 50: # HNSW overhead not worth it for small sets
|
|
165
|
+
use_hnsw = True
|
|
166
|
+
except ImportError:
|
|
167
|
+
pass
|
|
162
168
|
|
|
163
169
|
edges_added = 0
|
|
164
170
|
conn = sqlite3.connect(self.db_path)
|
|
165
171
|
cursor = conn.cursor()
|
|
166
172
|
|
|
167
173
|
try:
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
174
|
+
if use_hnsw:
|
|
175
|
+
logger.info("Using HNSW-accelerated edge building for %d memories", len(memory_ids))
|
|
176
|
+
try:
|
|
177
|
+
dim = vectors.shape[1]
|
|
178
|
+
hnsw = HNSWIndex(dimension=dim, max_elements=len(memory_ids))
|
|
179
|
+
hnsw.build(vectors, memory_ids)
|
|
180
|
+
|
|
181
|
+
for i in range(len(memory_ids)):
|
|
182
|
+
neighbors = hnsw.search(vectors[i], k=min(20, len(memory_ids) - 1))
|
|
183
|
+
for neighbor_id, similarity in neighbors:
|
|
184
|
+
if neighbor_id == memory_ids[i]:
|
|
185
|
+
continue # Skip self
|
|
186
|
+
# Only process each pair once (lower ID first)
|
|
187
|
+
if memory_ids[i] > neighbor_id:
|
|
188
|
+
continue
|
|
189
|
+
if similarity >= self.min_similarity:
|
|
190
|
+
# Find indices for entity lookup
|
|
191
|
+
j = memory_ids.index(neighbor_id)
|
|
192
|
+
entities_i = set(entities_list[i])
|
|
193
|
+
entities_j = set(entities_list[j])
|
|
194
|
+
shared = list(entities_i & entities_j)
|
|
195
|
+
rel_type = self._classify_relationship(similarity, shared)
|
|
196
|
+
|
|
197
|
+
cursor.execute('''
|
|
198
|
+
INSERT OR REPLACE INTO graph_edges
|
|
199
|
+
(source_memory_id, target_memory_id, relationship_type,
|
|
200
|
+
weight, shared_entities, similarity_score)
|
|
201
|
+
VALUES (?, ?, ?, ?, ?, ?)
|
|
202
|
+
''', (
|
|
203
|
+
memory_ids[i], neighbor_id, rel_type,
|
|
204
|
+
float(similarity), json.dumps(shared), float(similarity)
|
|
205
|
+
))
|
|
206
|
+
edges_added += 1
|
|
207
|
+
|
|
208
|
+
except Exception as e:
|
|
209
|
+
logger.warning("HNSW edge building failed, falling back to O(n²): %s", e)
|
|
210
|
+
use_hnsw = False # Fall through to O(n²) below
|
|
211
|
+
|
|
212
|
+
if not use_hnsw:
|
|
213
|
+
# Fallback: O(n²) pairwise cosine similarity
|
|
214
|
+
similarity_matrix = cosine_similarity(vectors)
|
|
215
|
+
|
|
216
|
+
for i in range(len(memory_ids)):
|
|
217
|
+
for j in range(i + 1, len(memory_ids)):
|
|
218
|
+
sim = similarity_matrix[i, j]
|
|
219
|
+
|
|
220
|
+
if sim >= self.min_similarity:
|
|
221
|
+
entities_i = set(entities_list[i])
|
|
222
|
+
entities_j = set(entities_list[j])
|
|
223
|
+
shared = list(entities_i & entities_j)
|
|
224
|
+
rel_type = self._classify_relationship(sim, shared)
|
|
225
|
+
|
|
226
|
+
cursor.execute('''
|
|
227
|
+
INSERT OR REPLACE INTO graph_edges
|
|
228
|
+
(source_memory_id, target_memory_id, relationship_type,
|
|
229
|
+
weight, shared_entities, similarity_score)
|
|
230
|
+
VALUES (?, ?, ?, ?, ?, ?)
|
|
231
|
+
''', (
|
|
232
|
+
memory_ids[i], memory_ids[j], rel_type,
|
|
233
|
+
float(sim), json.dumps(shared), float(sim)
|
|
234
|
+
))
|
|
235
|
+
edges_added += 1
|
|
197
236
|
|
|
198
237
|
conn.commit()
|
|
199
238
|
logger.info(f"Created {edges_added} edges")
|
|
@@ -829,7 +868,7 @@ class GraphEngine:
|
|
|
829
868
|
Dictionary with build statistics
|
|
830
869
|
|
|
831
870
|
Raises:
|
|
832
|
-
ValueError: If too many memories (>
|
|
871
|
+
ValueError: If too many memories (>10000) for safe processing
|
|
833
872
|
"""
|
|
834
873
|
start_time = time.time()
|
|
835
874
|
logger.info("Starting full graph build...")
|
|
@@ -882,17 +921,47 @@ class GraphEngine:
|
|
|
882
921
|
'fix': "Add more memories: superlocalmemoryv2:remember 'Your content here'"
|
|
883
922
|
}
|
|
884
923
|
|
|
885
|
-
#
|
|
924
|
+
# SCALABILITY: Intelligent sampling for large datasets (v2.6)
|
|
886
925
|
if len(memories) > MAX_MEMORIES_FOR_GRAPH:
|
|
887
|
-
logger.
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
926
|
+
logger.warning(
|
|
927
|
+
"Memory count (%d) exceeds graph cap (%d). Using intelligent sampling.",
|
|
928
|
+
len(memories), MAX_MEMORIES_FOR_GRAPH
|
|
929
|
+
)
|
|
930
|
+
# Sample: 60% most recent + 40% highest importance (with overlap dedup)
|
|
931
|
+
recent_count = int(MAX_MEMORIES_FOR_GRAPH * 0.6)
|
|
932
|
+
important_count = int(MAX_MEMORIES_FOR_GRAPH * 0.4)
|
|
933
|
+
|
|
934
|
+
recent_memories = cursor.execute('''
|
|
935
|
+
SELECT id, content, summary FROM memories
|
|
936
|
+
WHERE profile = ?
|
|
937
|
+
ORDER BY created_at DESC
|
|
938
|
+
LIMIT ?
|
|
939
|
+
''', (active_profile, recent_count)).fetchall()
|
|
940
|
+
|
|
941
|
+
important_memories = cursor.execute('''
|
|
942
|
+
SELECT id, content, summary FROM memories
|
|
943
|
+
WHERE profile = ?
|
|
944
|
+
ORDER BY importance DESC, access_count DESC
|
|
945
|
+
LIMIT ?
|
|
946
|
+
''', (active_profile, important_count)).fetchall()
|
|
947
|
+
|
|
948
|
+
# Deduplicate by ID, preserving order
|
|
949
|
+
seen_ids = set()
|
|
950
|
+
memories = []
|
|
951
|
+
for m in recent_memories + important_memories:
|
|
952
|
+
if m[0] not in seen_ids:
|
|
953
|
+
seen_ids.add(m[0])
|
|
954
|
+
memories.append(m)
|
|
955
|
+
memories = memories[:MAX_MEMORIES_FOR_GRAPH]
|
|
956
|
+
logger.info("Sampled %d memories for graph build", len(memories))
|
|
957
|
+
|
|
958
|
+
elif len(memories) > MAX_MEMORIES_FOR_GRAPH * 0.8:
|
|
959
|
+
logger.warning(
|
|
960
|
+
"Approaching graph cap: %d/%d memories (%.0f%%). "
|
|
961
|
+
"Consider running memory compression.",
|
|
962
|
+
len(memories), MAX_MEMORIES_FOR_GRAPH,
|
|
963
|
+
len(memories) / MAX_MEMORIES_FOR_GRAPH * 100
|
|
964
|
+
)
|
|
896
965
|
|
|
897
966
|
# Clear existing graph data for this profile's memories
|
|
898
967
|
profile_memory_ids = [m[0] for m in memories]
|
package/src/hybrid_search.py
CHANGED
|
@@ -150,7 +150,7 @@ class HybridSearchEngine:
|
|
|
150
150
|
try:
|
|
151
151
|
tags = json.loads(row[3])
|
|
152
152
|
text_parts.extend(tags)
|
|
153
|
-
except:
|
|
153
|
+
except Exception:
|
|
154
154
|
pass
|
|
155
155
|
|
|
156
156
|
doc_text = ' '.join(text_parts)
|
|
@@ -334,7 +334,7 @@ class HybridSearchEngine:
|
|
|
334
334
|
new_score = current_score * similarity * (0.7 ** depth)
|
|
335
335
|
queue.append((rel_id, new_score, depth + 1))
|
|
336
336
|
|
|
337
|
-
except:
|
|
337
|
+
except Exception:
|
|
338
338
|
# Graph operation failed - skip
|
|
339
339
|
continue
|
|
340
340
|
|