superlocalmemory 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. package/ATTRIBUTION.md +140 -0
  2. package/CHANGELOG.md +1749 -0
  3. package/LICENSE +21 -0
  4. package/README.md +600 -0
  5. package/bin/aider-smart +72 -0
  6. package/bin/slm +202 -0
  7. package/bin/slm-npm +73 -0
  8. package/bin/slm.bat +195 -0
  9. package/bin/slm.cmd +10 -0
  10. package/bin/superlocalmemoryv2:list +3 -0
  11. package/bin/superlocalmemoryv2:profile +3 -0
  12. package/bin/superlocalmemoryv2:recall +3 -0
  13. package/bin/superlocalmemoryv2:remember +3 -0
  14. package/bin/superlocalmemoryv2:reset +3 -0
  15. package/bin/superlocalmemoryv2:status +3 -0
  16. package/completions/slm.bash +58 -0
  17. package/completions/slm.zsh +76 -0
  18. package/configs/antigravity-mcp.json +13 -0
  19. package/configs/chatgpt-desktop-mcp.json +7 -0
  20. package/configs/claude-desktop-mcp.json +15 -0
  21. package/configs/codex-mcp.toml +13 -0
  22. package/configs/cody-commands.json +29 -0
  23. package/configs/continue-mcp.yaml +14 -0
  24. package/configs/continue-skills.yaml +26 -0
  25. package/configs/cursor-mcp.json +15 -0
  26. package/configs/gemini-cli-mcp.json +11 -0
  27. package/configs/jetbrains-mcp.json +11 -0
  28. package/configs/opencode-mcp.json +12 -0
  29. package/configs/perplexity-mcp.json +9 -0
  30. package/configs/vscode-copilot-mcp.json +12 -0
  31. package/configs/windsurf-mcp.json +16 -0
  32. package/configs/zed-mcp.json +12 -0
  33. package/docs/ARCHITECTURE.md +877 -0
  34. package/docs/CLI-COMMANDS-REFERENCE.md +425 -0
  35. package/docs/COMPETITIVE-ANALYSIS.md +210 -0
  36. package/docs/COMPRESSION-README.md +390 -0
  37. package/docs/GRAPH-ENGINE.md +503 -0
  38. package/docs/MCP-MANUAL-SETUP.md +720 -0
  39. package/docs/MCP-TROUBLESHOOTING.md +787 -0
  40. package/docs/PATTERN-LEARNING.md +363 -0
  41. package/docs/PROFILES-GUIDE.md +453 -0
  42. package/docs/RESET-GUIDE.md +353 -0
  43. package/docs/SEARCH-ENGINE-V2.2.0.md +748 -0
  44. package/docs/SEARCH-INTEGRATION-GUIDE.md +502 -0
  45. package/docs/UI-SERVER.md +254 -0
  46. package/docs/UNIVERSAL-INTEGRATION.md +432 -0
  47. package/docs/V2.2.0-OPTIONAL-SEARCH.md +666 -0
  48. package/docs/WINDOWS-INSTALL-README.txt +34 -0
  49. package/docs/WINDOWS-POST-INSTALL.txt +45 -0
  50. package/docs/example_graph_usage.py +148 -0
  51. package/hooks/memory-list-skill.js +130 -0
  52. package/hooks/memory-profile-skill.js +284 -0
  53. package/hooks/memory-recall-skill.js +109 -0
  54. package/hooks/memory-remember-skill.js +127 -0
  55. package/hooks/memory-reset-skill.js +274 -0
  56. package/install-skills.sh +436 -0
  57. package/install.ps1 +417 -0
  58. package/install.sh +755 -0
  59. package/mcp_server.py +585 -0
  60. package/package.json +94 -0
  61. package/requirements-core.txt +24 -0
  62. package/requirements.txt +10 -0
  63. package/scripts/postinstall.js +126 -0
  64. package/scripts/preuninstall.js +57 -0
  65. package/skills/slm-build-graph/SKILL.md +423 -0
  66. package/skills/slm-list-recent/SKILL.md +348 -0
  67. package/skills/slm-recall/SKILL.md +325 -0
  68. package/skills/slm-remember/SKILL.md +194 -0
  69. package/skills/slm-status/SKILL.md +363 -0
  70. package/skills/slm-switch-profile/SKILL.md +442 -0
  71. package/src/__pycache__/cache_manager.cpython-312.pyc +0 -0
  72. package/src/__pycache__/embedding_engine.cpython-312.pyc +0 -0
  73. package/src/__pycache__/graph_engine.cpython-312.pyc +0 -0
  74. package/src/__pycache__/hnsw_index.cpython-312.pyc +0 -0
  75. package/src/__pycache__/hybrid_search.cpython-312.pyc +0 -0
  76. package/src/__pycache__/memory-profiles.cpython-312.pyc +0 -0
  77. package/src/__pycache__/memory-reset.cpython-312.pyc +0 -0
  78. package/src/__pycache__/memory_compression.cpython-312.pyc +0 -0
  79. package/src/__pycache__/memory_store_v2.cpython-312.pyc +0 -0
  80. package/src/__pycache__/migrate_v1_to_v2.cpython-312.pyc +0 -0
  81. package/src/__pycache__/pattern_learner.cpython-312.pyc +0 -0
  82. package/src/__pycache__/query_optimizer.cpython-312.pyc +0 -0
  83. package/src/__pycache__/search_engine_v2.cpython-312.pyc +0 -0
  84. package/src/__pycache__/setup_validator.cpython-312.pyc +0 -0
  85. package/src/__pycache__/tree_manager.cpython-312.pyc +0 -0
  86. package/src/cache_manager.py +520 -0
  87. package/src/embedding_engine.py +671 -0
  88. package/src/graph_engine.py +970 -0
  89. package/src/hnsw_index.py +626 -0
  90. package/src/hybrid_search.py +693 -0
  91. package/src/memory-profiles.py +518 -0
  92. package/src/memory-reset.py +485 -0
  93. package/src/memory_compression.py +999 -0
  94. package/src/memory_store_v2.py +1088 -0
  95. package/src/migrate_v1_to_v2.py +638 -0
  96. package/src/pattern_learner.py +898 -0
  97. package/src/query_optimizer.py +513 -0
  98. package/src/search_engine_v2.py +403 -0
  99. package/src/setup_validator.py +479 -0
  100. package/src/tree_manager.py +720 -0
package/src/graph_engine.py
@@ -0,0 +1,970 @@
#!/usr/bin/env python3
"""
GraphEngine - Knowledge Graph Clustering for SuperLocalMemory V2

Copyright (c) 2026 Varun Pratap Bhardwaj
Licensed under MIT License
Repository: https://github.com/varun369/SuperLocalMemoryV2

Implements GraphRAG with Leiden community detection to:
- Extract entities from memories (TF-IDF keyword extraction)
- Build similarity-based edges between memories
- Detect thematic clusters using Leiden algorithm
- Enable graph traversal for related memory discovery

All processing is local - no external APIs.

LIMITS:
- MAX_MEMORIES_FOR_GRAPH: 5000 (prevents O(n²) explosion)
- For larger datasets, use incremental updates
"""

# SECURITY: Graph build limits to prevent resource exhaustion
MAX_MEMORIES_FOR_GRAPH = 5000

import sqlite3
import json
import time
import logging
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Optional, Tuple, Set, Any
from collections import Counter

# Core dependencies
try:
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity
    import numpy as np
    SKLEARN_AVAILABLE = True
except ImportError:
    SKLEARN_AVAILABLE = False
    raise ImportError("scikit-learn is required. Install: pip install scikit-learn")

# Graph dependencies - lazy import to avoid conflicts with compression module
IGRAPH_AVAILABLE = False
try:
    # Import only when needed to avoid module conflicts
    import importlib
    ig_module = importlib.import_module('igraph')
    leiden_module = importlib.import_module('leidenalg')
    IGRAPH_AVAILABLE = True
except ImportError:
    pass  # Will raise error when building clusters if not available

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

MEMORY_DIR = Path.home() / ".claude-memory"
DB_PATH = MEMORY_DIR / "memory.db"


class EntityExtractor:
    """Extract key entities/concepts from memory content using TF-IDF."""

    def __init__(self, max_features: int = 20, min_df: int = 1):
        """
        Initialize entity extractor.

        Args:
            max_features: Top N keywords to extract per memory
            min_df: Minimum document frequency (ignore very rare terms)
        """
        self.max_features = max_features
        self.vectorizer = TfidfVectorizer(
            max_features=max_features,
            stop_words='english',
            ngram_range=(1, 2),  # Unigrams + bigrams
            min_df=min_df,
            lowercase=True,
            token_pattern=r'(?u)\b[a-zA-Z][a-zA-Z0-9_-]*\b'  # Alphanumeric tokens
        )

    def extract_entities(self, contents: List[str]) -> Tuple[List[List[str]], np.ndarray]:
        """
        Extract entities from multiple contents.

        Args:
            contents: List of memory content strings

        Returns:
            Tuple of (entities_per_content, tfidf_vectors)
        """
        if not contents:
            return [], np.array([])

        try:
            # Fit and transform all contents
            vectors = self.vectorizer.fit_transform(contents)
            feature_names = self.vectorizer.get_feature_names_out()

            # Extract top entities for each content
            all_entities = []
            for idx in range(len(contents)):
                scores = vectors[idx].toarray()[0]

                # Get indices of top features
                top_indices = np.argsort(scores)[::-1]

                # Extract entities with score > 0
                entities = [
                    feature_names[i]
                    for i in top_indices
                    if scores[i] > 0.05  # Minimum threshold
                ][:self.max_features]

                all_entities.append(entities)

            return all_entities, vectors.toarray()

        except Exception as e:
            logger.error(f"Entity extraction failed: {e}")
            return [[] for _ in contents], np.zeros((len(contents), 1))

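# Illustrative example (hypothetical inputs): for two short memories such as
# "Fixed JWT refresh bug in auth middleware" and "Auth middleware now caches JWT keys",
# extract_entities() would typically surface overlapping terms like "jwt" and
# "auth middleware" for both; those TF-IDF rows are what EdgeBuilder below compares
# pairwise when deciding whether to create an edge.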

class EdgeBuilder:
    """Build similarity edges between memories based on entity overlap."""

    def __init__(self, db_path: Path, min_similarity: float = 0.3):
        """
        Initialize edge builder.

        Args:
            db_path: Path to SQLite database
            min_similarity: Minimum cosine similarity to create edge
        """
        self.db_path = db_path
        self.min_similarity = min_similarity

    def build_edges(self, memory_ids: List[int], vectors: np.ndarray,
                    entities_list: List[List[str]]) -> int:
        """
        Build edges between similar memories.

        Args:
            memory_ids: List of memory IDs
            vectors: TF-IDF vectors (n x features)
            entities_list: List of entity lists per memory

        Returns:
            Number of edges created
        """
        if len(memory_ids) < 2:
            logger.warning("Need at least 2 memories to build edges")
            return 0

        # Compute pairwise cosine similarity
        similarity_matrix = cosine_similarity(vectors)

        edges_added = 0
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        try:
            for i in range(len(memory_ids)):
                for j in range(i + 1, len(memory_ids)):
                    sim = similarity_matrix[i, j]

                    if sim >= self.min_similarity:
                        # Find shared entities
                        entities_i = set(entities_list[i])
                        entities_j = set(entities_list[j])
                        shared = list(entities_i & entities_j)

                        # Classify relationship type
                        rel_type = self._classify_relationship(sim, shared)

                        # Insert edge (or update if exists)
                        cursor.execute('''
                            INSERT OR REPLACE INTO graph_edges
                            (source_memory_id, target_memory_id, relationship_type,
                             weight, shared_entities, similarity_score)
                            VALUES (?, ?, ?, ?, ?, ?)
                        ''', (
                            memory_ids[i],
                            memory_ids[j],
                            rel_type,
                            float(sim),
                            json.dumps(shared),
                            float(sim)
                        ))

                        edges_added += 1

            conn.commit()
            logger.info(f"Created {edges_added} edges")
            return edges_added

        except Exception as e:
            logger.error(f"Edge building failed: {e}")
            conn.rollback()
            return 0
        finally:
            conn.close()

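    # Scale note on build_edges() above: the nested loop is O(n²) in the number of
    # memories; at the MAX_MEMORIES_FOR_GRAPH cap of 5000 that is 5000 * 4999 / 2
    # = 12,497,500 candidate pairs, which is why build_graph() rejects larger datasets
    # and the module recommends incremental updates instead.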
    def _classify_relationship(self, similarity: float, shared_entities: List[str]) -> str:
        """
        Classify edge type based on similarity and shared entities.

        Args:
            similarity: Cosine similarity score
            shared_entities: List of shared entity strings

        Returns:
            Relationship type: 'similar', 'depends_on', or 'related_to'
        """
        # Check for dependency keywords
        dependency_keywords = {'dependency', 'require', 'import', 'use', 'need'}
        has_dependency = any(
            any(kw in entity.lower() for kw in dependency_keywords)
            for entity in shared_entities
        )

        if similarity > 0.7:
            return 'similar'
        elif has_dependency:
            return 'depends_on'
        else:
            return 'related_to'

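# Worked example for EdgeBuilder._classify_relationship() (illustrative values): a pair
# with cosine similarity 0.82 is labelled 'similar' regardless of shared entities; a
# 0.45 pair whose shared entities include a term such as "import requests" matches the
# dependency keywords and becomes 'depends_on'; a 0.35 pair with only topical overlap
# falls through to 'related_to'.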

class ClusterBuilder:
    """Detect memory communities using Leiden algorithm."""

    def __init__(self, db_path: Path):
        """Initialize cluster builder."""
        self.db_path = db_path

    def detect_communities(self) -> int:
        """
        Run Leiden algorithm to find memory clusters.

        Returns:
            Number of clusters created
        """
        # Import igraph modules here to avoid conflicts
        try:
            import igraph as ig
            import leidenalg
        except ImportError:
            raise ImportError("python-igraph and leidenalg required. Install: pip install python-igraph leidenalg")

        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        try:
            # Load all edges
            edges = cursor.execute('''
                SELECT source_memory_id, target_memory_id, weight
                FROM graph_edges
            ''').fetchall()

            if not edges:
                logger.warning("No edges found - cannot build clusters")
                return 0

            # Build memory ID mapping
            memory_ids = set()
            for source, target, _ in edges:
                memory_ids.add(source)
                memory_ids.add(target)

            memory_ids = sorted(list(memory_ids))
            memory_id_to_vertex = {mid: idx for idx, mid in enumerate(memory_ids)}
            vertex_to_memory_id = {idx: mid for mid, idx in memory_id_to_vertex.items()}

            # Create igraph graph
            g = ig.Graph()
            g.add_vertices(len(memory_ids))

            # Add edges with weights
            edge_list = []
            edge_weights = []

            for source, target, weight in edges:
                edge_list.append((
                    memory_id_to_vertex[source],
                    memory_id_to_vertex[target]
                ))
                edge_weights.append(weight)

            g.add_edges(edge_list)

            # Run Leiden algorithm
            logger.info(f"Running Leiden on {len(memory_ids)} nodes, {len(edges)} edges")
            partition = leidenalg.find_partition(
                g,
                leidenalg.ModularityVertexPartition,
                weights=edge_weights,
                n_iterations=100,
                seed=42  # Reproducible
            )

            # Process communities
            clusters_created = 0

            for cluster_idx, community in enumerate(partition):
                if len(community) < 2:  # Skip singleton clusters
                    continue

                # Get memory IDs in this cluster
                cluster_memory_ids = [vertex_to_memory_id[v] for v in community]

                # Calculate cluster stats
                avg_importance = self._get_avg_importance(cursor, cluster_memory_ids)

                # Auto-generate cluster name
                cluster_name = self._generate_cluster_name(cursor, cluster_memory_ids)

                # Insert cluster
                result = cursor.execute('''
                    INSERT INTO graph_clusters (name, member_count, avg_importance)
                    VALUES (?, ?, ?)
                ''', (cluster_name, len(cluster_memory_ids), avg_importance))

                cluster_id = result.lastrowid

                # Update memories with cluster_id
                cursor.executemany('''
                    UPDATE memories SET cluster_id = ? WHERE id = ?
                ''', [(cluster_id, mid) for mid in cluster_memory_ids])

                clusters_created += 1
                logger.info(f"Cluster {cluster_id}: '{cluster_name}' ({len(cluster_memory_ids)} members)")

            conn.commit()
            logger.info(f"Created {clusters_created} clusters")
            return clusters_created

        except Exception as e:
            logger.error(f"Community detection failed: {e}")
            conn.rollback()
            return 0
        finally:
            conn.close()

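    # Note on detect_communities() above: leidenalg.find_partition() returns a vertex
    # partition; iterating it yields one community per entry as a list of igraph vertex
    # indices (e.g. [0, 3, 7]), and vertex_to_memory_id translates those indices back to
    # memory row IDs before the cluster rows are written. (Example indices illustrative.)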
    def _get_avg_importance(self, cursor, memory_ids: List[int]) -> float:
        """Calculate average importance for cluster."""
        placeholders = ','.join('?' * len(memory_ids))
        result = cursor.execute(f'''
            SELECT AVG(importance) FROM memories WHERE id IN ({placeholders})
        ''', memory_ids).fetchone()

        return result[0] if result and result[0] else 5.0

    def _generate_cluster_name(self, cursor, memory_ids: List[int]) -> str:
        """Generate cluster name from member entities (TF-IDF approach)."""
        # Get all entities from cluster members
        placeholders = ','.join('?' * len(memory_ids))
        nodes = cursor.execute(f'''
            SELECT entities FROM graph_nodes WHERE memory_id IN ({placeholders})
        ''', memory_ids).fetchall()

        all_entities = []
        for node in nodes:
            if node[0]:
                all_entities.extend(json.loads(node[0]))

        if not all_entities:
            return f"Cluster (ID auto-assigned)"

        # Count entity frequencies
        entity_counts = Counter(all_entities)

        # Top 2-3 most common entities
        top_entities = [e for e, _ in entity_counts.most_common(3)]

        # Build name
        if len(top_entities) >= 2:
            name = f"{top_entities[0].title()} & {top_entities[1].title()}"
        elif len(top_entities) == 1:
            name = f"{top_entities[0].title()} Contexts"
        else:
            name = "Mixed Contexts"

        return name[:100]  # Limit length

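# Naming example for ClusterBuilder._generate_cluster_name() (hypothetical entities): if
# a cluster's members most often share the entities "docker" and "deployment", the
# generated name is "Docker & Deployment"; with a single dominant entity "testing" the
# name becomes "Testing Contexts".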

class ClusterNamer:
    """Enhanced cluster naming with optional LLM support (future)."""

    @staticmethod
    def generate_name_tfidf(entities: List[str]) -> str:
        """Generate name from entity list (TF-IDF fallback)."""
        if not entities:
            return "Unnamed Cluster"

        entity_counts = Counter(entities)
        top_entities = [e for e, _ in entity_counts.most_common(2)]

        if len(top_entities) >= 2:
            return f"{top_entities[0].title()} & {top_entities[1].title()}"
        else:
            return f"{top_entities[0].title()} Contexts"


class GraphEngine:
    """Main graph engine coordinating all graph operations."""

    def __init__(self, db_path: Path = DB_PATH):
        """Initialize graph engine."""
        self.db_path = db_path
        self.entity_extractor = EntityExtractor(max_features=20)
        self.edge_builder = EdgeBuilder(db_path)
        self.cluster_builder = ClusterBuilder(db_path)
        self._ensure_graph_tables()

    def _ensure_graph_tables(self):
        """Create graph tables if they don't exist."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        # Graph nodes table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS graph_nodes (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                memory_id INTEGER UNIQUE NOT NULL,
                entities TEXT,
                embedding_vector TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (memory_id) REFERENCES memories(id) ON DELETE CASCADE
            )
        ''')

        # Graph edges table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS graph_edges (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                source_memory_id INTEGER NOT NULL,
                target_memory_id INTEGER NOT NULL,
                relationship_type TEXT,
                weight REAL DEFAULT 1.0,
                shared_entities TEXT,
                similarity_score REAL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (source_memory_id) REFERENCES memories(id) ON DELETE CASCADE,
                FOREIGN KEY (target_memory_id) REFERENCES memories(id) ON DELETE CASCADE,
                UNIQUE(source_memory_id, target_memory_id)
            )
        ''')

        # Graph clusters table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS graph_clusters (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                description TEXT,
                member_count INTEGER DEFAULT 0,
                avg_importance REAL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        # Add cluster_id to memories if not exists
        try:
            cursor.execute('ALTER TABLE memories ADD COLUMN cluster_id INTEGER')
        except sqlite3.OperationalError:
            pass  # Column already exists

        # Create indexes
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_graph_source ON graph_edges(source_memory_id)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_graph_target ON graph_edges(target_memory_id)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_cluster_members ON memories(cluster_id)')

        conn.commit()
        conn.close()
        logger.info("Graph tables initialized")

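    # Schema sketch (summary of the DDL in _ensure_graph_tables() above): graph_nodes
    # holds one row per memory with its extracted entities and TF-IDF vector (both JSON
    # text); graph_edges holds one row per related pair, UNIQUE on (source, target);
    # graph_clusters holds the Leiden communities; memories.cluster_id points each
    # memory at its cluster row.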
    def build_graph(self, min_similarity: float = 0.3) -> Dict[str, Any]:
        """
        Build complete knowledge graph from all memories.

        Args:
            min_similarity: Minimum cosine similarity for edge creation

        Returns:
            Dictionary with build statistics on success, or an error dictionary
            (success=False) if the database is uninitialized, empty, has fewer
            than 2 memories, or exceeds MAX_MEMORIES_FOR_GRAPH (5000).
        """
        start_time = time.time()
        logger.info("Starting full graph build...")

        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        try:
            # First check if required tables exist
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
            existing_tables = {row[0] for row in cursor.fetchall()}

            required_tables = {'memories', 'graph_edges', 'graph_nodes', 'graph_clusters'}
            missing_tables = required_tables - existing_tables

            if missing_tables:
                logger.error(f"Missing required tables: {missing_tables}")
                return {
                    'success': False,
                    'error': 'database_not_initialized',
                    'message': f"Database not initialized. Missing tables: {', '.join(missing_tables)}",
                    'fix': "Run 'superlocalmemoryv2:status' first to initialize the database, or add some memories."
                }

            # Load all memories
            memories = cursor.execute('''
                SELECT id, content, summary FROM memories
                ORDER BY id
            ''').fetchall()

            if len(memories) == 0:
                logger.warning("No memories found")
                return {
                    'success': False,
                    'error': 'no_memories',
                    'message': 'No memories found in database.',
                    'fix': "Add some memories first: superlocalmemoryv2:remember 'Your content here'"
                }

            if len(memories) < 2:
                logger.warning("Need at least 2 memories to build graph")
                return {
                    'success': False,
                    'error': 'insufficient_memories',
                    'message': 'Need at least 2 memories to build knowledge graph.',
                    'memories': len(memories),
                    'fix': "Add more memories: superlocalmemoryv2:remember 'Your content here'"
                }

            # SECURITY: Prevent O(n²) explosion for large datasets
            if len(memories) > MAX_MEMORIES_FOR_GRAPH:
                logger.error(f"Too many memories for graph build: {len(memories)}")
                return {
                    'success': False,
                    'error': 'too_many_memories',
                    'message': f"Graph build limited to {MAX_MEMORIES_FOR_GRAPH} memories for performance.",
                    'memories': len(memories),
                    'limit': MAX_MEMORIES_FOR_GRAPH,
                    'fix': "Use incremental updates or reduce memory count with compression."
                }

            # Clear existing graph data
            cursor.execute('DELETE FROM graph_edges')
            cursor.execute('DELETE FROM graph_nodes')
            cursor.execute('DELETE FROM graph_clusters')
            cursor.execute('UPDATE memories SET cluster_id = NULL')
            conn.commit()

            logger.info(f"Processing {len(memories)} memories")

            # Extract entities and vectors
            memory_ids = [m[0] for m in memories]
            contents = [f"{m[1]} {m[2] or ''}" for m in memories]  # Combine content + summary

            entities_list, vectors = self.entity_extractor.extract_entities(contents)

            # Store nodes
            for memory_id, entities, vector in zip(memory_ids, entities_list, vectors):
                cursor.execute('''
                    INSERT INTO graph_nodes (memory_id, entities, embedding_vector)
                    VALUES (?, ?, ?)
                ''', (
                    memory_id,
                    json.dumps(entities),
                    json.dumps(vector.tolist())
                ))

            conn.commit()
            logger.info(f"Stored {len(memory_ids)} graph nodes")

            # Build edges
            edges_count = self.edge_builder.build_edges(
                memory_ids, vectors, entities_list
            )

            # Detect communities
            clusters_count = self.cluster_builder.detect_communities()

            elapsed = time.time() - start_time

            stats = {
                'success': True,
                'memories': len(memories),
                'nodes': len(memory_ids),
                'edges': edges_count,
                'clusters': clusters_count,
                'time_seconds': round(elapsed, 2)
            }

            logger.info(f"Graph build complete: {stats}")
            return stats

        except Exception as e:
            logger.error(f"Graph build failed: {e}")
            conn.rollback()
            return {
                'success': False,
                'error': str(e)
            }
        finally:
            conn.close()

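    # Example return value of build_graph() above (field names from the code, numbers
    # illustrative): {'success': True, 'memories': 120, 'nodes': 120, 'edges': 340,
    # 'clusters': 9, 'time_seconds': 1.84}. On failure it carries 'success': False and
    # 'error', plus 'message' and a 'fix' hint for the anticipated error cases.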
    def extract_entities(self, memory_id: int) -> List[str]:
        """
        Extract entities for a single memory.

        Args:
            memory_id: Memory ID

        Returns:
            List of entity strings
        """
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        try:
            # Get memory content
            memory = cursor.execute('''
                SELECT content, summary FROM memories WHERE id = ?
            ''', (memory_id,)).fetchone()

            if not memory:
                return []

            content = f"{memory[0]} {memory[1] or ''}"
            entities_list, _ = self.entity_extractor.extract_entities([content])

            return entities_list[0] if entities_list else []

        finally:
            conn.close()

    def get_related(self, memory_id: int, max_hops: int = 2) -> List[Dict]:
        """
        Get memories connected to this memory via graph edges.

        Args:
            memory_id: Source memory ID
            max_hops: Maximum traversal depth (1 or 2)

        Returns:
            List of related memory dictionaries
        """
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        try:
            # Get 1-hop neighbors
            edges = cursor.execute('''
                SELECT target_memory_id, relationship_type, weight, shared_entities
                FROM graph_edges
                WHERE source_memory_id = ?
                UNION
                SELECT source_memory_id, relationship_type, weight, shared_entities
                FROM graph_edges
                WHERE target_memory_id = ?
            ''', (memory_id, memory_id)).fetchall()

            results = []
            seen_ids = {memory_id}

            for target_id, rel_type, weight, shared_entities in edges:
                if target_id in seen_ids:
                    continue

                seen_ids.add(target_id)

                # Get memory details
                memory = cursor.execute('''
                    SELECT id, summary, importance, tags
                    FROM memories WHERE id = ?
                ''', (target_id,)).fetchone()

                if memory:
                    results.append({
                        'id': memory[0],
                        'summary': memory[1],
                        'importance': memory[2],
                        'tags': json.loads(memory[3]) if memory[3] else [],
                        'relationship': rel_type,
                        'weight': weight,
                        'shared_entities': json.loads(shared_entities) if shared_entities else [],
                        'hops': 1
                    })

            # If max_hops == 2, get 2-hop neighbors
            if max_hops >= 2:
                for result in results[:]:  # Copy to avoid modification during iteration
                    second_hop = cursor.execute('''
                        SELECT target_memory_id, relationship_type, weight
                        FROM graph_edges
                        WHERE source_memory_id = ?
                        UNION
                        SELECT source_memory_id, relationship_type, weight
                        FROM graph_edges
                        WHERE target_memory_id = ?
                    ''', (result['id'], result['id'])).fetchall()

                    for target_id, rel_type, weight in second_hop:
                        if target_id in seen_ids:
                            continue

                        seen_ids.add(target_id)

                        memory = cursor.execute('''
                            SELECT id, summary, importance, tags
                            FROM memories WHERE id = ?
                        ''', (target_id,)).fetchone()

                        if memory:
                            results.append({
                                'id': memory[0],
                                'summary': memory[1],
                                'importance': memory[2],
                                'tags': json.loads(memory[3]) if memory[3] else [],
                                'relationship': rel_type,
                                'weight': weight,
                                'shared_entities': [],
                                'hops': 2
                            })

            # Sort by hop distance (closest first), then by weight (strongest first)
            results.sort(key=lambda x: (x['hops'], -x['weight']))

            return results

        finally:
            conn.close()

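    # Example element returned by get_related() above (illustrative values):
    # {'id': 42, 'summary': 'Switched auth tokens to JWT', 'importance': 7,
    #  'tags': ['auth'], 'relationship': 'similar', 'weight': 0.81,
    #  'shared_entities': ['jwt', 'auth'], 'hops': 1}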
    def get_cluster_members(self, cluster_id: int) -> List[Dict]:
        """
        Get all memories in a cluster.

        Args:
            cluster_id: Cluster ID

        Returns:
            List of memory dictionaries
        """
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        try:
            memories = cursor.execute('''
                SELECT id, summary, importance, tags, created_at
                FROM memories
                WHERE cluster_id = ?
                ORDER BY importance DESC
            ''', (cluster_id,)).fetchall()

            return [
                {
                    'id': m[0],
                    'summary': m[1],
                    'importance': m[2],
                    'tags': json.loads(m[3]) if m[3] else [],
                    'created_at': m[4]
                }
                for m in memories
            ]

        finally:
            conn.close()

    def add_memory_incremental(self, memory_id: int) -> bool:
        """
        Add single memory to existing graph (incremental update).

        Args:
            memory_id: New memory ID to add

        Returns:
            Success status
        """
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        try:
            # Get new memory content
            memory = cursor.execute('''
                SELECT content, summary FROM memories WHERE id = ?
            ''', (memory_id,)).fetchone()

            if not memory:
                return False

            # Extract entities for new memory
            content = f"{memory[0]} {memory[1] or ''}"
            entities_list, vector = self.entity_extractor.extract_entities([content])

            if not entities_list:
                return False

            new_entities = entities_list[0]
            new_vector = vector[0]

            # Store node
            cursor.execute('''
                INSERT OR REPLACE INTO graph_nodes (memory_id, entities, embedding_vector)
                VALUES (?, ?, ?)
            ''', (memory_id, json.dumps(new_entities), json.dumps(new_vector.tolist())))

            # Compare to existing memories
            existing = cursor.execute('''
                SELECT memory_id, embedding_vector, entities
                FROM graph_nodes
                WHERE memory_id != ?
            ''', (memory_id,)).fetchall()

            edges_added = 0

            for existing_id, existing_vector_json, existing_entities_json in existing:
                existing_vector = np.array(json.loads(existing_vector_json))

                # Compute similarity
                sim = cosine_similarity([new_vector], [existing_vector])[0][0]

                if sim >= self.edge_builder.min_similarity:
                    # Find shared entities
                    existing_entities = json.loads(existing_entities_json)
                    shared = list(set(new_entities) & set(existing_entities))

                    # Classify relationship
                    rel_type = self.edge_builder._classify_relationship(sim, shared)

                    # Insert edge
                    cursor.execute('''
                        INSERT OR REPLACE INTO graph_edges
                        (source_memory_id, target_memory_id, relationship_type,
                         weight, shared_entities, similarity_score)
                        VALUES (?, ?, ?, ?, ?, ?)
                    ''', (
                        memory_id,
                        existing_id,
                        rel_type,
                        float(sim),
                        json.dumps(shared),
                        float(sim)
                    ))

                    edges_added += 1

            conn.commit()
            logger.info(f"Added memory {memory_id} to graph with {edges_added} edges")

            # Optionally re-cluster if significant change
            if edges_added > 5:
                logger.info("Significant graph change - consider re-clustering")

            return True

        except Exception as e:
            logger.error(f"Incremental add failed: {e}")
            conn.rollback()
            return False
        finally:
            conn.close()

    def get_stats(self) -> Dict[str, Any]:
        """Get graph statistics."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        try:
            nodes = cursor.execute('SELECT COUNT(*) FROM graph_nodes').fetchone()[0]
            edges = cursor.execute('SELECT COUNT(*) FROM graph_edges').fetchone()[0]
            clusters = cursor.execute('SELECT COUNT(*) FROM graph_clusters').fetchone()[0]

            # Cluster breakdown
            cluster_info = cursor.execute('''
                SELECT name, member_count, avg_importance
                FROM graph_clusters
                ORDER BY member_count DESC
                LIMIT 10
            ''').fetchall()

            return {
                'nodes': nodes,
                'edges': edges,
                'clusters': clusters,
                'top_clusters': [
                    {
                        'name': c[0],
                        'members': c[1],
                        'avg_importance': round(c[2], 1)
                    }
                    for c in cluster_info
                ]
            }

        finally:
            conn.close()


def main():
    """CLI interface for manual graph operations."""
    import argparse

    parser = argparse.ArgumentParser(description='GraphEngine - Knowledge Graph Management')
    parser.add_argument('command', choices=['build', 'stats', 'related', 'cluster'],
                        help='Command to execute')
    parser.add_argument('--memory-id', type=int, help='Memory ID for related/add commands')
    parser.add_argument('--cluster-id', type=int, help='Cluster ID for cluster command')
    parser.add_argument('--min-similarity', type=float, default=0.3,
                        help='Minimum similarity for edges (default: 0.3)')
    parser.add_argument('--hops', type=int, default=2, help='Max hops for related (default: 2)')

    args = parser.parse_args()

    engine = GraphEngine()

    if args.command == 'build':
        print("Building knowledge graph...")
        stats = engine.build_graph(min_similarity=args.min_similarity)
        print(json.dumps(stats, indent=2))

    elif args.command == 'stats':
        print("Graph Statistics:")
        stats = engine.get_stats()
        print(json.dumps(stats, indent=2))

    elif args.command == 'related':
        if not args.memory_id:
            print("Error: --memory-id required for 'related' command")
            return

        print(f"Finding memories related to #{args.memory_id}...")
        related = engine.get_related(args.memory_id, max_hops=args.hops)

        if not related:
            print("No related memories found")
        else:
            for idx, mem in enumerate(related, 1):
                print(f"\n{idx}. Memory #{mem['id']} ({mem['hops']}-hop, weight={mem['weight']:.3f})")
                print(f" Relationship: {mem['relationship']}")
                summary = mem['summary'] or '[No summary]'
                print(f" Summary: {summary[:100]}...")
                if mem['shared_entities']:
                    print(f" Shared: {', '.join(mem['shared_entities'][:5])}")

    elif args.command == 'cluster':
        if not args.cluster_id:
            print("Error: --cluster-id required for 'cluster' command")
            return

        print(f"Cluster #{args.cluster_id} members:")
        members = engine.get_cluster_members(args.cluster_id)

        for idx, mem in enumerate(members, 1):
            print(f"\n{idx}. Memory #{mem['id']} (importance={mem['importance']})")
            summary = mem['summary'] or '[No summary]'
            print(f" {summary[:100]}...")


if __name__ == '__main__':
    main()
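
The module doubles as a small CLI via main() above. A minimal usage sketch (illustrative only: it assumes the file is run from the package's src/ directory against an existing ~/.claude-memory/memory.db, and the memory/cluster IDs are placeholders):

    python graph_engine.py build --min-similarity 0.3
    python graph_engine.py stats
    python graph_engine.py related --memory-id 12 --hops 2
    python graph_engine.py cluster --cluster-id 3

The same flow from Python, under the same assumptions:

    import json
    from graph_engine import GraphEngine

    engine = GraphEngine()  # defaults to ~/.claude-memory/memory.db
    stats = engine.build_graph(min_similarity=0.3)
    print(json.dumps(stats, indent=2))
    if stats.get('success'):
        for rel in engine.get_related(memory_id=1, max_hops=2):
            print(rel['id'], rel['relationship'], round(rel['weight'], 3))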