superlocalmemory 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ATTRIBUTION.md +140 -0
- package/CHANGELOG.md +1749 -0
- package/LICENSE +21 -0
- package/README.md +600 -0
- package/bin/aider-smart +72 -0
- package/bin/slm +202 -0
- package/bin/slm-npm +73 -0
- package/bin/slm.bat +195 -0
- package/bin/slm.cmd +10 -0
- package/bin/superlocalmemoryv2:list +3 -0
- package/bin/superlocalmemoryv2:profile +3 -0
- package/bin/superlocalmemoryv2:recall +3 -0
- package/bin/superlocalmemoryv2:remember +3 -0
- package/bin/superlocalmemoryv2:reset +3 -0
- package/bin/superlocalmemoryv2:status +3 -0
- package/completions/slm.bash +58 -0
- package/completions/slm.zsh +76 -0
- package/configs/antigravity-mcp.json +13 -0
- package/configs/chatgpt-desktop-mcp.json +7 -0
- package/configs/claude-desktop-mcp.json +15 -0
- package/configs/codex-mcp.toml +13 -0
- package/configs/cody-commands.json +29 -0
- package/configs/continue-mcp.yaml +14 -0
- package/configs/continue-skills.yaml +26 -0
- package/configs/cursor-mcp.json +15 -0
- package/configs/gemini-cli-mcp.json +11 -0
- package/configs/jetbrains-mcp.json +11 -0
- package/configs/opencode-mcp.json +12 -0
- package/configs/perplexity-mcp.json +9 -0
- package/configs/vscode-copilot-mcp.json +12 -0
- package/configs/windsurf-mcp.json +16 -0
- package/configs/zed-mcp.json +12 -0
- package/docs/ARCHITECTURE.md +877 -0
- package/docs/CLI-COMMANDS-REFERENCE.md +425 -0
- package/docs/COMPETITIVE-ANALYSIS.md +210 -0
- package/docs/COMPRESSION-README.md +390 -0
- package/docs/GRAPH-ENGINE.md +503 -0
- package/docs/MCP-MANUAL-SETUP.md +720 -0
- package/docs/MCP-TROUBLESHOOTING.md +787 -0
- package/docs/PATTERN-LEARNING.md +363 -0
- package/docs/PROFILES-GUIDE.md +453 -0
- package/docs/RESET-GUIDE.md +353 -0
- package/docs/SEARCH-ENGINE-V2.2.0.md +748 -0
- package/docs/SEARCH-INTEGRATION-GUIDE.md +502 -0
- package/docs/UI-SERVER.md +254 -0
- package/docs/UNIVERSAL-INTEGRATION.md +432 -0
- package/docs/V2.2.0-OPTIONAL-SEARCH.md +666 -0
- package/docs/WINDOWS-INSTALL-README.txt +34 -0
- package/docs/WINDOWS-POST-INSTALL.txt +45 -0
- package/docs/example_graph_usage.py +148 -0
- package/hooks/memory-list-skill.js +130 -0
- package/hooks/memory-profile-skill.js +284 -0
- package/hooks/memory-recall-skill.js +109 -0
- package/hooks/memory-remember-skill.js +127 -0
- package/hooks/memory-reset-skill.js +274 -0
- package/install-skills.sh +436 -0
- package/install.ps1 +417 -0
- package/install.sh +755 -0
- package/mcp_server.py +585 -0
- package/package.json +94 -0
- package/requirements-core.txt +24 -0
- package/requirements.txt +10 -0
- package/scripts/postinstall.js +126 -0
- package/scripts/preuninstall.js +57 -0
- package/skills/slm-build-graph/SKILL.md +423 -0
- package/skills/slm-list-recent/SKILL.md +348 -0
- package/skills/slm-recall/SKILL.md +325 -0
- package/skills/slm-remember/SKILL.md +194 -0
- package/skills/slm-status/SKILL.md +363 -0
- package/skills/slm-switch-profile/SKILL.md +442 -0
- package/src/__pycache__/cache_manager.cpython-312.pyc +0 -0
- package/src/__pycache__/embedding_engine.cpython-312.pyc +0 -0
- package/src/__pycache__/graph_engine.cpython-312.pyc +0 -0
- package/src/__pycache__/hnsw_index.cpython-312.pyc +0 -0
- package/src/__pycache__/hybrid_search.cpython-312.pyc +0 -0
- package/src/__pycache__/memory-profiles.cpython-312.pyc +0 -0
- package/src/__pycache__/memory-reset.cpython-312.pyc +0 -0
- package/src/__pycache__/memory_compression.cpython-312.pyc +0 -0
- package/src/__pycache__/memory_store_v2.cpython-312.pyc +0 -0
- package/src/__pycache__/migrate_v1_to_v2.cpython-312.pyc +0 -0
- package/src/__pycache__/pattern_learner.cpython-312.pyc +0 -0
- package/src/__pycache__/query_optimizer.cpython-312.pyc +0 -0
- package/src/__pycache__/search_engine_v2.cpython-312.pyc +0 -0
- package/src/__pycache__/setup_validator.cpython-312.pyc +0 -0
- package/src/__pycache__/tree_manager.cpython-312.pyc +0 -0
- package/src/cache_manager.py +520 -0
- package/src/embedding_engine.py +671 -0
- package/src/graph_engine.py +970 -0
- package/src/hnsw_index.py +626 -0
- package/src/hybrid_search.py +693 -0
- package/src/memory-profiles.py +518 -0
- package/src/memory-reset.py +485 -0
- package/src/memory_compression.py +999 -0
- package/src/memory_store_v2.py +1088 -0
- package/src/migrate_v1_to_v2.py +638 -0
- package/src/pattern_learner.py +898 -0
- package/src/query_optimizer.py +513 -0
- package/src/search_engine_v2.py +403 -0
- package/src/setup_validator.py +479 -0
- package/src/tree_manager.py +720 -0
package/src/graph_engine.py

@@ -0,0 +1,970 @@
#!/usr/bin/env python3
"""
GraphEngine - Knowledge Graph Clustering for SuperLocalMemory V2

Copyright (c) 2026 Varun Pratap Bhardwaj
Licensed under MIT License
Repository: https://github.com/varun369/SuperLocalMemoryV2

Implements GraphRAG with Leiden community detection to:
- Extract entities from memories (TF-IDF keyword extraction)
- Build similarity-based edges between memories
- Detect thematic clusters using Leiden algorithm
- Enable graph traversal for related memory discovery

All processing is local - no external APIs.

LIMITS:
- MAX_MEMORIES_FOR_GRAPH: 5000 (prevents O(n²) explosion)
- For larger datasets, use incremental updates
"""

# SECURITY: Graph build limits to prevent resource exhaustion
MAX_MEMORIES_FOR_GRAPH = 5000

import sqlite3
import json
import time
import logging
from datetime import datetime
from pathlib import Path
from typing import Any, List, Dict, Optional, Tuple, Set
from collections import Counter

# Core dependencies
try:
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity
    import numpy as np
    SKLEARN_AVAILABLE = True
except ImportError:
    SKLEARN_AVAILABLE = False
    raise ImportError("scikit-learn is required. Install: pip install scikit-learn")

# Graph dependencies - lazy import to avoid conflicts with compression module
IGRAPH_AVAILABLE = False
try:
    # Import only when needed to avoid module conflicts
    import importlib
    ig_module = importlib.import_module('igraph')
    leiden_module = importlib.import_module('leidenalg')
    IGRAPH_AVAILABLE = True
except ImportError:
    pass  # Will raise error when building clusters if not available

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

MEMORY_DIR = Path.home() / ".claude-memory"
DB_PATH = MEMORY_DIR / "memory.db"


class EntityExtractor:
    """Extract key entities/concepts from memory content using TF-IDF."""

    def __init__(self, max_features: int = 20, min_df: int = 1):
        """
        Initialize entity extractor.

        Args:
            max_features: Top N keywords to extract per memory
            min_df: Minimum document frequency (ignore very rare terms)
        """
        self.max_features = max_features
        self.vectorizer = TfidfVectorizer(
            max_features=max_features,
            stop_words='english',
            ngram_range=(1, 2),  # Unigrams + bigrams
            min_df=min_df,
            lowercase=True,
            token_pattern=r'(?u)\b[a-zA-Z][a-zA-Z0-9_-]*\b'  # Alphanumeric tokens
        )

    def extract_entities(self, contents: List[str]) -> Tuple[List[List[str]], np.ndarray]:
        """
        Extract entities from multiple contents.

        Args:
            contents: List of memory content strings

        Returns:
            Tuple of (entities_per_content, tfidf_vectors)
        """
        if not contents:
            return [], np.array([])

        try:
            # Fit and transform all contents
            vectors = self.vectorizer.fit_transform(contents)
            feature_names = self.vectorizer.get_feature_names_out()

            # Extract top entities for each content
            all_entities = []
            for idx in range(len(contents)):
                scores = vectors[idx].toarray()[0]

                # Get indices of top features
                top_indices = np.argsort(scores)[::-1]

                # Extract entities with score > 0.05 (minimum threshold)
                entities = [
                    feature_names[i]
                    for i in top_indices
                    if scores[i] > 0.05
                ][:self.max_features]

                all_entities.append(entities)

            return all_entities, vectors.toarray()

        except Exception as e:
            logger.error(f"Entity extraction failed: {e}")
            return [[] for _ in contents], np.zeros((len(contents), 1))
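
For illustration only (this snippet is not part of graph_engine.py), the extractor above can be exercised on two short memory strings; the exact terms returned depend on the TF-IDF vocabulary fitted on the inputs:

# Illustrative snippet, not part of graph_engine.py.
extractor = EntityExtractor(max_features=5)
memories = [
    "Configured nginx reverse proxy for the staging server",
    "Debugged nginx timeouts caused by a slow upstream service",
]
entities, vectors = extractor.extract_entities(memories)
print(entities)        # e.g. [['nginx', ...], ['nginx', ...]] - exact terms depend on the fitted vocabulary
print(vectors.shape)   # (2, vocabulary_size) dense TF-IDF matrix, later reused for cosine similarity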


class EdgeBuilder:
    """Build similarity edges between memories based on entity overlap."""

    def __init__(self, db_path: Path, min_similarity: float = 0.3):
        """
        Initialize edge builder.

        Args:
            db_path: Path to SQLite database
            min_similarity: Minimum cosine similarity to create edge
        """
        self.db_path = db_path
        self.min_similarity = min_similarity

    def build_edges(self, memory_ids: List[int], vectors: np.ndarray,
                    entities_list: List[List[str]]) -> int:
        """
        Build edges between similar memories.

        Args:
            memory_ids: List of memory IDs
            vectors: TF-IDF vectors (n x features)
            entities_list: List of entity lists per memory

        Returns:
            Number of edges created
        """
        if len(memory_ids) < 2:
            logger.warning("Need at least 2 memories to build edges")
            return 0

        # Compute pairwise cosine similarity
        similarity_matrix = cosine_similarity(vectors)

        edges_added = 0
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        try:
            for i in range(len(memory_ids)):
                for j in range(i + 1, len(memory_ids)):
                    sim = similarity_matrix[i, j]

                    if sim >= self.min_similarity:
                        # Find shared entities
                        entities_i = set(entities_list[i])
                        entities_j = set(entities_list[j])
                        shared = list(entities_i & entities_j)

                        # Classify relationship type
                        rel_type = self._classify_relationship(sim, shared)

                        # Insert edge (or update if exists)
                        cursor.execute('''
                            INSERT OR REPLACE INTO graph_edges
                            (source_memory_id, target_memory_id, relationship_type,
                             weight, shared_entities, similarity_score)
                            VALUES (?, ?, ?, ?, ?, ?)
                        ''', (
                            memory_ids[i],
                            memory_ids[j],
                            rel_type,
                            float(sim),
                            json.dumps(shared),
                            float(sim)
                        ))

                        edges_added += 1

            conn.commit()
            logger.info(f"Created {edges_added} edges")
            return edges_added

        except Exception as e:
            logger.error(f"Edge building failed: {e}")
            conn.rollback()
            return 0
        finally:
            conn.close()

    def _classify_relationship(self, similarity: float, shared_entities: List[str]) -> str:
        """
        Classify edge type based on similarity and shared entities.

        Args:
            similarity: Cosine similarity score
            shared_entities: List of shared entity strings

        Returns:
            Relationship type: 'similar', 'depends_on', or 'related_to'
        """
        # Check for dependency keywords
        dependency_keywords = {'dependency', 'require', 'import', 'use', 'need'}
        has_dependency = any(
            any(kw in entity.lower() for kw in dependency_keywords)
            for entity in shared_entities
        )

        if similarity > 0.7:
            return 'similar'
        elif has_dependency:
            return 'depends_on'
        else:
            return 'related_to'
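
A quick illustration of the classification rule above (again not part of the packaged file; the database path and entity strings are made up): similarity above 0.7 takes precedence over the dependency-keyword check.

# Illustrative snippet, not part of graph_engine.py.
builder = EdgeBuilder(Path("/tmp/slm-demo.db"), min_similarity=0.3)          # hypothetical path; __init__ does not open the DB
print(builder._classify_relationship(0.85, ["docker", "docker compose"]))   # -> 'similar'     (similarity > 0.7)
print(builder._classify_relationship(0.45, ["requirements.txt"]))           # -> 'depends_on'  (contains 'require')
print(builder._classify_relationship(0.35, ["api design"]))                 # -> 'related_to'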


class ClusterBuilder:
    """Detect memory communities using Leiden algorithm."""

    def __init__(self, db_path: Path):
        """Initialize cluster builder."""
        self.db_path = db_path

    def detect_communities(self) -> int:
        """
        Run Leiden algorithm to find memory clusters.

        Returns:
            Number of clusters created
        """
        # Import igraph modules here to avoid conflicts
        try:
            import igraph as ig
            import leidenalg
        except ImportError:
            raise ImportError("python-igraph and leidenalg required. Install: pip install python-igraph leidenalg")

        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        try:
            # Load all edges
            edges = cursor.execute('''
                SELECT source_memory_id, target_memory_id, weight
                FROM graph_edges
            ''').fetchall()

            if not edges:
                logger.warning("No edges found - cannot build clusters")
                return 0

            # Build memory ID mapping
            memory_ids = set()
            for source, target, _ in edges:
                memory_ids.add(source)
                memory_ids.add(target)

            memory_ids = sorted(list(memory_ids))
            memory_id_to_vertex = {mid: idx for idx, mid in enumerate(memory_ids)}
            vertex_to_memory_id = {idx: mid for mid, idx in memory_id_to_vertex.items()}

            # Create igraph graph
            g = ig.Graph()
            g.add_vertices(len(memory_ids))

            # Add edges with weights
            edge_list = []
            edge_weights = []

            for source, target, weight in edges:
                edge_list.append((
                    memory_id_to_vertex[source],
                    memory_id_to_vertex[target]
                ))
                edge_weights.append(weight)

            g.add_edges(edge_list)

            # Run Leiden algorithm
            logger.info(f"Running Leiden on {len(memory_ids)} nodes, {len(edges)} edges")
            partition = leidenalg.find_partition(
                g,
                leidenalg.ModularityVertexPartition,
                weights=edge_weights,
                n_iterations=100,
                seed=42  # Reproducible
            )

            # Process communities
            clusters_created = 0

            for cluster_idx, community in enumerate(partition):
                if len(community) < 2:  # Skip singleton clusters
                    continue

                # Get memory IDs in this cluster
                cluster_memory_ids = [vertex_to_memory_id[v] for v in community]

                # Calculate cluster stats
                avg_importance = self._get_avg_importance(cursor, cluster_memory_ids)

                # Auto-generate cluster name
                cluster_name = self._generate_cluster_name(cursor, cluster_memory_ids)

                # Insert cluster
                result = cursor.execute('''
                    INSERT INTO graph_clusters (name, member_count, avg_importance)
                    VALUES (?, ?, ?)
                ''', (cluster_name, len(cluster_memory_ids), avg_importance))

                cluster_id = result.lastrowid

                # Update memories with cluster_id
                cursor.executemany('''
                    UPDATE memories SET cluster_id = ? WHERE id = ?
                ''', [(cluster_id, mid) for mid in cluster_memory_ids])

                clusters_created += 1
                logger.info(f"Cluster {cluster_id}: '{cluster_name}' ({len(cluster_memory_ids)} members)")

            conn.commit()
            logger.info(f"Created {clusters_created} clusters")
            return clusters_created

        except Exception as e:
            logger.error(f"Community detection failed: {e}")
            conn.rollback()
            return 0
        finally:
            conn.close()

    def _get_avg_importance(self, cursor, memory_ids: List[int]) -> float:
        """Calculate average importance for cluster."""
        placeholders = ','.join('?' * len(memory_ids))
        result = cursor.execute(f'''
            SELECT AVG(importance) FROM memories WHERE id IN ({placeholders})
        ''', memory_ids).fetchone()

        return result[0] if result and result[0] else 5.0

    def _generate_cluster_name(self, cursor, memory_ids: List[int]) -> str:
        """Generate cluster name from member entities (TF-IDF approach)."""
        # Get all entities from cluster members
        placeholders = ','.join('?' * len(memory_ids))
        nodes = cursor.execute(f'''
            SELECT entities FROM graph_nodes WHERE memory_id IN ({placeholders})
        ''', memory_ids).fetchall()

        all_entities = []
        for node in nodes:
            if node[0]:
                all_entities.extend(json.loads(node[0]))

        if not all_entities:
            return "Cluster (ID auto-assigned)"

        # Count entity frequencies
        entity_counts = Counter(all_entities)

        # Top 2-3 most common entities
        top_entities = [e for e, _ in entity_counts.most_common(3)]

        # Build name
        if len(top_entities) >= 2:
            name = f"{top_entities[0].title()} & {top_entities[1].title()}"
        elif len(top_entities) == 1:
            name = f"{top_entities[0].title()} Contexts"
        else:
            name = "Mixed Contexts"

        return name[:100]  # Limit length
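
The Leiden step can be tried in isolation on a toy graph (illustrative only, not part of the packaged file; requires python-igraph and leidenalg). Two tightly connected triangles joined by one weak edge typically come back as two communities:

# Illustrative snippet, not part of graph_engine.py.
import igraph as ig
import leidenalg

g = ig.Graph()
g.add_vertices(6)
g.add_edges([(0, 1), (1, 2), (0, 2), (3, 4), (4, 5), (3, 5), (2, 3)])
weights = [0.9, 0.8, 0.85, 0.9, 0.8, 0.85, 0.31]   # last edge is the weak bridge

partition = leidenalg.find_partition(
    g, leidenalg.ModularityVertexPartition, weights=weights, seed=42
)
for idx, community in enumerate(partition):
    print(idx, sorted(community))   # typically: 0 [0, 1, 2] and 1 [3, 4, 5]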


class ClusterNamer:
    """Enhanced cluster naming with optional LLM support (future)."""

    @staticmethod
    def generate_name_tfidf(entities: List[str]) -> str:
        """Generate name from entity list (TF-IDF fallback)."""
        if not entities:
            return "Unnamed Cluster"

        entity_counts = Counter(entities)
        top_entities = [e for e, _ in entity_counts.most_common(2)]

        if len(top_entities) >= 2:
            return f"{top_entities[0].title()} & {top_entities[1].title()}"
        else:
            return f"{top_entities[0].title()} Contexts"


class GraphEngine:
    """Main graph engine coordinating all graph operations."""

    def __init__(self, db_path: Path = DB_PATH):
        """Initialize graph engine."""
        self.db_path = db_path
        self.entity_extractor = EntityExtractor(max_features=20)
        self.edge_builder = EdgeBuilder(db_path)
        self.cluster_builder = ClusterBuilder(db_path)
        self._ensure_graph_tables()

    def _ensure_graph_tables(self):
        """Create graph tables if they don't exist."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        # Graph nodes table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS graph_nodes (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                memory_id INTEGER UNIQUE NOT NULL,
                entities TEXT,
                embedding_vector TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (memory_id) REFERENCES memories(id) ON DELETE CASCADE
            )
        ''')

        # Graph edges table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS graph_edges (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                source_memory_id INTEGER NOT NULL,
                target_memory_id INTEGER NOT NULL,
                relationship_type TEXT,
                weight REAL DEFAULT 1.0,
                shared_entities TEXT,
                similarity_score REAL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (source_memory_id) REFERENCES memories(id) ON DELETE CASCADE,
                FOREIGN KEY (target_memory_id) REFERENCES memories(id) ON DELETE CASCADE,
                UNIQUE(source_memory_id, target_memory_id)
            )
        ''')

        # Graph clusters table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS graph_clusters (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                description TEXT,
                member_count INTEGER DEFAULT 0,
                avg_importance REAL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        # Add cluster_id to memories if not exists
        try:
            cursor.execute('ALTER TABLE memories ADD COLUMN cluster_id INTEGER')
        except sqlite3.OperationalError:
            pass  # Column already exists

        # Create indexes
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_graph_source ON graph_edges(source_memory_id)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_graph_target ON graph_edges(target_memory_id)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_cluster_members ON memories(cluster_id)')

        conn.commit()
        conn.close()
        logger.info("Graph tables initialized")

    def build_graph(self, min_similarity: float = 0.3) -> Dict[str, Any]:
        """
        Build complete knowledge graph from all memories.

        Args:
            min_similarity: Minimum cosine similarity for edge creation

        Returns:
            Dictionary with build statistics, or an error dictionary if the
            build cannot run (e.g. more than MAX_MEMORIES_FOR_GRAPH memories).
        """
        start_time = time.time()
        logger.info("Starting full graph build...")

        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        try:
            # First check if required tables exist
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
            existing_tables = {row[0] for row in cursor.fetchall()}

            required_tables = {'memories', 'graph_edges', 'graph_nodes', 'graph_clusters'}
            missing_tables = required_tables - existing_tables

            if missing_tables:
                logger.error(f"Missing required tables: {missing_tables}")
                return {
                    'success': False,
                    'error': 'database_not_initialized',
                    'message': f"Database not initialized. Missing tables: {', '.join(missing_tables)}",
                    'fix': "Run 'superlocalmemoryv2:status' first to initialize the database, or add some memories."
                }

            # Load all memories
            memories = cursor.execute('''
                SELECT id, content, summary FROM memories
                ORDER BY id
            ''').fetchall()

            if len(memories) == 0:
                logger.warning("No memories found")
                return {
                    'success': False,
                    'error': 'no_memories',
                    'message': 'No memories found in database.',
                    'fix': "Add some memories first: superlocalmemoryv2:remember 'Your content here'"
                }

            if len(memories) < 2:
                logger.warning("Need at least 2 memories to build graph")
                return {
                    'success': False,
                    'error': 'insufficient_memories',
                    'message': 'Need at least 2 memories to build knowledge graph.',
                    'memories': len(memories),
                    'fix': "Add more memories: superlocalmemoryv2:remember 'Your content here'"
                }

            # SECURITY: Prevent O(n²) explosion for large datasets
            if len(memories) > MAX_MEMORIES_FOR_GRAPH:
                logger.error(f"Too many memories for graph build: {len(memories)}")
                return {
                    'success': False,
                    'error': 'too_many_memories',
                    'message': f"Graph build limited to {MAX_MEMORIES_FOR_GRAPH} memories for performance.",
                    'memories': len(memories),
                    'limit': MAX_MEMORIES_FOR_GRAPH,
                    'fix': "Use incremental updates or reduce memory count with compression."
                }

            # Clear existing graph data
            cursor.execute('DELETE FROM graph_edges')
            cursor.execute('DELETE FROM graph_nodes')
            cursor.execute('DELETE FROM graph_clusters')
            cursor.execute('UPDATE memories SET cluster_id = NULL')
            conn.commit()

            logger.info(f"Processing {len(memories)} memories")

            # Extract entities and vectors
            memory_ids = [m[0] for m in memories]
            contents = [f"{m[1]} {m[2] or ''}" for m in memories]  # Combine content + summary

            entities_list, vectors = self.entity_extractor.extract_entities(contents)

            # Store nodes
            for memory_id, entities, vector in zip(memory_ids, entities_list, vectors):
                cursor.execute('''
                    INSERT INTO graph_nodes (memory_id, entities, embedding_vector)
                    VALUES (?, ?, ?)
                ''', (
                    memory_id,
                    json.dumps(entities),
                    json.dumps(vector.tolist())
                ))

            conn.commit()
            logger.info(f"Stored {len(memory_ids)} graph nodes")

            # Build edges
            edges_count = self.edge_builder.build_edges(
                memory_ids, vectors, entities_list
            )

            # Detect communities
            clusters_count = self.cluster_builder.detect_communities()

            elapsed = time.time() - start_time

            stats = {
                'success': True,
                'memories': len(memories),
                'nodes': len(memory_ids),
                'edges': edges_count,
                'clusters': clusters_count,
                'time_seconds': round(elapsed, 2)
            }

            logger.info(f"Graph build complete: {stats}")
            return stats

        except Exception as e:
            logger.error(f"Graph build failed: {e}")
            conn.rollback()
            return {
                'success': False,
                'error': str(e)
            }
        finally:
            conn.close()

    def extract_entities(self, memory_id: int) -> List[str]:
        """
        Extract entities for a single memory.

        Args:
            memory_id: Memory ID

        Returns:
            List of entity strings
        """
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        try:
            # Get memory content
            memory = cursor.execute('''
                SELECT content, summary FROM memories WHERE id = ?
            ''', (memory_id,)).fetchone()

            if not memory:
                return []

            content = f"{memory[0]} {memory[1] or ''}"
            entities_list, _ = self.entity_extractor.extract_entities([content])

            return entities_list[0] if entities_list else []

        finally:
            conn.close()

    def get_related(self, memory_id: int, max_hops: int = 2) -> List[Dict]:
        """
        Get memories connected to this memory via graph edges.

        Args:
            memory_id: Source memory ID
            max_hops: Maximum traversal depth (1 or 2)

        Returns:
            List of related memory dictionaries
        """
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        try:
            # Get 1-hop neighbors
            edges = cursor.execute('''
                SELECT target_memory_id, relationship_type, weight, shared_entities
                FROM graph_edges
                WHERE source_memory_id = ?
                UNION
                SELECT source_memory_id, relationship_type, weight, shared_entities
                FROM graph_edges
                WHERE target_memory_id = ?
            ''', (memory_id, memory_id)).fetchall()

            results = []
            seen_ids = {memory_id}

            for target_id, rel_type, weight, shared_entities in edges:
                if target_id in seen_ids:
                    continue

                seen_ids.add(target_id)

                # Get memory details
                memory = cursor.execute('''
                    SELECT id, summary, importance, tags
                    FROM memories WHERE id = ?
                ''', (target_id,)).fetchone()

                if memory:
                    results.append({
                        'id': memory[0],
                        'summary': memory[1],
                        'importance': memory[2],
                        'tags': json.loads(memory[3]) if memory[3] else [],
                        'relationship': rel_type,
                        'weight': weight,
                        'shared_entities': json.loads(shared_entities) if shared_entities else [],
                        'hops': 1
                    })

            # If max_hops == 2, get 2-hop neighbors
            if max_hops >= 2:
                for result in results[:]:  # Copy to avoid modification during iteration
                    second_hop = cursor.execute('''
                        SELECT target_memory_id, relationship_type, weight
                        FROM graph_edges
                        WHERE source_memory_id = ?
                        UNION
                        SELECT source_memory_id, relationship_type, weight
                        FROM graph_edges
                        WHERE target_memory_id = ?
                    ''', (result['id'], result['id'])).fetchall()

                    for target_id, rel_type, weight in second_hop:
                        if target_id in seen_ids:
                            continue

                        seen_ids.add(target_id)

                        memory = cursor.execute('''
                            SELECT id, summary, importance, tags
                            FROM memories WHERE id = ?
                        ''', (target_id,)).fetchone()

                        if memory:
                            results.append({
                                'id': memory[0],
                                'summary': memory[1],
                                'importance': memory[2],
                                'tags': json.loads(memory[3]) if memory[3] else [],
                                'relationship': rel_type,
                                'weight': weight,
                                'shared_entities': [],
                                'hops': 2
                            })

            # Sort: nearest hops first, then strongest connections
            results.sort(key=lambda x: (x['hops'], -x['weight']))

            return results

        finally:
            conn.close()

    def get_cluster_members(self, cluster_id: int) -> List[Dict]:
        """
        Get all memories in a cluster.

        Args:
            cluster_id: Cluster ID

        Returns:
            List of memory dictionaries
        """
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        try:
            memories = cursor.execute('''
                SELECT id, summary, importance, tags, created_at
                FROM memories
                WHERE cluster_id = ?
                ORDER BY importance DESC
            ''', (cluster_id,)).fetchall()

            return [
                {
                    'id': m[0],
                    'summary': m[1],
                    'importance': m[2],
                    'tags': json.loads(m[3]) if m[3] else [],
                    'created_at': m[4]
                }
                for m in memories
            ]

        finally:
            conn.close()

    def add_memory_incremental(self, memory_id: int) -> bool:
        """
        Add single memory to existing graph (incremental update).

        Args:
            memory_id: New memory ID to add

        Returns:
            Success status
        """
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        try:
            # Get new memory content
            memory = cursor.execute('''
                SELECT content, summary FROM memories WHERE id = ?
            ''', (memory_id,)).fetchone()

            if not memory:
                return False

            # Extract entities for new memory
            content = f"{memory[0]} {memory[1] or ''}"
            entities_list, vector = self.entity_extractor.extract_entities([content])

            if not entities_list:
                return False

            new_entities = entities_list[0]
            new_vector = vector[0]

            # Store node
            cursor.execute('''
                INSERT OR REPLACE INTO graph_nodes (memory_id, entities, embedding_vector)
                VALUES (?, ?, ?)
            ''', (memory_id, json.dumps(new_entities), json.dumps(new_vector.tolist())))

            # Compare to existing memories
            existing = cursor.execute('''
                SELECT memory_id, embedding_vector, entities
                FROM graph_nodes
                WHERE memory_id != ?
            ''', (memory_id,)).fetchall()

            edges_added = 0

            for existing_id, existing_vector_json, existing_entities_json in existing:
                existing_vector = np.array(json.loads(existing_vector_json))

                # Compute similarity
                sim = cosine_similarity([new_vector], [existing_vector])[0][0]

                if sim >= self.edge_builder.min_similarity:
                    # Find shared entities
                    existing_entities = json.loads(existing_entities_json)
                    shared = list(set(new_entities) & set(existing_entities))

                    # Classify relationship
                    rel_type = self.edge_builder._classify_relationship(sim, shared)

                    # Insert edge
                    cursor.execute('''
                        INSERT OR REPLACE INTO graph_edges
                        (source_memory_id, target_memory_id, relationship_type,
                         weight, shared_entities, similarity_score)
                        VALUES (?, ?, ?, ?, ?, ?)
                    ''', (
                        memory_id,
                        existing_id,
                        rel_type,
                        float(sim),
                        json.dumps(shared),
                        float(sim)
                    ))

                    edges_added += 1

            conn.commit()
            logger.info(f"Added memory {memory_id} to graph with {edges_added} edges")

            # Optionally re-cluster if significant change
            if edges_added > 5:
                logger.info("Significant graph change - consider re-clustering")

            return True

        except Exception as e:
            logger.error(f"Incremental add failed: {e}")
            conn.rollback()
            return False
        finally:
            conn.close()

    def get_stats(self) -> Dict[str, Any]:
        """Get graph statistics."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        try:
            nodes = cursor.execute('SELECT COUNT(*) FROM graph_nodes').fetchone()[0]
            edges = cursor.execute('SELECT COUNT(*) FROM graph_edges').fetchone()[0]
            clusters = cursor.execute('SELECT COUNT(*) FROM graph_clusters').fetchone()[0]

            # Cluster breakdown (columns as defined in _ensure_graph_tables)
            cluster_info = cursor.execute('''
                SELECT name, member_count, avg_importance
                FROM graph_clusters
                ORDER BY member_count DESC
                LIMIT 10
            ''').fetchall()

            return {
                'nodes': nodes,
                'edges': edges,
                'clusters': clusters,
                'top_clusters': [
                    {
                        'name': c[0],
                        'members': c[1],
                        'avg_importance': round(c[2], 1)
                    }
                    for c in cluster_info
                ]
            }

        finally:
            conn.close()
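
Programmatic use follows directly from the API above. A minimal sketch (not part of the packaged file), assuming memories already exist in the default ~/.claude-memory/memory.db; the memory ID 42 is only a placeholder:

# Illustrative snippet, not part of graph_engine.py.
engine = GraphEngine()                              # uses DB_PATH by default
stats = engine.build_graph(min_similarity=0.3)      # full rebuild: nodes, edges, Leiden clusters
print(stats)                                        # e.g. {'success': True, 'memories': ..., 'edges': ..., 'clusters': ..., 'time_seconds': ...}

for mem in engine.get_related(memory_id=42, max_hops=2):   # 42 is a placeholder ID
    print(mem['id'], mem['relationship'], round(mem['weight'], 3), mem['summary'])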


def main():
    """CLI interface for manual graph operations."""
    import argparse

    parser = argparse.ArgumentParser(description='GraphEngine - Knowledge Graph Management')
    parser.add_argument('command', choices=['build', 'stats', 'related', 'cluster'],
                        help='Command to execute')
    parser.add_argument('--memory-id', type=int, help='Memory ID for related/add commands')
    parser.add_argument('--cluster-id', type=int, help='Cluster ID for cluster command')
    parser.add_argument('--min-similarity', type=float, default=0.3,
                        help='Minimum similarity for edges (default: 0.3)')
    parser.add_argument('--hops', type=int, default=2, help='Max hops for related (default: 2)')

    args = parser.parse_args()

    engine = GraphEngine()

    if args.command == 'build':
        print("Building knowledge graph...")
        stats = engine.build_graph(min_similarity=args.min_similarity)
        print(json.dumps(stats, indent=2))

    elif args.command == 'stats':
        print("Graph Statistics:")
        stats = engine.get_stats()
        print(json.dumps(stats, indent=2))

    elif args.command == 'related':
        if not args.memory_id:
            print("Error: --memory-id required for 'related' command")
            return

        print(f"Finding memories related to #{args.memory_id}...")
        related = engine.get_related(args.memory_id, max_hops=args.hops)

        if not related:
            print("No related memories found")
        else:
            for idx, mem in enumerate(related, 1):
                print(f"\n{idx}. Memory #{mem['id']} ({mem['hops']}-hop, weight={mem['weight']:.3f})")
                print(f" Relationship: {mem['relationship']}")
                summary = mem['summary'] or '[No summary]'
                print(f" Summary: {summary[:100]}...")
                if mem['shared_entities']:
                    print(f" Shared: {', '.join(mem['shared_entities'][:5])}")

    elif args.command == 'cluster':
        if not args.cluster_id:
            print("Error: --cluster-id required for 'cluster' command")
            return

        print(f"Cluster #{args.cluster_id} members:")
        members = engine.get_cluster_members(args.cluster_id)

        for idx, mem in enumerate(members, 1):
            print(f"\n{idx}. Memory #{mem['id']} (importance={mem['importance']})")
            summary = mem['summary'] or '[No summary]'
            print(f" {summary[:100]}...")


if __name__ == '__main__':
    main()
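
The main() entry point above also exposes these operations as a small CLI; invoked directly (for example from the package's src/ directory), the calls would take the form "python graph_engine.py build --min-similarity 0.3", "python graph_engine.py stats", "python graph_engine.py related --memory-id <id> --hops 2", and "python graph_engine.py cluster --cluster-id <id>", with subcommands and flags exactly as defined in the argparse setup above.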