superlocalmemory 2.3.7 → 2.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +66 -0
- package/README.md +53 -6
- package/hooks/memory-profile-skill.js +7 -18
- package/mcp_server.py +74 -12
- package/package.json +2 -1
- package/src/auto_backup.py +424 -0
- package/src/graph_engine.py +459 -44
- package/src/memory-profiles.py +321 -243
- package/src/memory_store_v2.py +82 -31
- package/src/pattern_learner.py +126 -44
- package/src/setup_validator.py +8 -1
- package/ui/app.js +526 -17
- package/ui/index.html +182 -1
- package/ui_server.py +356 -55
- package/src/__pycache__/cache_manager.cpython-312.pyc +0 -0
- package/src/__pycache__/embedding_engine.cpython-312.pyc +0 -0
- package/src/__pycache__/graph_engine.cpython-312.pyc +0 -0
- package/src/__pycache__/hnsw_index.cpython-312.pyc +0 -0
- package/src/__pycache__/hybrid_search.cpython-312.pyc +0 -0
- package/src/__pycache__/memory-profiles.cpython-312.pyc +0 -0
- package/src/__pycache__/memory-reset.cpython-312.pyc +0 -0
- package/src/__pycache__/memory_compression.cpython-312.pyc +0 -0
- package/src/__pycache__/memory_store_v2.cpython-312.pyc +0 -0
- package/src/__pycache__/migrate_v1_to_v2.cpython-312.pyc +0 -0
- package/src/__pycache__/pattern_learner.cpython-312.pyc +0 -0
- package/src/__pycache__/query_optimizer.cpython-312.pyc +0 -0
- package/src/__pycache__/search_engine_v2.cpython-312.pyc +0 -0
- package/src/__pycache__/setup_validator.cpython-312.pyc +0 -0
- package/src/__pycache__/tree_manager.cpython-312.pyc +0 -0
package/src/graph_engine.py
CHANGED
|
@@ -239,9 +239,21 @@ class ClusterBuilder:
|
|
|
239
239
|
"""Initialize cluster builder."""
|
|
240
240
|
self.db_path = db_path
|
|
241
241
|
|
|
242
|
+
def _get_active_profile(self) -> str:
    """Return the name of the currently active profile.

    Reads ``profiles.json`` under ``MEMORY_DIR`` and returns its
    ``active_profile`` entry.  Falls back to ``'default'`` when the file
    is missing or unreadable, is not valid JSON, does not contain a JSON
    object, or when the entry is absent or not a non-empty string.

    Returns:
        The active profile name, or ``'default'``.
    """
    config_file = MEMORY_DIR / "profiles.json"
    try:
        with open(config_file, 'r') as f:
            config = json.load(f)
    except (OSError, ValueError):
        # OSError: file missing/unreadable.  ValueError: malformed JSON
        # (json.JSONDecodeError) or undecodable bytes.  Best-effort fallback.
        return 'default'

    if not isinstance(config, dict):
        # e.g. a top-level list or string: there is no key to look up.
        return 'default'

    profile = config.get('active_profile', 'default')
    # The result is bound to `profile = ?` in SQL by the methods of this
    # class; a null or non-string value would silently match no rows.
    if isinstance(profile, str) and profile:
        return profile
    return 'default'
|
|
253
|
+
|
|
242
254
|
def detect_communities(self) -> int:
|
|
243
255
|
"""
|
|
244
|
-
Run Leiden algorithm to find memory clusters.
|
|
256
|
+
Run Leiden algorithm to find memory clusters (active profile only).
|
|
245
257
|
|
|
246
258
|
Returns:
|
|
247
259
|
Number of clusters created
|
|
@@ -255,13 +267,16 @@ class ClusterBuilder:
|
|
|
255
267
|
|
|
256
268
|
conn = sqlite3.connect(self.db_path)
|
|
257
269
|
cursor = conn.cursor()
|
|
270
|
+
active_profile = self._get_active_profile()
|
|
258
271
|
|
|
259
272
|
try:
|
|
260
|
-
# Load
|
|
273
|
+
# Load edges for active profile's memories only
|
|
261
274
|
edges = cursor.execute('''
|
|
262
|
-
SELECT source_memory_id, target_memory_id, weight
|
|
263
|
-
FROM graph_edges
|
|
264
|
-
|
|
275
|
+
SELECT ge.source_memory_id, ge.target_memory_id, ge.weight
|
|
276
|
+
FROM graph_edges ge
|
|
277
|
+
WHERE ge.source_memory_id IN (SELECT id FROM memories WHERE profile = ?)
|
|
278
|
+
AND ge.target_memory_id IN (SELECT id FROM memories WHERE profile = ?)
|
|
279
|
+
''', (active_profile, active_profile)).fetchall()
|
|
265
280
|
|
|
266
281
|
if not edges:
|
|
267
282
|
logger.warning("No edges found - cannot build clusters")
|
|
@@ -389,6 +404,293 @@ class ClusterBuilder:
|
|
|
389
404
|
return name[:100] # Limit length
|
|
390
405
|
|
|
391
406
|
|
|
407
|
+
def hierarchical_cluster(self, min_subcluster_size: int = 5, max_depth: int = 3) -> Dict[str, any]:
    """
    Run recursive Leiden clustering — cluster the clusters.

    Selects every cluster of the active profile that has at least
    ``min_subcluster_size * 2`` members and hands it to
    ``_recursive_subcluster``.  All writes are committed together at the
    end; on any failure the whole run is rolled back.

    Args:
        min_subcluster_size: Minimum members to attempt sub-clustering (default 5)
        max_depth: Maximum recursion depth (default 3)

    Returns:
        Dictionary with the keys ``subclusters_created``, ``depth_reached``
        and ``parent_clusters_processed``.  When the run fails, the counts
        are 0 and an additional ``error`` key carries the message.

    Raises:
        ImportError: If python-igraph or leidenalg is not installed.
    """
    # Fail fast, before touching the database, when the optional
    # dependencies are missing.
    try:
        import igraph  # noqa: F401
        import leidenalg  # noqa: F401
    except ImportError as err:
        raise ImportError("python-igraph and leidenalg required. Install: pip install python-igraph leidenalg") from err

    # Resolved before connecting so a failure here cannot leak a connection.
    active_profile = self._get_active_profile()

    conn = sqlite3.connect(self.db_path)
    cursor = conn.cursor()

    try:
        # Get top-level clusters for this profile that are large enough to sub-cluster
        cursor.execute('''
            SELECT cluster_id, COUNT(*) as cnt
            FROM memories
            WHERE cluster_id IS NOT NULL AND profile = ?
            GROUP BY cluster_id
            HAVING cnt >= ?
        ''', (active_profile, min_subcluster_size * 2))
        large_clusters = cursor.fetchall()

        if not large_clusters:
            logger.info("No clusters large enough for hierarchical decomposition")
            return {
                'subclusters_created': 0,
                'depth_reached': 0,
                'parent_clusters_processed': 0
            }

        total_subclusters = 0
        max_depth_reached = 0

        for parent_cid, _member_count in large_clusters:
            subs, depth = self._recursive_subcluster(
                conn, cursor, parent_cid, active_profile,
                min_subcluster_size, max_depth, current_depth=1
            )
            total_subclusters += subs
            max_depth_reached = max(max_depth_reached, depth)

        conn.commit()
        logger.info(f"Hierarchical clustering: {total_subclusters} sub-clusters, depth {max_depth_reached}")
        return {
            'subclusters_created': total_subclusters,
            'depth_reached': max_depth_reached,
            'parent_clusters_processed': len(large_clusters)
        }

    except Exception as e:
        logger.error(f"Hierarchical clustering failed: {e}")
        # Discard any sub-clusters written before the failure.
        conn.rollback()
        return {
            'subclusters_created': 0,
            'depth_reached': 0,
            'parent_clusters_processed': 0,
            'error': str(e)
        }
    finally:
        conn.close()
|
|
471
|
+
|
|
472
|
+
def _recursive_subcluster(self, conn, cursor, parent_cluster_id: int,
|
|
473
|
+
profile: str, min_size: int, max_depth: int,
|
|
474
|
+
current_depth: int) -> Tuple[int, int]:
|
|
475
|
+
"""Recursively sub-cluster a community using Leiden."""
|
|
476
|
+
import igraph as ig
|
|
477
|
+
import leidenalg
|
|
478
|
+
|
|
479
|
+
if current_depth > max_depth:
|
|
480
|
+
return 0, current_depth - 1
|
|
481
|
+
|
|
482
|
+
# Get memory IDs in this cluster
|
|
483
|
+
cursor.execute('''
|
|
484
|
+
SELECT id FROM memories
|
|
485
|
+
WHERE cluster_id = ? AND profile = ?
|
|
486
|
+
''', (parent_cluster_id, profile))
|
|
487
|
+
member_ids = [row[0] for row in cursor.fetchall()]
|
|
488
|
+
|
|
489
|
+
if len(member_ids) < min_size * 2:
|
|
490
|
+
return 0, current_depth - 1
|
|
491
|
+
|
|
492
|
+
# Get edges between members of this cluster
|
|
493
|
+
placeholders = ','.join('?' * len(member_ids))
|
|
494
|
+
edges = cursor.execute(f'''
|
|
495
|
+
SELECT source_memory_id, target_memory_id, weight
|
|
496
|
+
FROM graph_edges
|
|
497
|
+
WHERE source_memory_id IN ({placeholders})
|
|
498
|
+
AND target_memory_id IN ({placeholders})
|
|
499
|
+
''', member_ids + member_ids).fetchall()
|
|
500
|
+
|
|
501
|
+
if len(edges) < 2:
|
|
502
|
+
return 0, current_depth - 1
|
|
503
|
+
|
|
504
|
+
# Build sub-graph
|
|
505
|
+
id_to_vertex = {mid: idx for idx, mid in enumerate(member_ids)}
|
|
506
|
+
vertex_to_id = {idx: mid for mid, idx in id_to_vertex.items()}
|
|
507
|
+
|
|
508
|
+
g = ig.Graph()
|
|
509
|
+
g.add_vertices(len(member_ids))
|
|
510
|
+
edge_list, edge_weights = [], []
|
|
511
|
+
for src, tgt, w in edges:
|
|
512
|
+
if src in id_to_vertex and tgt in id_to_vertex:
|
|
513
|
+
edge_list.append((id_to_vertex[src], id_to_vertex[tgt]))
|
|
514
|
+
edge_weights.append(w)
|
|
515
|
+
|
|
516
|
+
if not edge_list:
|
|
517
|
+
return 0, current_depth - 1
|
|
518
|
+
|
|
519
|
+
g.add_edges(edge_list)
|
|
520
|
+
|
|
521
|
+
# Run Leiden with higher resolution for finer communities
|
|
522
|
+
partition = leidenalg.find_partition(
|
|
523
|
+
g, leidenalg.ModularityVertexPartition,
|
|
524
|
+
weights=edge_weights, n_iterations=100, seed=42
|
|
525
|
+
)
|
|
526
|
+
|
|
527
|
+
# Only proceed if Leiden found > 1 community (actual split)
|
|
528
|
+
non_singleton = [c for c in partition if len(c) >= 2]
|
|
529
|
+
if len(non_singleton) <= 1:
|
|
530
|
+
return 0, current_depth - 1
|
|
531
|
+
|
|
532
|
+
subclusters_created = 0
|
|
533
|
+
deepest = current_depth
|
|
534
|
+
|
|
535
|
+
# Get parent depth
|
|
536
|
+
cursor.execute('SELECT depth FROM graph_clusters WHERE id = ?', (parent_cluster_id,))
|
|
537
|
+
parent_row = cursor.fetchone()
|
|
538
|
+
parent_depth = parent_row[0] if parent_row else 0
|
|
539
|
+
|
|
540
|
+
for community in non_singleton:
|
|
541
|
+
sub_member_ids = [vertex_to_id[v] for v in community]
|
|
542
|
+
|
|
543
|
+
if len(sub_member_ids) < 2:
|
|
544
|
+
continue
|
|
545
|
+
|
|
546
|
+
avg_imp = self._get_avg_importance(cursor, sub_member_ids)
|
|
547
|
+
cluster_name = self._generate_cluster_name(cursor, sub_member_ids)
|
|
548
|
+
|
|
549
|
+
result = cursor.execute('''
|
|
550
|
+
INSERT INTO graph_clusters (name, member_count, avg_importance, parent_cluster_id, depth)
|
|
551
|
+
VALUES (?, ?, ?, ?, ?)
|
|
552
|
+
''', (cluster_name, len(sub_member_ids), avg_imp, parent_cluster_id, parent_depth + 1))
|
|
553
|
+
|
|
554
|
+
sub_cluster_id = result.lastrowid
|
|
555
|
+
|
|
556
|
+
# Update memories to point to sub-cluster
|
|
557
|
+
cursor.executemany('''
|
|
558
|
+
UPDATE memories SET cluster_id = ? WHERE id = ?
|
|
559
|
+
''', [(sub_cluster_id, mid) for mid in sub_member_ids])
|
|
560
|
+
|
|
561
|
+
subclusters_created += 1
|
|
562
|
+
logger.info(f"Sub-cluster {sub_cluster_id} under {parent_cluster_id}: "
|
|
563
|
+
f"'{cluster_name}' ({len(sub_member_ids)} members, depth {parent_depth + 1})")
|
|
564
|
+
|
|
565
|
+
# Recurse into this sub-cluster if large enough
|
|
566
|
+
child_subs, child_depth = self._recursive_subcluster(
|
|
567
|
+
conn, cursor, sub_cluster_id, profile,
|
|
568
|
+
min_size, max_depth, current_depth + 1
|
|
569
|
+
)
|
|
570
|
+
subclusters_created += child_subs
|
|
571
|
+
deepest = max(deepest, child_depth)
|
|
572
|
+
|
|
573
|
+
return subclusters_created, deepest
|
|
574
|
+
|
|
575
|
+
def generate_cluster_summaries(self) -> int:
    """
    Write a structured summary onto every cluster of the active profile.

    A cluster is considered when at least one memory of the active profile
    points at it.  The summary text comes from ``_build_cluster_summary``;
    clusters for which that returns an empty string are left untouched.
    All updates are committed together, or rolled back on any error.

    Returns:
        Number of clusters whose summary was written (0 on failure)
    """
    connection = sqlite3.connect(self.db_path)
    cur = connection.cursor()
    profile = self._get_active_profile()

    try:
        # Every cluster referenced by at least one memory of this profile
        profile_clusters = cur.execute('''
            SELECT DISTINCT gc.id, gc.name, gc.member_count
            FROM graph_clusters gc
            JOIN memories m ON m.cluster_id = gc.id
            WHERE m.profile = ?
        ''', (profile,)).fetchall()

        if not profile_clusters:
            return 0

        summaries_generated = 0

        for cluster_id, cluster_name, _member_count in profile_clusters:
            summary = self._build_cluster_summary(cur, cluster_id, profile)
            if not summary:
                # Nothing to say about this cluster; keep its old summary.
                continue

            cur.execute('''
                UPDATE graph_clusters SET summary = ?, updated_at = CURRENT_TIMESTAMP
                WHERE id = ?
            ''', (summary, cluster_id))
            summaries_generated += 1
            logger.info(f"Summary for cluster {cluster_id} ({cluster_name}): {summary[:80]}...")

        connection.commit()
        logger.info(f"Generated {summaries_generated} cluster summaries")
        return summaries_generated

    except Exception as e:
        logger.error(f"Summary generation failed: {e}")
        connection.rollback()
        return 0
    finally:
        connection.close()
|
|
624
|
+
|
|
625
|
+
def _build_cluster_summary(self, cursor, cluster_id: int, profile: str) -> str:
|
|
626
|
+
"""Build a TF-IDF structured summary for a single cluster."""
|
|
627
|
+
# Get member content
|
|
628
|
+
cursor.execute('''
|
|
629
|
+
SELECT m.content, m.summary, m.tags, m.category, m.project_name
|
|
630
|
+
FROM memories m
|
|
631
|
+
WHERE m.cluster_id = ? AND m.profile = ?
|
|
632
|
+
''', (cluster_id, profile))
|
|
633
|
+
members = cursor.fetchall()
|
|
634
|
+
|
|
635
|
+
if not members:
|
|
636
|
+
return ""
|
|
637
|
+
|
|
638
|
+
# Collect entities from graph nodes
|
|
639
|
+
cursor.execute('''
|
|
640
|
+
SELECT gn.entities
|
|
641
|
+
FROM graph_nodes gn
|
|
642
|
+
JOIN memories m ON gn.memory_id = m.id
|
|
643
|
+
WHERE m.cluster_id = ? AND m.profile = ?
|
|
644
|
+
''', (cluster_id, profile))
|
|
645
|
+
all_entities = []
|
|
646
|
+
for row in cursor.fetchall():
|
|
647
|
+
if row[0]:
|
|
648
|
+
try:
|
|
649
|
+
all_entities.extend(json.loads(row[0]))
|
|
650
|
+
except (json.JSONDecodeError, TypeError):
|
|
651
|
+
pass
|
|
652
|
+
|
|
653
|
+
# Top entities by frequency (TF-IDF already extracted these)
|
|
654
|
+
entity_counts = Counter(all_entities)
|
|
655
|
+
top_entities = [e for e, _ in entity_counts.most_common(5)]
|
|
656
|
+
|
|
657
|
+
# Collect unique projects and categories
|
|
658
|
+
projects = set()
|
|
659
|
+
categories = set()
|
|
660
|
+
for m in members:
|
|
661
|
+
if m[3]: # category
|
|
662
|
+
categories.add(m[3])
|
|
663
|
+
if m[4]: # project_name
|
|
664
|
+
projects.add(m[4])
|
|
665
|
+
|
|
666
|
+
# Build structured summary
|
|
667
|
+
parts = []
|
|
668
|
+
|
|
669
|
+
# Theme from top entities
|
|
670
|
+
if top_entities:
|
|
671
|
+
parts.append(f"Key topics: {', '.join(top_entities[:5])}")
|
|
672
|
+
|
|
673
|
+
# Scope
|
|
674
|
+
if projects:
|
|
675
|
+
parts.append(f"Projects: {', '.join(sorted(projects)[:3])}")
|
|
676
|
+
if categories:
|
|
677
|
+
parts.append(f"Categories: {', '.join(sorted(categories)[:3])}")
|
|
678
|
+
|
|
679
|
+
# Size context
|
|
680
|
+
parts.append(f"{len(members)} memories")
|
|
681
|
+
|
|
682
|
+
# Check for hierarchical context
|
|
683
|
+
cursor.execute('SELECT parent_cluster_id FROM graph_clusters WHERE id = ?', (cluster_id,))
|
|
684
|
+
parent_row = cursor.fetchone()
|
|
685
|
+
if parent_row and parent_row[0]:
|
|
686
|
+
cursor.execute('SELECT name FROM graph_clusters WHERE id = ?', (parent_row[0],))
|
|
687
|
+
parent_name_row = cursor.fetchone()
|
|
688
|
+
if parent_name_row:
|
|
689
|
+
parts.append(f"Sub-cluster of: {parent_name_row[0]}")
|
|
690
|
+
|
|
691
|
+
return " | ".join(parts)
|
|
692
|
+
|
|
693
|
+
|
|
392
694
|
class ClusterNamer:
|
|
393
695
|
"""Enhanced cluster naming with optional LLM support (future)."""
|
|
394
696
|
|
|
@@ -418,11 +720,36 @@ class GraphEngine:
|
|
|
418
720
|
self.cluster_builder = ClusterBuilder(db_path)
|
|
419
721
|
self._ensure_graph_tables()
|
|
420
722
|
|
|
723
|
+
def _get_active_profile(self) -> str:
    """Return the name of the currently active profile.

    Reads ``profiles.json`` under ``MEMORY_DIR`` and returns its
    ``active_profile`` entry.  Falls back to ``'default'`` when the file
    is missing or unreadable, is not valid JSON, does not contain a JSON
    object, or when the entry is absent or not a non-empty string.

    Returns:
        The active profile name, or ``'default'``.
    """
    config_file = MEMORY_DIR / "profiles.json"
    try:
        with open(config_file, 'r') as f:
            config = json.load(f)
    except (OSError, ValueError):
        # OSError: file missing/unreadable.  ValueError: malformed JSON
        # (json.JSONDecodeError) or undecodable bytes.  Best-effort fallback.
        return 'default'

    if not isinstance(config, dict):
        # e.g. a top-level list or string: there is no key to look up.
        return 'default'

    profile = config.get('active_profile', 'default')
    # The result is bound to `profile = ?` in SQL by the methods of this
    # class; a null or non-string value would silently match no rows.
    if isinstance(profile, str) and profile:
        return profile
    return 'default'
|
|
734
|
+
|
|
421
735
|
def _ensure_graph_tables(self):
|
|
422
|
-
"""Create graph tables if they don't exist."""
|
|
736
|
+
"""Create graph tables if they don't exist, or recreate if schema is incomplete."""
|
|
423
737
|
conn = sqlite3.connect(self.db_path)
|
|
424
738
|
cursor = conn.cursor()
|
|
425
739
|
|
|
740
|
+
# Check if existing tables have correct schema (not just id column)
|
|
741
|
+
for table_name, required_cols in [
|
|
742
|
+
('graph_nodes', {'memory_id', 'entities'}),
|
|
743
|
+
('graph_edges', {'source_memory_id', 'target_memory_id', 'weight'}),
|
|
744
|
+
('graph_clusters', {'name', 'member_count'}),
|
|
745
|
+
]:
|
|
746
|
+
cursor.execute(f"PRAGMA table_info({table_name})")
|
|
747
|
+
existing_cols = {row[1] for row in cursor.fetchall()}
|
|
748
|
+
if existing_cols and not required_cols.issubset(existing_cols):
|
|
749
|
+
# Table exists but has incomplete schema — drop and recreate
|
|
750
|
+
logger.warning(f"Dropping incomplete {table_name} table (missing: {required_cols - existing_cols})")
|
|
751
|
+
cursor.execute(f'DROP TABLE IF EXISTS {table_name}')
|
|
752
|
+
|
|
426
753
|
# Graph nodes table
|
|
427
754
|
cursor.execute('''
|
|
428
755
|
CREATE TABLE IF NOT EXISTS graph_nodes (
|
|
@@ -458,13 +785,24 @@ class GraphEngine:
|
|
|
458
785
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
459
786
|
name TEXT NOT NULL,
|
|
460
787
|
description TEXT,
|
|
788
|
+
summary TEXT,
|
|
461
789
|
member_count INTEGER DEFAULT 0,
|
|
462
790
|
avg_importance REAL,
|
|
791
|
+
parent_cluster_id INTEGER,
|
|
792
|
+
depth INTEGER DEFAULT 0,
|
|
463
793
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
464
|
-
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
794
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
795
|
+
FOREIGN KEY (parent_cluster_id) REFERENCES graph_clusters(id) ON DELETE SET NULL
|
|
465
796
|
)
|
|
466
797
|
''')
|
|
467
798
|
|
|
799
|
+
# Safe column additions for existing databases
|
|
800
|
+
for col, col_type in [('summary', 'TEXT'), ('parent_cluster_id', 'INTEGER'), ('depth', 'INTEGER DEFAULT 0')]:
|
|
801
|
+
try:
|
|
802
|
+
cursor.execute(f'ALTER TABLE graph_clusters ADD COLUMN {col} {col_type}')
|
|
803
|
+
except sqlite3.OperationalError:
|
|
804
|
+
pass
|
|
805
|
+
|
|
468
806
|
# Add cluster_id to memories if not exists
|
|
469
807
|
try:
|
|
470
808
|
cursor.execute('ALTER TABLE memories ADD COLUMN cluster_id INTEGER')
|
|
@@ -516,11 +854,14 @@ class GraphEngine:
|
|
|
516
854
|
'fix': "Run 'superlocalmemoryv2:status' first to initialize the database, or add some memories."
|
|
517
855
|
}
|
|
518
856
|
|
|
519
|
-
# Load
|
|
857
|
+
# Load memories for active profile only
|
|
858
|
+
active_profile = self._get_active_profile()
|
|
859
|
+
logger.info(f"Building graph for profile: {active_profile}")
|
|
520
860
|
memories = cursor.execute('''
|
|
521
861
|
SELECT id, content, summary FROM memories
|
|
862
|
+
WHERE profile = ?
|
|
522
863
|
ORDER BY id
|
|
523
|
-
''').fetchall()
|
|
864
|
+
''', (active_profile,)).fetchall()
|
|
524
865
|
|
|
525
866
|
if len(memories) == 0:
|
|
526
867
|
logger.warning("No memories found")
|
|
@@ -553,11 +894,29 @@ class GraphEngine:
|
|
|
553
894
|
'fix': "Use incremental updates or reduce memory count with compression."
|
|
554
895
|
}
|
|
555
896
|
|
|
556
|
-
# Clear existing graph data
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
897
|
+
# Clear existing graph data for this profile's memories
|
|
898
|
+
profile_memory_ids = [m[0] for m in memories]
|
|
899
|
+
if profile_memory_ids:
|
|
900
|
+
placeholders = ','.join('?' * len(profile_memory_ids))
|
|
901
|
+
cursor.execute(f'''
|
|
902
|
+
DELETE FROM graph_edges
|
|
903
|
+
WHERE source_memory_id IN ({placeholders})
|
|
904
|
+
OR target_memory_id IN ({placeholders})
|
|
905
|
+
''', profile_memory_ids + profile_memory_ids)
|
|
906
|
+
cursor.execute(f'''
|
|
907
|
+
DELETE FROM graph_nodes
|
|
908
|
+
WHERE memory_id IN ({placeholders})
|
|
909
|
+
''', profile_memory_ids)
|
|
910
|
+
# Remove orphaned clusters (no remaining members)
|
|
911
|
+
cursor.execute('''
|
|
912
|
+
DELETE FROM graph_clusters
|
|
913
|
+
WHERE id NOT IN (
|
|
914
|
+
SELECT DISTINCT cluster_id FROM memories
|
|
915
|
+
WHERE cluster_id IS NOT NULL
|
|
916
|
+
)
|
|
917
|
+
''')
|
|
918
|
+
cursor.execute('UPDATE memories SET cluster_id = NULL WHERE profile = ?',
|
|
919
|
+
(active_profile,))
|
|
561
920
|
conn.commit()
|
|
562
921
|
|
|
563
922
|
logger.info(f"Processing {len(memories)} memories")
|
|
@@ -587,9 +946,16 @@ class GraphEngine:
|
|
|
587
946
|
memory_ids, vectors, entities_list
|
|
588
947
|
)
|
|
589
948
|
|
|
590
|
-
# Detect communities
|
|
949
|
+
# Detect communities (flat Leiden)
|
|
591
950
|
clusters_count = self.cluster_builder.detect_communities()
|
|
592
951
|
|
|
952
|
+
# Hierarchical sub-clustering on large communities
|
|
953
|
+
hierarchical_stats = self.cluster_builder.hierarchical_cluster()
|
|
954
|
+
subclusters = hierarchical_stats.get('subclusters_created', 0)
|
|
955
|
+
|
|
956
|
+
# Generate TF-IDF structured summaries for all clusters
|
|
957
|
+
summaries = self.cluster_builder.generate_cluster_summaries()
|
|
958
|
+
|
|
593
959
|
elapsed = time.time() - start_time
|
|
594
960
|
|
|
595
961
|
stats = {
|
|
@@ -598,6 +964,9 @@ class GraphEngine:
|
|
|
598
964
|
'nodes': len(memory_ids),
|
|
599
965
|
'edges': edges_count,
|
|
600
966
|
'clusters': clusters_count,
|
|
967
|
+
'subclusters': subclusters,
|
|
968
|
+
'max_depth': hierarchical_stats.get('depth_reached', 0),
|
|
969
|
+
'summaries_generated': summaries,
|
|
601
970
|
'time_seconds': round(elapsed, 2)
|
|
602
971
|
}
|
|
603
972
|
|
|
@@ -646,7 +1015,7 @@ class GraphEngine:
|
|
|
646
1015
|
|
|
647
1016
|
def get_related(self, memory_id: int, max_hops: int = 2) -> List[Dict]:
|
|
648
1017
|
"""
|
|
649
|
-
Get memories connected to this memory via graph edges.
|
|
1018
|
+
Get memories connected to this memory via graph edges (active profile only).
|
|
650
1019
|
|
|
651
1020
|
Args:
|
|
652
1021
|
memory_id: Source memory ID
|
|
@@ -657,18 +1026,21 @@ class GraphEngine:
|
|
|
657
1026
|
"""
|
|
658
1027
|
conn = sqlite3.connect(self.db_path)
|
|
659
1028
|
cursor = conn.cursor()
|
|
1029
|
+
active_profile = self._get_active_profile()
|
|
660
1030
|
|
|
661
1031
|
try:
|
|
662
|
-
# Get 1-hop neighbors
|
|
1032
|
+
# Get 1-hop neighbors (filtered to active profile)
|
|
663
1033
|
edges = cursor.execute('''
|
|
664
|
-
SELECT target_memory_id, relationship_type, weight, shared_entities
|
|
665
|
-
FROM graph_edges
|
|
666
|
-
|
|
1034
|
+
SELECT ge.target_memory_id, ge.relationship_type, ge.weight, ge.shared_entities
|
|
1035
|
+
FROM graph_edges ge
|
|
1036
|
+
JOIN memories m ON ge.target_memory_id = m.id
|
|
1037
|
+
WHERE ge.source_memory_id = ? AND m.profile = ?
|
|
667
1038
|
UNION
|
|
668
|
-
SELECT source_memory_id, relationship_type, weight, shared_entities
|
|
669
|
-
FROM graph_edges
|
|
670
|
-
|
|
671
|
-
|
|
1039
|
+
SELECT ge.source_memory_id, ge.relationship_type, ge.weight, ge.shared_entities
|
|
1040
|
+
FROM graph_edges ge
|
|
1041
|
+
JOIN memories m ON ge.source_memory_id = m.id
|
|
1042
|
+
WHERE ge.target_memory_id = ? AND m.profile = ?
|
|
1043
|
+
''', (memory_id, active_profile, memory_id, active_profile)).fetchall()
|
|
672
1044
|
|
|
673
1045
|
results = []
|
|
674
1046
|
seen_ids = {memory_id}
|
|
@@ -743,7 +1115,7 @@ class GraphEngine:
|
|
|
743
1115
|
|
|
744
1116
|
def get_cluster_members(self, cluster_id: int) -> List[Dict]:
|
|
745
1117
|
"""
|
|
746
|
-
Get all memories in a cluster.
|
|
1118
|
+
Get all memories in a cluster (filtered by active profile).
|
|
747
1119
|
|
|
748
1120
|
Args:
|
|
749
1121
|
cluster_id: Cluster ID
|
|
@@ -753,14 +1125,15 @@ class GraphEngine:
|
|
|
753
1125
|
"""
|
|
754
1126
|
conn = sqlite3.connect(self.db_path)
|
|
755
1127
|
cursor = conn.cursor()
|
|
1128
|
+
active_profile = self._get_active_profile()
|
|
756
1129
|
|
|
757
1130
|
try:
|
|
758
1131
|
memories = cursor.execute('''
|
|
759
1132
|
SELECT id, summary, importance, tags, created_at
|
|
760
1133
|
FROM memories
|
|
761
|
-
WHERE cluster_id = ?
|
|
1134
|
+
WHERE cluster_id = ? AND profile = ?
|
|
762
1135
|
ORDER BY importance DESC
|
|
763
|
-
''', (cluster_id,)).fetchall()
|
|
1136
|
+
''', (cluster_id, active_profile)).fetchall()
|
|
764
1137
|
|
|
765
1138
|
return [
|
|
766
1139
|
{
|
|
@@ -814,12 +1187,14 @@ class GraphEngine:
|
|
|
814
1187
|
VALUES (?, ?, ?)
|
|
815
1188
|
''', (memory_id, json.dumps(new_entities), json.dumps(new_vector.tolist())))
|
|
816
1189
|
|
|
817
|
-
# Compare to existing memories
|
|
1190
|
+
# Compare to existing memories in the same profile
|
|
1191
|
+
active_profile = self._get_active_profile()
|
|
818
1192
|
existing = cursor.execute('''
|
|
819
|
-
SELECT memory_id, embedding_vector, entities
|
|
820
|
-
FROM graph_nodes
|
|
821
|
-
|
|
822
|
-
|
|
1193
|
+
SELECT gn.memory_id, gn.embedding_vector, gn.entities
|
|
1194
|
+
FROM graph_nodes gn
|
|
1195
|
+
JOIN memories m ON gn.memory_id = m.id
|
|
1196
|
+
WHERE gn.memory_id != ? AND m.profile = ?
|
|
1197
|
+
''', (memory_id, active_profile)).fetchall()
|
|
823
1198
|
|
|
824
1199
|
edges_added = 0
|
|
825
1200
|
|
|
@@ -871,32 +1246,60 @@ class GraphEngine:
|
|
|
871
1246
|
conn.close()
|
|
872
1247
|
|
|
873
1248
|
def get_stats(self) -> Dict[str, any]:
|
|
874
|
-
"""Get graph statistics."""
|
|
1249
|
+
"""Get graph statistics for the active profile."""
|
|
875
1250
|
conn = sqlite3.connect(self.db_path)
|
|
876
1251
|
cursor = conn.cursor()
|
|
1252
|
+
active_profile = self._get_active_profile()
|
|
877
1253
|
|
|
878
1254
|
try:
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
1255
|
+
# Count nodes for active profile's memories
|
|
1256
|
+
nodes = cursor.execute('''
|
|
1257
|
+
SELECT COUNT(*) FROM graph_nodes
|
|
1258
|
+
WHERE memory_id IN (SELECT id FROM memories WHERE profile = ?)
|
|
1259
|
+
''', (active_profile,)).fetchone()[0]
|
|
882
1260
|
|
|
883
|
-
#
|
|
1261
|
+
# Count edges whose source memory is in the active profile
|
|
1262
|
+
edges = cursor.execute('''
|
|
1263
|
+
SELECT COUNT(*) FROM graph_edges
|
|
1264
|
+
WHERE source_memory_id IN (SELECT id FROM memories WHERE profile = ?)
|
|
1265
|
+
''', (active_profile,)).fetchone()[0]
|
|
1266
|
+
|
|
1267
|
+
# Clusters that have members in active profile
|
|
1268
|
+
clusters = cursor.execute('''
|
|
1269
|
+
SELECT COUNT(DISTINCT cluster_id) FROM memories
|
|
1270
|
+
WHERE cluster_id IS NOT NULL AND profile = ?
|
|
1271
|
+
''', (active_profile,)).fetchone()[0]
|
|
1272
|
+
|
|
1273
|
+
# Cluster breakdown for active profile (including hierarchy)
|
|
884
1274
|
cluster_info = cursor.execute('''
|
|
885
|
-
SELECT name, member_count, avg_importance
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
1275
|
+
SELECT gc.name, gc.member_count, gc.avg_importance,
|
|
1276
|
+
gc.summary, gc.parent_cluster_id, gc.depth
|
|
1277
|
+
FROM graph_clusters gc
|
|
1278
|
+
WHERE gc.id IN (
|
|
1279
|
+
SELECT DISTINCT cluster_id FROM memories
|
|
1280
|
+
WHERE cluster_id IS NOT NULL AND profile = ?
|
|
1281
|
+
)
|
|
1282
|
+
ORDER BY gc.depth ASC, gc.member_count DESC
|
|
1283
|
+
LIMIT 20
|
|
1284
|
+
''', (active_profile,)).fetchall()
|
|
1285
|
+
|
|
1286
|
+
# Count hierarchical depth
|
|
1287
|
+
max_depth = max((c[5] or 0 for c in cluster_info), default=0) if cluster_info else 0
|
|
890
1288
|
|
|
891
1289
|
return {
|
|
1290
|
+
'profile': active_profile,
|
|
892
1291
|
'nodes': nodes,
|
|
893
1292
|
'edges': edges,
|
|
894
1293
|
'clusters': clusters,
|
|
1294
|
+
'max_depth': max_depth,
|
|
895
1295
|
'top_clusters': [
|
|
896
1296
|
{
|
|
897
1297
|
'name': c[0],
|
|
898
1298
|
'members': c[1],
|
|
899
|
-
'avg_importance': round(c[2], 1)
|
|
1299
|
+
'avg_importance': round(c[2], 1) if c[2] else 5.0,
|
|
1300
|
+
'summary': c[3],
|
|
1301
|
+
'parent_cluster_id': c[4],
|
|
1302
|
+
'depth': c[5] or 0
|
|
900
1303
|
}
|
|
901
1304
|
for c in cluster_info
|
|
902
1305
|
]
|
|
@@ -911,7 +1314,7 @@ def main():
|
|
|
911
1314
|
import argparse
|
|
912
1315
|
|
|
913
1316
|
parser = argparse.ArgumentParser(description='GraphEngine - Knowledge Graph Management')
|
|
914
|
-
parser.add_argument('command', choices=['build', 'stats', 'related', 'cluster'],
|
|
1317
|
+
parser.add_argument('command', choices=['build', 'stats', 'related', 'cluster', 'hierarchical', 'summaries'],
|
|
915
1318
|
help='Command to execute')
|
|
916
1319
|
parser.add_argument('--memory-id', type=int, help='Memory ID for related/add commands')
|
|
917
1320
|
parser.add_argument('--cluster-id', type=int, help='Cluster ID for cluster command')
|
|
@@ -965,6 +1368,18 @@ def main():
|
|
|
965
1368
|
summary = mem['summary'] or '[No summary]'
|
|
966
1369
|
print(f" {summary[:100]}...")
|
|
967
1370
|
|
|
1371
|
+
elif args.command == 'hierarchical':
|
|
1372
|
+
print("Running hierarchical sub-clustering...")
|
|
1373
|
+
cluster_builder = ClusterBuilder(engine.db_path)
|
|
1374
|
+
stats = cluster_builder.hierarchical_cluster()
|
|
1375
|
+
print(json.dumps(stats, indent=2))
|
|
1376
|
+
|
|
1377
|
+
elif args.command == 'summaries':
|
|
1378
|
+
print("Generating cluster summaries...")
|
|
1379
|
+
cluster_builder = ClusterBuilder(engine.db_path)
|
|
1380
|
+
count = cluster_builder.generate_cluster_summaries()
|
|
1381
|
+
print(f"Generated summaries for {count} clusters")
|
|
1382
|
+
|
|
968
1383
|
|
|
969
1384
|
if __name__ == '__main__':
|
|
970
1385
|
main()
|