superlocalmemory 2.3.7 → 2.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -239,9 +239,21 @@ class ClusterBuilder:
239
239
  """Initialize cluster builder."""
240
240
  self.db_path = db_path
241
241
 
242
def _get_active_profile(self) -> str:
    """Get the currently active profile name from config.

    Reads ``profiles.json`` under ``MEMORY_DIR`` and returns the value
    stored under its ``active_profile`` key.

    Returns:
        The active profile name, or ``'default'`` when the file is missing
        or unreadable, is not valid JSON, is not a JSON object, or does not
        hold a non-empty string under ``active_profile``.
    """
    config_file = MEMORY_DIR / "profiles.json"

    # EAFP: opening directly avoids the exists()/open() race; a missing file
    # is reported as OSError and handled like any other read failure.
    try:
        with open(config_file, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return 'default'

    # A syntactically valid file may still hold a list/scalar at top level.
    if not isinstance(config, dict):
        return 'default'

    profile = config.get('active_profile')
    # The result is bound into SQL "profile = ?" filters by callers in this
    # class, so a null or non-string value would silently match no rows.
    if isinstance(profile, str) and profile:
        return profile
    return 'default'
253
+
242
254
  def detect_communities(self) -> int:
243
255
  """
244
- Run Leiden algorithm to find memory clusters.
256
+ Run Leiden algorithm to find memory clusters (active profile only).
245
257
 
246
258
  Returns:
247
259
  Number of clusters created
@@ -255,13 +267,16 @@ class ClusterBuilder:
255
267
 
256
268
  conn = sqlite3.connect(self.db_path)
257
269
  cursor = conn.cursor()
270
+ active_profile = self._get_active_profile()
258
271
 
259
272
  try:
260
- # Load all edges
273
+ # Load edges for active profile's memories only
261
274
  edges = cursor.execute('''
262
- SELECT source_memory_id, target_memory_id, weight
263
- FROM graph_edges
264
- ''').fetchall()
275
+ SELECT ge.source_memory_id, ge.target_memory_id, ge.weight
276
+ FROM graph_edges ge
277
+ WHERE ge.source_memory_id IN (SELECT id FROM memories WHERE profile = ?)
278
+ AND ge.target_memory_id IN (SELECT id FROM memories WHERE profile = ?)
279
+ ''', (active_profile, active_profile)).fetchall()
265
280
 
266
281
  if not edges:
267
282
  logger.warning("No edges found - cannot build clusters")
@@ -389,6 +404,293 @@ class ClusterBuilder:
389
404
  return name[:100] # Limit length
390
405
 
391
406
 
407
def hierarchical_cluster(self, min_subcluster_size: int = 5, max_depth: int = 3) -> Dict[str, any]:
    """
    Run recursive Leiden clustering — cluster the clusters.

    Clusters of the active profile with at least ``min_subcluster_size * 2``
    members are handed to ``_recursive_subcluster`` to reveal finer-grained
    thematic structure. E.g., "Python" → "FastAPI" → "Auth".

    Args:
        min_subcluster_size: Minimum members to attempt sub-clustering (default 5)
        max_depth: Maximum recursion depth (default 3)

    Returns:
        Dictionary with hierarchical clustering statistics. It always holds
        ``subclusters_created`` and ``depth_reached``; on success with at
        least one candidate cluster it also holds
        ``parent_clusters_processed``; on failure it also holds ``error``
        with the exception text.

    Raises:
        ImportError: If python-igraph or leidenalg is not installed.
    """
    # Availability check only: the modules are used by _recursive_subcluster,
    # but failing here gives the caller an actionable message up front.
    try:
        import igraph  # noqa: F401
        import leidenalg  # noqa: F401
    except ImportError as exc:
        raise ImportError("python-igraph and leidenalg required. Install: pip install python-igraph leidenalg") from exc

    # Resolve the profile before opening the connection so a failure here
    # cannot leave an unclosed connection behind.
    active_profile = self._get_active_profile()

    conn = sqlite3.connect(self.db_path)
    cursor = conn.cursor()

    try:
        # Get top-level clusters for this profile that are large enough to sub-cluster
        cursor.execute('''
            SELECT cluster_id, COUNT(*) as cnt
            FROM memories
            WHERE cluster_id IS NOT NULL AND profile = ?
            GROUP BY cluster_id
            HAVING cnt >= ?
        ''', (active_profile, min_subcluster_size * 2))
        large_clusters = cursor.fetchall()

        if not large_clusters:
            logger.info("No clusters large enough for hierarchical decomposition")
            return {'subclusters_created': 0, 'depth_reached': 0}

        total_subclusters = 0
        max_depth_reached = 0

        # The member count was only needed for the HAVING filter above.
        for parent_cid, _member_count in large_clusters:
            subs, depth = self._recursive_subcluster(
                conn, cursor, parent_cid, active_profile,
                min_subcluster_size, max_depth, current_depth=1
            )
            total_subclusters += subs
            max_depth_reached = max(max_depth_reached, depth)

        # Single commit: all sub-cluster inserts/updates land together.
        conn.commit()
        logger.info(f"Hierarchical clustering: {total_subclusters} sub-clusters, depth {max_depth_reached}")
        return {
            'subclusters_created': total_subclusters,
            'depth_reached': max_depth_reached,
            'parent_clusters_processed': len(large_clusters)
        }

    except Exception as e:
        # Best-effort by design: report the failure in the result instead of
        # raising, and undo any partially written sub-clusters.
        logger.error(f"Hierarchical clustering failed: {e}")
        conn.rollback()
        # Keep 'depth_reached' present so every return path has the same
        # core keys.
        return {'subclusters_created': 0, 'depth_reached': 0, 'error': str(e)}
    finally:
        conn.close()
471
+
472
def _recursive_subcluster(self, conn, cursor, parent_cluster_id: int,
                          profile: str, min_size: int, max_depth: int,
                          current_depth: int) -> Tuple[int, int]:
    """Recursively sub-cluster a community using Leiden.

    Builds a graph from the edges whose both endpoints belong to
    ``parent_cluster_id`` (restricted to ``profile``), partitions it with
    Leiden, stores every community of two or more members as a child row in
    ``graph_clusters``, re-points those memories at the child, and recurses
    into each child.

    Args:
        conn: Open database connection (not used directly here; passed
            through to recursive calls).
        cursor: Cursor on ``conn`` used for all reads and writes. Nothing is
            committed here.
        parent_cluster_id: Cluster whose members are to be split.
        profile: Profile name used to filter memories.
        min_size: A cluster needs at least ``min_size * 2`` members to be split.
        max_depth: Recursion stops once ``current_depth`` exceeds this.
        current_depth: Depth of this call (the first call uses 1).

    Returns:
        ``(subclusters_created, deepest)`` — the number of sub-clusters
        created by this call and its descendants, and the deepest level at
        which a split happened (``current_depth - 1`` when this call did not
        split anything).
    """
    import igraph as ig
    import leidenalg

    if current_depth > max_depth:
        return 0, current_depth - 1

    # Get memory IDs in this cluster
    cursor.execute('''
        SELECT id FROM memories
        WHERE cluster_id = ? AND profile = ?
    ''', (parent_cluster_id, profile))
    member_ids = [row[0] for row in cursor.fetchall()]

    if len(member_ids) < min_size * 2:
        return 0, current_depth - 1

    # Get edges between members of this cluster. Membership is expressed as
    # a sub-select rather than one bound parameter per member id (twice),
    # which would hit SQLite's host-parameter limit on large clusters.
    edges = cursor.execute('''
        SELECT ge.source_memory_id, ge.target_memory_id, ge.weight
        FROM graph_edges ge
        WHERE ge.source_memory_id IN (
                  SELECT id FROM memories WHERE cluster_id = ? AND profile = ?)
          AND ge.target_memory_id IN (
                  SELECT id FROM memories WHERE cluster_id = ? AND profile = ?)
    ''', (parent_cluster_id, profile, parent_cluster_id, profile)).fetchall()

    if len(edges) < 2:
        return 0, current_depth - 1

    # Build sub-graph: vertex index i corresponds to member_ids[i].
    id_to_vertex = {mid: idx for idx, mid in enumerate(member_ids)}

    g = ig.Graph()
    g.add_vertices(len(member_ids))
    edge_list, edge_weights = [], []
    for src, tgt, w in edges:
        # Defensive: skip any edge whose endpoint is not a known vertex.
        if src in id_to_vertex and tgt in id_to_vertex:
            edge_list.append((id_to_vertex[src], id_to_vertex[tgt]))
            edge_weights.append(w)

    if not edge_list:
        return 0, current_depth - 1

    g.add_edges(edge_list)

    # Run Leiden (modularity objective) on the sub-graph only; the fixed
    # seed keeps the partition reproducible between runs.
    partition = leidenalg.find_partition(
        g, leidenalg.ModularityVertexPartition,
        weights=edge_weights, n_iterations=100, seed=42
    )

    # Only proceed if Leiden found > 1 community (actual split)
    non_singleton = [c for c in partition if len(c) >= 2]
    if len(non_singleton) <= 1:
        return 0, current_depth - 1

    subclusters_created = 0
    deepest = current_depth

    # Get parent depth; a missing row or a NULL depth is treated as 0 so the
    # "+ 1" below cannot fail on None.
    cursor.execute('SELECT depth FROM graph_clusters WHERE id = ?', (parent_cluster_id,))
    parent_row = cursor.fetchone()
    parent_depth = (parent_row[0] or 0) if parent_row else 0

    for community in non_singleton:
        # Every community here already has >= 2 vertices (filtered above).
        sub_member_ids = [member_ids[v] for v in community]

        avg_imp = self._get_avg_importance(cursor, sub_member_ids)
        cluster_name = self._generate_cluster_name(cursor, sub_member_ids)

        result = cursor.execute('''
            INSERT INTO graph_clusters (name, member_count, avg_importance, parent_cluster_id, depth)
            VALUES (?, ?, ?, ?, ?)
        ''', (cluster_name, len(sub_member_ids), avg_imp, parent_cluster_id, parent_depth + 1))

        sub_cluster_id = result.lastrowid

        # Update memories to point to sub-cluster
        cursor.executemany('''
            UPDATE memories SET cluster_id = ? WHERE id = ?
        ''', [(sub_cluster_id, mid) for mid in sub_member_ids])

        subclusters_created += 1
        logger.info(f"Sub-cluster {sub_cluster_id} under {parent_cluster_id}: "
                    f"'{cluster_name}' ({len(sub_member_ids)} members, depth {parent_depth + 1})")

        # Recurse into this sub-cluster if large enough (the callee checks
        # the size and depth limits itself).
        child_subs, child_depth = self._recursive_subcluster(
            conn, cursor, sub_cluster_id, profile,
            min_size, max_depth, current_depth + 1
        )
        subclusters_created += child_subs
        deepest = max(deepest, child_depth)

    return subclusters_created, deepest
574
+
575
def generate_cluster_summaries(self) -> int:
    """
    Generate TF-IDF structured summaries for all clusters.

    Every cluster that has at least one memory in the active profile is
    passed to ``_build_cluster_summary``; a non-empty result is written to
    the cluster's ``summary`` column and ``updated_at`` is refreshed.

    Returns:
        Number of clusters with summaries generated (0 if there are no
        clusters or if an error occurred and the changes were rolled back)
    """
    conn = sqlite3.connect(self.db_path)
    cursor = conn.cursor()
    active_profile = self._get_active_profile()

    try:
        # Clusters referenced by at least one memory of this profile
        clusters = cursor.execute('''
            SELECT DISTINCT gc.id, gc.name, gc.member_count
            FROM graph_clusters gc
            JOIN memories m ON m.cluster_id = gc.id
            WHERE m.profile = ?
        ''', (active_profile,)).fetchall()

        if not clusters:
            return 0

        summaries_generated = 0

        for cluster_id, cluster_name, _member_count in clusters:
            summary = self._build_cluster_summary(cursor, cluster_id, active_profile)
            if not summary:
                # Nothing to store for this cluster
                continue

            cursor.execute('''
                UPDATE graph_clusters SET summary = ?, updated_at = CURRENT_TIMESTAMP
                WHERE id = ?
            ''', (summary, cluster_id))
            summaries_generated += 1
            logger.info(f"Summary for cluster {cluster_id} ({cluster_name}): {summary[:80]}...")

        conn.commit()
        logger.info(f"Generated {summaries_generated} cluster summaries")
        return summaries_generated

    except Exception as e:
        logger.error(f"Summary generation failed: {e}")
        conn.rollback()
        return 0
    finally:
        conn.close()
624
+
625
+ def _build_cluster_summary(self, cursor, cluster_id: int, profile: str) -> str:
626
+ """Build a TF-IDF structured summary for a single cluster."""
627
+ # Get member content
628
+ cursor.execute('''
629
+ SELECT m.content, m.summary, m.tags, m.category, m.project_name
630
+ FROM memories m
631
+ WHERE m.cluster_id = ? AND m.profile = ?
632
+ ''', (cluster_id, profile))
633
+ members = cursor.fetchall()
634
+
635
+ if not members:
636
+ return ""
637
+
638
+ # Collect entities from graph nodes
639
+ cursor.execute('''
640
+ SELECT gn.entities
641
+ FROM graph_nodes gn
642
+ JOIN memories m ON gn.memory_id = m.id
643
+ WHERE m.cluster_id = ? AND m.profile = ?
644
+ ''', (cluster_id, profile))
645
+ all_entities = []
646
+ for row in cursor.fetchall():
647
+ if row[0]:
648
+ try:
649
+ all_entities.extend(json.loads(row[0]))
650
+ except (json.JSONDecodeError, TypeError):
651
+ pass
652
+
653
+ # Top entities by frequency (TF-IDF already extracted these)
654
+ entity_counts = Counter(all_entities)
655
+ top_entities = [e for e, _ in entity_counts.most_common(5)]
656
+
657
+ # Collect unique projects and categories
658
+ projects = set()
659
+ categories = set()
660
+ for m in members:
661
+ if m[3]: # category
662
+ categories.add(m[3])
663
+ if m[4]: # project_name
664
+ projects.add(m[4])
665
+
666
+ # Build structured summary
667
+ parts = []
668
+
669
+ # Theme from top entities
670
+ if top_entities:
671
+ parts.append(f"Key topics: {', '.join(top_entities[:5])}")
672
+
673
+ # Scope
674
+ if projects:
675
+ parts.append(f"Projects: {', '.join(sorted(projects)[:3])}")
676
+ if categories:
677
+ parts.append(f"Categories: {', '.join(sorted(categories)[:3])}")
678
+
679
+ # Size context
680
+ parts.append(f"{len(members)} memories")
681
+
682
+ # Check for hierarchical context
683
+ cursor.execute('SELECT parent_cluster_id FROM graph_clusters WHERE id = ?', (cluster_id,))
684
+ parent_row = cursor.fetchone()
685
+ if parent_row and parent_row[0]:
686
+ cursor.execute('SELECT name FROM graph_clusters WHERE id = ?', (parent_row[0],))
687
+ parent_name_row = cursor.fetchone()
688
+ if parent_name_row:
689
+ parts.append(f"Sub-cluster of: {parent_name_row[0]}")
690
+
691
+ return " | ".join(parts)
692
+
693
+
392
694
  class ClusterNamer:
393
695
  """Enhanced cluster naming with optional LLM support (future)."""
394
696
 
@@ -418,11 +720,36 @@ class GraphEngine:
418
720
  self.cluster_builder = ClusterBuilder(db_path)
419
721
  self._ensure_graph_tables()
420
722
 
723
def _get_active_profile(self) -> str:
    """Get the currently active profile name from config.

    Reads ``profiles.json`` under ``MEMORY_DIR`` and returns the value
    stored under its ``active_profile`` key.

    Returns:
        The active profile name, or ``'default'`` when the file is missing
        or unreadable, is not valid JSON, is not a JSON object, or does not
        hold a non-empty string under ``active_profile``.
    """
    config_file = MEMORY_DIR / "profiles.json"

    # EAFP: opening directly avoids the exists()/open() race; a missing file
    # is reported as OSError and handled like any other read failure.
    try:
        with open(config_file, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return 'default'

    # A syntactically valid file may still hold a list/scalar at top level.
    if not isinstance(config, dict):
        return 'default'

    profile = config.get('active_profile')
    # The result is bound into SQL "profile = ?" filters by callers in this
    # class, so a null or non-string value would silently match no rows.
    if isinstance(profile, str) and profile:
        return profile
    return 'default'
734
+
421
735
  def _ensure_graph_tables(self):
422
- """Create graph tables if they don't exist."""
736
+ """Create graph tables if they don't exist, or recreate if schema is incomplete."""
423
737
  conn = sqlite3.connect(self.db_path)
424
738
  cursor = conn.cursor()
425
739
 
740
+ # Check if existing tables have correct schema (not just id column)
741
+ for table_name, required_cols in [
742
+ ('graph_nodes', {'memory_id', 'entities'}),
743
+ ('graph_edges', {'source_memory_id', 'target_memory_id', 'weight'}),
744
+ ('graph_clusters', {'name', 'member_count'}),
745
+ ]:
746
+ cursor.execute(f"PRAGMA table_info({table_name})")
747
+ existing_cols = {row[1] for row in cursor.fetchall()}
748
+ if existing_cols and not required_cols.issubset(existing_cols):
749
+ # Table exists but has incomplete schema — drop and recreate
750
+ logger.warning(f"Dropping incomplete {table_name} table (missing: {required_cols - existing_cols})")
751
+ cursor.execute(f'DROP TABLE IF EXISTS {table_name}')
752
+
426
753
  # Graph nodes table
427
754
  cursor.execute('''
428
755
  CREATE TABLE IF NOT EXISTS graph_nodes (
@@ -458,13 +785,24 @@ class GraphEngine:
458
785
  id INTEGER PRIMARY KEY AUTOINCREMENT,
459
786
  name TEXT NOT NULL,
460
787
  description TEXT,
788
+ summary TEXT,
461
789
  member_count INTEGER DEFAULT 0,
462
790
  avg_importance REAL,
791
+ parent_cluster_id INTEGER,
792
+ depth INTEGER DEFAULT 0,
463
793
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
464
- updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
794
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
795
+ FOREIGN KEY (parent_cluster_id) REFERENCES graph_clusters(id) ON DELETE SET NULL
465
796
  )
466
797
  ''')
467
798
 
799
+ # Safe column additions for existing databases
800
+ for col, col_type in [('summary', 'TEXT'), ('parent_cluster_id', 'INTEGER'), ('depth', 'INTEGER DEFAULT 0')]:
801
+ try:
802
+ cursor.execute(f'ALTER TABLE graph_clusters ADD COLUMN {col} {col_type}')
803
+ except sqlite3.OperationalError:
804
+ pass
805
+
468
806
  # Add cluster_id to memories if not exists
469
807
  try:
470
808
  cursor.execute('ALTER TABLE memories ADD COLUMN cluster_id INTEGER')
@@ -516,11 +854,14 @@ class GraphEngine:
516
854
  'fix': "Run 'superlocalmemoryv2:status' first to initialize the database, or add some memories."
517
855
  }
518
856
 
519
- # Load all memories
857
+ # Load memories for active profile only
858
+ active_profile = self._get_active_profile()
859
+ logger.info(f"Building graph for profile: {active_profile}")
520
860
  memories = cursor.execute('''
521
861
  SELECT id, content, summary FROM memories
862
+ WHERE profile = ?
522
863
  ORDER BY id
523
- ''').fetchall()
864
+ ''', (active_profile,)).fetchall()
524
865
 
525
866
  if len(memories) == 0:
526
867
  logger.warning("No memories found")
@@ -553,11 +894,29 @@ class GraphEngine:
553
894
  'fix': "Use incremental updates or reduce memory count with compression."
554
895
  }
555
896
 
556
- # Clear existing graph data
557
- cursor.execute('DELETE FROM graph_edges')
558
- cursor.execute('DELETE FROM graph_nodes')
559
- cursor.execute('DELETE FROM graph_clusters')
560
- cursor.execute('UPDATE memories SET cluster_id = NULL')
897
+ # Clear existing graph data for this profile's memories
898
+ profile_memory_ids = [m[0] for m in memories]
899
+ if profile_memory_ids:
900
+ placeholders = ','.join('?' * len(profile_memory_ids))
901
+ cursor.execute(f'''
902
+ DELETE FROM graph_edges
903
+ WHERE source_memory_id IN ({placeholders})
904
+ OR target_memory_id IN ({placeholders})
905
+ ''', profile_memory_ids + profile_memory_ids)
906
+ cursor.execute(f'''
907
+ DELETE FROM graph_nodes
908
+ WHERE memory_id IN ({placeholders})
909
+ ''', profile_memory_ids)
910
+ # Remove orphaned clusters (no remaining members)
911
+ cursor.execute('''
912
+ DELETE FROM graph_clusters
913
+ WHERE id NOT IN (
914
+ SELECT DISTINCT cluster_id FROM memories
915
+ WHERE cluster_id IS NOT NULL
916
+ )
917
+ ''')
918
+ cursor.execute('UPDATE memories SET cluster_id = NULL WHERE profile = ?',
919
+ (active_profile,))
561
920
  conn.commit()
562
921
 
563
922
  logger.info(f"Processing {len(memories)} memories")
@@ -587,9 +946,16 @@ class GraphEngine:
587
946
  memory_ids, vectors, entities_list
588
947
  )
589
948
 
590
- # Detect communities
949
+ # Detect communities (flat Leiden)
591
950
  clusters_count = self.cluster_builder.detect_communities()
592
951
 
952
+ # Hierarchical sub-clustering on large communities
953
+ hierarchical_stats = self.cluster_builder.hierarchical_cluster()
954
+ subclusters = hierarchical_stats.get('subclusters_created', 0)
955
+
956
+ # Generate TF-IDF structured summaries for all clusters
957
+ summaries = self.cluster_builder.generate_cluster_summaries()
958
+
593
959
  elapsed = time.time() - start_time
594
960
 
595
961
  stats = {
@@ -598,6 +964,9 @@ class GraphEngine:
598
964
  'nodes': len(memory_ids),
599
965
  'edges': edges_count,
600
966
  'clusters': clusters_count,
967
+ 'subclusters': subclusters,
968
+ 'max_depth': hierarchical_stats.get('depth_reached', 0),
969
+ 'summaries_generated': summaries,
601
970
  'time_seconds': round(elapsed, 2)
602
971
  }
603
972
 
@@ -646,7 +1015,7 @@ class GraphEngine:
646
1015
 
647
1016
  def get_related(self, memory_id: int, max_hops: int = 2) -> List[Dict]:
648
1017
  """
649
- Get memories connected to this memory via graph edges.
1018
+ Get memories connected to this memory via graph edges (active profile only).
650
1019
 
651
1020
  Args:
652
1021
  memory_id: Source memory ID
@@ -657,18 +1026,21 @@ class GraphEngine:
657
1026
  """
658
1027
  conn = sqlite3.connect(self.db_path)
659
1028
  cursor = conn.cursor()
1029
+ active_profile = self._get_active_profile()
660
1030
 
661
1031
  try:
662
- # Get 1-hop neighbors
1032
+ # Get 1-hop neighbors (filtered to active profile)
663
1033
  edges = cursor.execute('''
664
- SELECT target_memory_id, relationship_type, weight, shared_entities
665
- FROM graph_edges
666
- WHERE source_memory_id = ?
1034
+ SELECT ge.target_memory_id, ge.relationship_type, ge.weight, ge.shared_entities
1035
+ FROM graph_edges ge
1036
+ JOIN memories m ON ge.target_memory_id = m.id
1037
+ WHERE ge.source_memory_id = ? AND m.profile = ?
667
1038
  UNION
668
- SELECT source_memory_id, relationship_type, weight, shared_entities
669
- FROM graph_edges
670
- WHERE target_memory_id = ?
671
- ''', (memory_id, memory_id)).fetchall()
1039
+ SELECT ge.source_memory_id, ge.relationship_type, ge.weight, ge.shared_entities
1040
+ FROM graph_edges ge
1041
+ JOIN memories m ON ge.source_memory_id = m.id
1042
+ WHERE ge.target_memory_id = ? AND m.profile = ?
1043
+ ''', (memory_id, active_profile, memory_id, active_profile)).fetchall()
672
1044
 
673
1045
  results = []
674
1046
  seen_ids = {memory_id}
@@ -743,7 +1115,7 @@ class GraphEngine:
743
1115
 
744
1116
  def get_cluster_members(self, cluster_id: int) -> List[Dict]:
745
1117
  """
746
- Get all memories in a cluster.
1118
+ Get all memories in a cluster (filtered by active profile).
747
1119
 
748
1120
  Args:
749
1121
  cluster_id: Cluster ID
@@ -753,14 +1125,15 @@ class GraphEngine:
753
1125
  """
754
1126
  conn = sqlite3.connect(self.db_path)
755
1127
  cursor = conn.cursor()
1128
+ active_profile = self._get_active_profile()
756
1129
 
757
1130
  try:
758
1131
  memories = cursor.execute('''
759
1132
  SELECT id, summary, importance, tags, created_at
760
1133
  FROM memories
761
- WHERE cluster_id = ?
1134
+ WHERE cluster_id = ? AND profile = ?
762
1135
  ORDER BY importance DESC
763
- ''', (cluster_id,)).fetchall()
1136
+ ''', (cluster_id, active_profile)).fetchall()
764
1137
 
765
1138
  return [
766
1139
  {
@@ -814,12 +1187,14 @@ class GraphEngine:
814
1187
  VALUES (?, ?, ?)
815
1188
  ''', (memory_id, json.dumps(new_entities), json.dumps(new_vector.tolist())))
816
1189
 
817
- # Compare to existing memories
1190
+ # Compare to existing memories in the same profile
1191
+ active_profile = self._get_active_profile()
818
1192
  existing = cursor.execute('''
819
- SELECT memory_id, embedding_vector, entities
820
- FROM graph_nodes
821
- WHERE memory_id != ?
822
- ''', (memory_id,)).fetchall()
1193
+ SELECT gn.memory_id, gn.embedding_vector, gn.entities
1194
+ FROM graph_nodes gn
1195
+ JOIN memories m ON gn.memory_id = m.id
1196
+ WHERE gn.memory_id != ? AND m.profile = ?
1197
+ ''', (memory_id, active_profile)).fetchall()
823
1198
 
824
1199
  edges_added = 0
825
1200
 
@@ -871,32 +1246,60 @@ class GraphEngine:
871
1246
  conn.close()
872
1247
 
873
1248
  def get_stats(self) -> Dict[str, any]:
874
- """Get graph statistics."""
1249
+ """Get graph statistics for the active profile."""
875
1250
  conn = sqlite3.connect(self.db_path)
876
1251
  cursor = conn.cursor()
1252
+ active_profile = self._get_active_profile()
877
1253
 
878
1254
  try:
879
- nodes = cursor.execute('SELECT COUNT(*) FROM graph_nodes').fetchone()[0]
880
- edges = cursor.execute('SELECT COUNT(*) FROM graph_edges').fetchone()[0]
881
- clusters = cursor.execute('SELECT COUNT(*) FROM graph_clusters').fetchone()[0]
1255
+ # Count nodes for active profile's memories
1256
+ nodes = cursor.execute('''
1257
+ SELECT COUNT(*) FROM graph_nodes
1258
+ WHERE memory_id IN (SELECT id FROM memories WHERE profile = ?)
1259
+ ''', (active_profile,)).fetchone()[0]
882
1260
 
883
- # Cluster breakdown
1261
+ # Count edges where at least one end is in active profile
1262
+ edges = cursor.execute('''
1263
+ SELECT COUNT(*) FROM graph_edges
1264
+ WHERE source_memory_id IN (SELECT id FROM memories WHERE profile = ?)
1265
+ ''', (active_profile,)).fetchone()[0]
1266
+
1267
+ # Clusters that have members in active profile
1268
+ clusters = cursor.execute('''
1269
+ SELECT COUNT(DISTINCT cluster_id) FROM memories
1270
+ WHERE cluster_id IS NOT NULL AND profile = ?
1271
+ ''', (active_profile,)).fetchone()[0]
1272
+
1273
+ # Cluster breakdown for active profile (including hierarchy)
884
1274
  cluster_info = cursor.execute('''
885
- SELECT name, member_count, avg_importance
886
- FROM graph_clusters
887
- ORDER BY member_count DESC
888
- LIMIT 10
889
- ''').fetchall()
1275
+ SELECT gc.name, gc.member_count, gc.avg_importance,
1276
+ gc.summary, gc.parent_cluster_id, gc.depth
1277
+ FROM graph_clusters gc
1278
+ WHERE gc.id IN (
1279
+ SELECT DISTINCT cluster_id FROM memories
1280
+ WHERE cluster_id IS NOT NULL AND profile = ?
1281
+ )
1282
+ ORDER BY gc.depth ASC, gc.member_count DESC
1283
+ LIMIT 20
1284
+ ''', (active_profile,)).fetchall()
1285
+
1286
+ # Count hierarchical depth
1287
+ max_depth = max((c[5] or 0 for c in cluster_info), default=0) if cluster_info else 0
890
1288
 
891
1289
  return {
1290
+ 'profile': active_profile,
892
1291
  'nodes': nodes,
893
1292
  'edges': edges,
894
1293
  'clusters': clusters,
1294
+ 'max_depth': max_depth,
895
1295
  'top_clusters': [
896
1296
  {
897
1297
  'name': c[0],
898
1298
  'members': c[1],
899
- 'avg_importance': round(c[2], 1)
1299
+ 'avg_importance': round(c[2], 1) if c[2] else 5.0,
1300
+ 'summary': c[3],
1301
+ 'parent_cluster_id': c[4],
1302
+ 'depth': c[5] or 0
900
1303
  }
901
1304
  for c in cluster_info
902
1305
  ]
@@ -911,7 +1314,7 @@ def main():
911
1314
  import argparse
912
1315
 
913
1316
  parser = argparse.ArgumentParser(description='GraphEngine - Knowledge Graph Management')
914
- parser.add_argument('command', choices=['build', 'stats', 'related', 'cluster'],
1317
+ parser.add_argument('command', choices=['build', 'stats', 'related', 'cluster', 'hierarchical', 'summaries'],
915
1318
  help='Command to execute')
916
1319
  parser.add_argument('--memory-id', type=int, help='Memory ID for related/add commands')
917
1320
  parser.add_argument('--cluster-id', type=int, help='Cluster ID for cluster command')
@@ -965,6 +1368,18 @@ def main():
965
1368
  summary = mem['summary'] or '[No summary]'
966
1369
  print(f" {summary[:100]}...")
967
1370
 
1371
+ elif args.command == 'hierarchical':
1372
+ print("Running hierarchical sub-clustering...")
1373
+ cluster_builder = ClusterBuilder(engine.db_path)
1374
+ stats = cluster_builder.hierarchical_cluster()
1375
+ print(json.dumps(stats, indent=2))
1376
+
1377
+ elif args.command == 'summaries':
1378
+ print("Generating cluster summaries...")
1379
+ cluster_builder = ClusterBuilder(engine.db_path)
1380
+ count = cluster_builder.generate_cluster_summaries()
1381
+ print(f"Generated summaries for {count} clusters")
1382
+
968
1383
 
969
1384
  if __name__ == '__main__':
970
1385
  main()