superlocalmemory 2.7.6 → 2.8.0

This diff shows the published contents of these package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (170)
  1. package/CHANGELOG.md +120 -155
  2. package/README.md +115 -89
  3. package/api_server.py +2 -12
  4. package/docs/PATTERN-LEARNING.md +64 -199
  5. package/docs/example_graph_usage.py +4 -6
  6. package/install.sh +59 -0
  7. package/mcp_server.py +83 -7
  8. package/package.json +1 -8
  9. package/scripts/generate-thumbnails.py +3 -5
  10. package/skills/slm-build-graph/SKILL.md +1 -1
  11. package/skills/slm-list-recent/SKILL.md +1 -1
  12. package/skills/slm-recall/SKILL.md +1 -1
  13. package/skills/slm-remember/SKILL.md +1 -1
  14. package/skills/slm-show-patterns/SKILL.md +1 -1
  15. package/skills/slm-status/SKILL.md +1 -1
  16. package/skills/slm-switch-profile/SKILL.md +1 -1
  17. package/src/agent_registry.py +7 -18
  18. package/src/auth_middleware.py +3 -5
  19. package/src/auto_backup.py +3 -7
  20. package/src/behavioral/__init__.py +49 -0
  21. package/src/behavioral/behavioral_listener.py +203 -0
  22. package/src/behavioral/behavioral_patterns.py +275 -0
  23. package/src/behavioral/cross_project_transfer.py +206 -0
  24. package/src/behavioral/outcome_inference.py +194 -0
  25. package/src/behavioral/outcome_tracker.py +193 -0
  26. package/src/behavioral/tests/__init__.py +4 -0
  27. package/src/behavioral/tests/test_behavioral_integration.py +108 -0
  28. package/src/behavioral/tests/test_behavioral_patterns.py +150 -0
  29. package/src/behavioral/tests/test_cross_project_transfer.py +142 -0
  30. package/src/behavioral/tests/test_mcp_behavioral.py +139 -0
  31. package/src/behavioral/tests/test_mcp_report_outcome.py +117 -0
  32. package/src/behavioral/tests/test_outcome_inference.py +107 -0
  33. package/src/behavioral/tests/test_outcome_tracker.py +96 -0
  34. package/src/cache_manager.py +4 -6
  35. package/src/compliance/__init__.py +48 -0
  36. package/src/compliance/abac_engine.py +149 -0
  37. package/src/compliance/abac_middleware.py +116 -0
  38. package/src/compliance/audit_db.py +215 -0
  39. package/src/compliance/audit_logger.py +148 -0
  40. package/src/compliance/retention_manager.py +289 -0
  41. package/src/compliance/retention_scheduler.py +186 -0
  42. package/src/compliance/tests/__init__.py +4 -0
  43. package/src/compliance/tests/test_abac_enforcement.py +95 -0
  44. package/src/compliance/tests/test_abac_engine.py +124 -0
  45. package/src/compliance/tests/test_abac_mcp_integration.py +118 -0
  46. package/src/compliance/tests/test_audit_db.py +123 -0
  47. package/src/compliance/tests/test_audit_logger.py +98 -0
  48. package/src/compliance/tests/test_mcp_audit.py +128 -0
  49. package/src/compliance/tests/test_mcp_retention_policy.py +125 -0
  50. package/src/compliance/tests/test_retention_manager.py +131 -0
  51. package/src/compliance/tests/test_retention_scheduler.py +99 -0
  52. package/src/db_connection_manager.py +2 -12
  53. package/src/embedding_engine.py +61 -669
  54. package/src/embeddings/__init__.py +47 -0
  55. package/src/embeddings/cache.py +70 -0
  56. package/src/embeddings/cli.py +113 -0
  57. package/src/embeddings/constants.py +47 -0
  58. package/src/embeddings/database.py +91 -0
  59. package/src/embeddings/engine.py +247 -0
  60. package/src/embeddings/model_loader.py +145 -0
  61. package/src/event_bus.py +3 -13
  62. package/src/graph/__init__.py +36 -0
  63. package/src/graph/build_helpers.py +74 -0
  64. package/src/graph/cli.py +87 -0
  65. package/src/graph/cluster_builder.py +188 -0
  66. package/src/graph/cluster_summary.py +148 -0
  67. package/src/graph/constants.py +47 -0
  68. package/src/graph/edge_builder.py +162 -0
  69. package/src/graph/entity_extractor.py +95 -0
  70. package/src/graph/graph_core.py +226 -0
  71. package/src/graph/graph_search.py +231 -0
  72. package/src/graph/hierarchical.py +207 -0
  73. package/src/graph/schema.py +99 -0
  74. package/src/graph_engine.py +45 -1451
  75. package/src/hnsw_index.py +3 -7
  76. package/src/hybrid_search.py +36 -683
  77. package/src/learning/__init__.py +27 -12
  78. package/src/learning/adaptive_ranker.py +50 -12
  79. package/src/learning/cross_project_aggregator.py +2 -12
  80. package/src/learning/engagement_tracker.py +2 -12
  81. package/src/learning/feature_extractor.py +175 -43
  82. package/src/learning/feedback_collector.py +7 -12
  83. package/src/learning/learning_db.py +180 -12
  84. package/src/learning/project_context_manager.py +2 -12
  85. package/src/learning/source_quality_scorer.py +2 -12
  86. package/src/learning/synthetic_bootstrap.py +2 -12
  87. package/src/learning/tests/__init__.py +2 -0
  88. package/src/learning/tests/test_adaptive_ranker.py +2 -6
  89. package/src/learning/tests/test_adaptive_ranker_v28.py +60 -0
  90. package/src/learning/tests/test_aggregator.py +2 -6
  91. package/src/learning/tests/test_auto_retrain_v28.py +35 -0
  92. package/src/learning/tests/test_e2e_ranking_v28.py +82 -0
  93. package/src/learning/tests/test_feature_extractor_v28.py +93 -0
  94. package/src/learning/tests/test_feedback_collector.py +2 -6
  95. package/src/learning/tests/test_learning_db.py +2 -6
  96. package/src/learning/tests/test_learning_db_v28.py +110 -0
  97. package/src/learning/tests/test_learning_init_v28.py +48 -0
  98. package/src/learning/tests/test_outcome_signals.py +48 -0
  99. package/src/learning/tests/test_project_context.py +2 -6
  100. package/src/learning/tests/test_schema_migration.py +319 -0
  101. package/src/learning/tests/test_signal_inference.py +11 -13
  102. package/src/learning/tests/test_source_quality.py +2 -6
  103. package/src/learning/tests/test_synthetic_bootstrap.py +3 -7
  104. package/src/learning/tests/test_workflow_miner.py +2 -6
  105. package/src/learning/workflow_pattern_miner.py +2 -12
  106. package/src/lifecycle/__init__.py +54 -0
  107. package/src/lifecycle/bounded_growth.py +239 -0
  108. package/src/lifecycle/compaction_engine.py +226 -0
  109. package/src/lifecycle/lifecycle_engine.py +302 -0
  110. package/src/lifecycle/lifecycle_evaluator.py +225 -0
  111. package/src/lifecycle/lifecycle_scheduler.py +130 -0
  112. package/src/lifecycle/retention_policy.py +285 -0
  113. package/src/lifecycle/tests/__init__.py +4 -0
  114. package/src/lifecycle/tests/test_bounded_growth.py +193 -0
  115. package/src/lifecycle/tests/test_compaction.py +179 -0
  116. package/src/lifecycle/tests/test_lifecycle_engine.py +137 -0
  117. package/src/lifecycle/tests/test_lifecycle_evaluation.py +177 -0
  118. package/src/lifecycle/tests/test_lifecycle_scheduler.py +127 -0
  119. package/src/lifecycle/tests/test_lifecycle_search.py +109 -0
  120. package/src/lifecycle/tests/test_mcp_compact.py +149 -0
  121. package/src/lifecycle/tests/test_mcp_lifecycle_status.py +114 -0
  122. package/src/lifecycle/tests/test_retention_policy.py +162 -0
  123. package/src/mcp_tools_v28.py +280 -0
  124. package/src/memory-profiles.py +2 -12
  125. package/src/memory-reset.py +2 -12
  126. package/src/memory_compression.py +2 -12
  127. package/src/memory_store_v2.py +76 -20
  128. package/src/migrate_v1_to_v2.py +2 -12
  129. package/src/pattern_learner.py +29 -975
  130. package/src/patterns/__init__.py +24 -0
  131. package/src/patterns/analyzers.py +247 -0
  132. package/src/patterns/learner.py +267 -0
  133. package/src/patterns/scoring.py +167 -0
  134. package/src/patterns/store.py +223 -0
  135. package/src/patterns/terminology.py +138 -0
  136. package/src/provenance_tracker.py +4 -14
  137. package/src/query_optimizer.py +4 -6
  138. package/src/rate_limiter.py +2 -6
  139. package/src/search/__init__.py +20 -0
  140. package/src/search/cli.py +77 -0
  141. package/src/search/constants.py +26 -0
  142. package/src/search/engine.py +239 -0
  143. package/src/search/fusion.py +122 -0
  144. package/src/search/index_loader.py +112 -0
  145. package/src/search/methods.py +162 -0
  146. package/src/search_engine_v2.py +4 -6
  147. package/src/setup_validator.py +7 -13
  148. package/src/subscription_manager.py +2 -12
  149. package/src/tree/__init__.py +59 -0
  150. package/src/tree/builder.py +183 -0
  151. package/src/tree/nodes.py +196 -0
  152. package/src/tree/queries.py +252 -0
  153. package/src/tree/schema.py +76 -0
  154. package/src/tree_manager.py +10 -711
  155. package/src/trust/__init__.py +45 -0
  156. package/src/trust/constants.py +66 -0
  157. package/src/trust/queries.py +157 -0
  158. package/src/trust/schema.py +95 -0
  159. package/src/trust/scorer.py +299 -0
  160. package/src/trust/signals.py +95 -0
  161. package/src/trust_scorer.py +39 -697
  162. package/src/webhook_dispatcher.py +2 -12
  163. package/ui/app.js +1 -1
  164. package/ui/js/agents.js +1 -1
  165. package/ui_server.py +2 -14
  166. package/ATTRIBUTION.md +0 -140
  167. package/docs/ARCHITECTURE-V2.5.md +0 -190
  168. package/docs/GRAPH-ENGINE.md +0 -503
  169. package/docs/architecture-diagram.drawio +0 -405
  170. package/docs/plans/2026-02-13-benchmark-suite.md +0 -1349
package/src/embeddings/model_loader.py ADDED
@@ -0,0 +1,145 @@
+ #!/usr/bin/env python3
+ # SPDX-License-Identifier: MIT
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
+ """Model loading and backend encoder methods for EmbeddingEngine.
+ """
+ import time
+ import logging
+ from typing import List
+
+ import numpy as np
+
+ from embeddings.constants import (
+     SENTENCE_TRANSFORMERS_AVAILABLE,
+     SKLEARN_AVAILABLE,
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ class ModelLoaderMixin:
+     """
+     Mixin that handles model initialization and raw encoding backends.
+
+     Expects the host class to have:
+     - self.use_transformers: bool
+     - self.model_cache_path: Path
+     - self.model_name: str
+     - self.device: str
+     - self.model: Optional[SentenceTransformer]
+     - self.dimension: int
+     - self.tfidf_vectorizer
+     - self.tfidf_fitted: bool
+     """
+
+     def _load_model(self):
+         """Load sentence transformer model or fallback to TF-IDF."""
+         if not self.use_transformers:
+             logger.warning(
+                 "sentence-transformers unavailable. Install with: "
+                 "pip install sentence-transformers"
+             )
+             self._init_fallback()
+             return
+
+         try:
+             from sentence_transformers import SentenceTransformer
+
+             # Create model cache directory
+             self.model_cache_path.mkdir(parents=True, exist_ok=True)
+
+             logger.info(f"Loading model: {self.model_name}")
+             start_time = time.time()
+
+             # Load model with local cache
+             self.model = SentenceTransformer(
+                 self.model_name,
+                 device=self.device,
+                 cache_folder=str(self.model_cache_path)
+             )
+
+             # Get actual dimension
+             self.dimension = self.model.get_sentence_embedding_dimension()
+
+             elapsed = time.time() - start_time
+             logger.info(
+                 f"Loaded {self.model_name} ({self.dimension}D) in {elapsed:.2f}s"
+             )
+
+         except Exception as e:
+             logger.error(f"Failed to load sentence transformer: {e}")
+             logger.info("Falling back to TF-IDF")
+             self.use_transformers = False
+             self._init_fallback()
+
+     def _init_fallback(self):
+         """Initialize TF-IDF fallback."""
+         if not SKLEARN_AVAILABLE:
+             logger.error(
+                 "sklearn unavailable - no fallback available. "
+                 "Install: pip install scikit-learn"
+             )
+             return
+
+         from sklearn.feature_extraction.text import TfidfVectorizer
+
+         logger.info("Using TF-IDF fallback (dimension will be dynamic)")
+         self.tfidf_vectorizer = TfidfVectorizer(
+             max_features=384,  # Match sentence transformer dimension
+             stop_words='english',
+             ngram_range=(1, 2),
+             min_df=1
+         )
+         self.dimension = 384
+
+     def _encode_transformer(
+         self,
+         texts: List[str],
+         batch_size: int,
+         show_progress: bool
+     ) -> np.ndarray:
+         """Generate embeddings using sentence transformer."""
+         try:
+             start_time = time.time()
+
+             embeddings = self.model.encode(
+                 texts,
+                 batch_size=batch_size,
+                 show_progress_bar=show_progress,
+                 convert_to_numpy=True,
+                 normalize_embeddings=False  # We'll normalize separately
+             )
+
+             elapsed = time.time() - start_time
+             rate = len(texts) / elapsed if elapsed > 0 else 0
+             logger.debug(f"Encoded {len(texts)} texts in {elapsed:.2f}s ({rate:.0f} texts/sec)")
+
+             return embeddings
+
+         except Exception as e:
+             logger.error(f"Transformer encoding failed: {e}")
+             raise
+
+     def _encode_tfidf(self, texts: List[str]) -> np.ndarray:
+         """Generate embeddings using TF-IDF fallback."""
+         try:
+             if not self.tfidf_fitted:
+                 # Fit on first use
+                 logger.info("Fitting TF-IDF vectorizer...")
+                 self.tfidf_vectorizer.fit(texts)
+                 self.tfidf_fitted = True
+
+             embeddings = self.tfidf_vectorizer.transform(texts).toarray()
+
+             # Pad or truncate to target dimension
+             if embeddings.shape[1] < self.dimension:
+                 padding = np.zeros((embeddings.shape[0], self.dimension - embeddings.shape[1]))
+                 embeddings = np.hstack([embeddings, padding])
+             elif embeddings.shape[1] > self.dimension:
+                 embeddings = embeddings[:, :self.dimension]
+
+             return embeddings
+
+         except Exception as e:
+             logger.error(f"TF-IDF encoding failed: {e}")
+             raise
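
Note on usage: the mixin's docstring above defines an attribute contract rather than an `__init__`. A minimal sketch of a host class satisfying that contract follows; the class name, cache path, and model name here are illustrative assumptions (the real host is presumably `EmbeddingEngine` in `package/src/embeddings/engine.py`, which this diff also adds).

```python
# Hypothetical host class, for illustration only: wires up the attributes
# the mixin's docstring says it expects.
from pathlib import Path

from embeddings.model_loader import ModelLoaderMixin


class TinyEngine(ModelLoaderMixin):
    def __init__(self):
        self.use_transformers = True   # _load_model flips this off on failure
        self.model_cache_path = Path.home() / ".cache" / "slm-models"  # assumed path
        self.model_name = "all-MiniLM-L6-v2"  # assumed 384-D default model
        self.device = "cpu"
        self.model = None
        self.dimension = 384
        self.tfidf_vectorizer = None
        self.tfidf_fitted = False
        self._load_model()


engine = TinyEngine()
texts = ["hello world"]
vecs = (engine._encode_transformer(texts, batch_size=32, show_progress=False)
        if engine.use_transformers else engine._encode_tfidf(texts))
print(vecs.shape)  # (1, 384) with either backend
```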
package/src/event_bus.py CHANGED
@@ -1,16 +1,6 @@
  #!/usr/bin/env python3
- """
- SuperLocalMemory V2 - Event Bus
- Copyright (c) 2026 Varun Pratap Bhardwaj
- Licensed under MIT License
-
- Repository: https://github.com/varun369/SuperLocalMemoryV2
- Author: Varun Pratap Bhardwaj (Solution Architect)
-
- NOTICE: This software is protected by MIT License.
- Attribution must be preserved in all copies or derivatives.
- """
-
+ # SPDX-License-Identifier: MIT
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
  """
  EventBus — Real-time event broadcasting for memory operations.

@@ -222,7 +212,7 @@ class EventBus:
              payload: Event-specific data (dict, serialized to JSON)
              memory_id: Associated memory ID (if applicable)
              source_agent: Agent that triggered the event
-             source_protocol: Protocol used (mcp, cli, rest, python, a2a)
+             source_protocol: Protocol used (mcp, cli, rest, python)
              importance: Event importance 1-10 (affects retention)

          Returns:
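
Aside from the license-header swap, the visible change here is that `a2a` is no longer listed as an accepted `source_protocol`. A hedged sketch of a call under the new constraint; the keyword names are taken from the docstring above, and the full signature of `publish` (including any leading event-type parameter) is not shown in this hunk, so treat every keyword as an assumption.

```python
# Sketch only: parameter names mirror the docstring above, but EventBus.publish's
# actual signature is not visible in this diff.
bus.publish(
    payload={"action": "memory_saved"},  # dict, serialized to JSON
    memory_id=42,                        # associated memory, if applicable
    source_agent="claude-code",
    source_protocol="mcp",               # "a2a" was dropped from this list in 2.8.0
    importance=7,                        # 1-10, affects retention
)
```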
package/src/graph/__init__.py ADDED
@@ -0,0 +1,36 @@
+ # SPDX-License-Identifier: MIT
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
+ """graph package - Knowledge Graph Clustering for SuperLocalMemory V2
+
+ Re-exports all public classes, constants, and functions so that
+ ``from graph import GraphEngine`` (or any other symbol) works.
+ """
+ from graph.constants import (
+     MAX_MEMORIES_FOR_GRAPH,
+     SKLEARN_AVAILABLE,
+     IGRAPH_AVAILABLE,
+     MEMORY_DIR,
+     DB_PATH,
+ )
+ from graph.entity_extractor import EntityExtractor, ClusterNamer
+ from graph.edge_builder import EdgeBuilder
+ from graph.cluster_builder import ClusterBuilder
+ from graph.graph_core import GraphEngine
+ from graph.cli import main
+
+ __all__ = [
+     # Constants
+     "MAX_MEMORIES_FOR_GRAPH",
+     "SKLEARN_AVAILABLE",
+     "IGRAPH_AVAILABLE",
+     "MEMORY_DIR",
+     "DB_PATH",
+     # Classes
+     "EntityExtractor",
+     "ClusterNamer",
+     "EdgeBuilder",
+     "ClusterBuilder",
+     "GraphEngine",
+     # Functions
+     "main",
+ ]
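
The practical effect of the re-export is that the shallow and deep import paths name the same objects; a quick sanity check:

```python
# Both paths resolve to the same class thanks to the package-level re-export.
from graph import GraphEngine as ViaPackage
from graph.graph_core import GraphEngine as ViaModule

assert ViaPackage is ViaModule
```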
package/src/graph/build_helpers.py ADDED
@@ -0,0 +1,74 @@
+ #!/usr/bin/env python3
+ # SPDX-License-Identifier: MIT
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
+ """Helper functions for the graph build process.
+
+ Provides sampling and cleanup utilities used during full graph builds.
+ """
+ from graph.constants import logger, MAX_MEMORIES_FOR_GRAPH
+
+
+ def apply_sampling(cursor, memories, active_profile):
+     """Apply intelligent sampling if memory count exceeds cap.
+
+     Returns a (possibly truncated) list of memory tuples.
+     """
+     if len(memories) > MAX_MEMORIES_FOR_GRAPH:
+         logger.warning(
+             "Memory count (%d) exceeds graph cap (%d). Using intelligent sampling.",
+             len(memories), MAX_MEMORIES_FOR_GRAPH
+         )
+         recent_count = int(MAX_MEMORIES_FOR_GRAPH * 0.6)
+         important_count = int(MAX_MEMORIES_FOR_GRAPH * 0.4)
+
+         recent_memories = cursor.execute('''
+             SELECT id, content, summary FROM memories
+             WHERE profile = ? ORDER BY created_at DESC LIMIT ?
+         ''', (active_profile, recent_count)).fetchall()
+
+         important_memories = cursor.execute('''
+             SELECT id, content, summary FROM memories
+             WHERE profile = ? ORDER BY importance DESC, access_count DESC LIMIT ?
+         ''', (active_profile, important_count)).fetchall()
+
+         seen_ids = set()
+         sampled = []
+         for m in recent_memories + important_memories:
+             if m[0] not in seen_ids:
+                 seen_ids.add(m[0])
+                 sampled.append(m)
+         memories = sampled[:MAX_MEMORIES_FOR_GRAPH]
+         logger.info("Sampled %d memories for graph build", len(memories))
+
+     elif len(memories) > MAX_MEMORIES_FOR_GRAPH * 0.8:
+         logger.warning(
+             "Approaching graph cap: %d/%d memories (%.0f%%). "
+             "Consider running memory compression.",
+             len(memories), MAX_MEMORIES_FOR_GRAPH,
+             len(memories) / MAX_MEMORIES_FOR_GRAPH * 100
+         )
+     return memories
+
+
+ def clear_profile_graph_data(cursor, conn, memories, active_profile):
+     """Clear existing graph data for a profile's memories."""
+     profile_memory_ids = [m[0] for m in memories]
+     if profile_memory_ids:
+         placeholders = ','.join('?' * len(profile_memory_ids))
+         cursor.execute(f'''
+             DELETE FROM graph_edges
+             WHERE source_memory_id IN ({placeholders})
+                OR target_memory_id IN ({placeholders})
+         ''', profile_memory_ids + profile_memory_ids)
+         cursor.execute(f'''
+             DELETE FROM graph_nodes WHERE memory_id IN ({placeholders})
+         ''', profile_memory_ids)
+     cursor.execute('''
+         DELETE FROM graph_clusters
+         WHERE id NOT IN (
+             SELECT DISTINCT cluster_id FROM memories WHERE cluster_id IS NOT NULL
+         )
+     ''')
+     cursor.execute('UPDATE memories SET cluster_id = NULL WHERE profile = ?',
+                    (active_profile,))
+     conn.commit()
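
To make the 60/40 split in `apply_sampling` concrete, here is the arithmetic under an assumed cap of 10,000; the real `MAX_MEMORIES_FOR_GRAPH` value lives in `graph/constants.py` and is not visible in this diff.

```python
# Worked example of the sampling arithmetic, with an assumed cap of 10,000.
MAX_MEMORIES_FOR_GRAPH = 10_000  # assumption; real value is in graph/constants.py

recent_count = int(MAX_MEMORIES_FOR_GRAPH * 0.6)     # 6,000 most recently created
important_count = int(MAX_MEMORIES_FOR_GRAPH * 0.4)  # 4,000 by importance/access count

# The two result sets are deduplicated by memory id before truncation, so a
# memory that is both recent and important counts once and the final sample
# never exceeds the cap.
assert recent_count + important_count == MAX_MEMORIES_FOR_GRAPH
```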
package/src/graph/cli.py ADDED
@@ -0,0 +1,87 @@
+ #!/usr/bin/env python3
+ # SPDX-License-Identifier: MIT
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
+ """CLI interface for manual graph operations.
+
+ Provides a command-line interface for building graphs, viewing stats,
+ finding related memories, inspecting clusters, and generating summaries.
+ """
+ import json
+
+
+ def main():
+     """CLI interface for manual graph operations."""
+     import argparse
+     from graph.graph_core import GraphEngine
+     from graph.cluster_builder import ClusterBuilder
+
+     parser = argparse.ArgumentParser(description='GraphEngine - Knowledge Graph Management')
+     parser.add_argument('command', choices=['build', 'stats', 'related', 'cluster', 'hierarchical', 'summaries'],
+                         help='Command to execute')
+     parser.add_argument('--memory-id', type=int, help='Memory ID for related/add commands')
+     parser.add_argument('--cluster-id', type=int, help='Cluster ID for cluster command')
+     parser.add_argument('--min-similarity', type=float, default=0.3,
+                         help='Minimum similarity for edges (default: 0.3)')
+     parser.add_argument('--hops', type=int, default=2, help='Max hops for related (default: 2)')
+
+     args = parser.parse_args()
+
+     engine = GraphEngine()
+
+     if args.command == 'build':
+         print("Building knowledge graph...")
+         stats = engine.build_graph(min_similarity=args.min_similarity)
+         print(json.dumps(stats, indent=2))
+
+     elif args.command == 'stats':
+         print("Graph Statistics:")
+         stats = engine.get_stats()
+         print(json.dumps(stats, indent=2))
+
+     elif args.command == 'related':
+         if not args.memory_id:
+             print("Error: --memory-id required for 'related' command")
+             return
+
+         print(f"Finding memories related to #{args.memory_id}...")
+         related = engine.get_related(args.memory_id, max_hops=args.hops)
+
+         if not related:
+             print("No related memories found")
+         else:
+             for idx, mem in enumerate(related, 1):
+                 print(f"\n{idx}. Memory #{mem['id']} ({mem['hops']}-hop, weight={mem['weight']:.3f})")
+                 print(f"   Relationship: {mem['relationship']}")
+                 summary = mem['summary'] or '[No summary]'
+                 print(f"   Summary: {summary[:100]}...")
+                 if mem['shared_entities']:
+                     print(f"   Shared: {', '.join(mem['shared_entities'][:5])}")
+
+     elif args.command == 'cluster':
+         if not args.cluster_id:
+             print("Error: --cluster-id required for 'cluster' command")
+             return
+
+         print(f"Cluster #{args.cluster_id} members:")
+         members = engine.get_cluster_members(args.cluster_id)
+
+         for idx, mem in enumerate(members, 1):
+             print(f"\n{idx}. Memory #{mem['id']} (importance={mem['importance']})")
+             summary = mem['summary'] or '[No summary]'
+             print(f"   {summary[:100]}...")
+
+     elif args.command == 'hierarchical':
+         print("Running hierarchical sub-clustering...")
+         cluster_builder = ClusterBuilder(engine.db_path)
+         stats = cluster_builder.hierarchical_cluster()
+         print(json.dumps(stats, indent=2))
+
+     elif args.command == 'summaries':
+         print("Generating cluster summaries...")
+         cluster_builder = ClusterBuilder(engine.db_path)
+         count = cluster_builder.generate_cluster_summaries()
+         print(f"Generated summaries for {count} clusters")
+
+
+ if __name__ == '__main__':
+     main()
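
Because `main()` parses `sys.argv` with argparse, the same CLI can also be driven programmatically; a minimal sketch, assuming `package/src` is on the import path:

```python
# Programmatic invocation of the CLI, equivalent to running the module with
# `build --min-similarity 0.4` on the command line.
import sys

from graph.cli import main

sys.argv = ["graph-cli", "build", "--min-similarity", "0.4"]
main()  # prints build stats as indented JSON
```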
package/src/graph/cluster_builder.py ADDED
@@ -0,0 +1,188 @@
+ #!/usr/bin/env python3
+ # SPDX-License-Identifier: MIT
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
+ """Community detection and cluster management for the graph engine.
+
+ Implements Leiden algorithm based community detection. Hierarchical
+ sub-clustering is delegated to the ``hierarchical`` module.
+ """
+ import sqlite3
+ import json
+ from typing import List, Dict
+ from collections import Counter
+
+ from graph.constants import logger, IGRAPH_AVAILABLE, MEMORY_DIR
+ from graph.cluster_summary import generate_cluster_summaries as _generate_summaries
+ from graph.hierarchical import hierarchical_cluster as _hierarchical_cluster
+
+
+ class ClusterBuilder:
+     """Detect memory communities using Leiden algorithm."""
+
+     def __init__(self, db_path):
+         """Initialize cluster builder."""
+         self.db_path = db_path
+
+     def _get_active_profile(self) -> str:
+         """Get the currently active profile name from config."""
+         config_file = MEMORY_DIR / "profiles.json"
+         if config_file.exists():
+             try:
+                 with open(config_file, 'r') as f:
+                     config = json.load(f)
+                 return config.get('active_profile', 'default')
+             except (json.JSONDecodeError, IOError):
+                 pass
+         return 'default'
+
+     def detect_communities(self) -> int:
+         """
+         Run Leiden algorithm to find memory clusters (active profile only).
+
+         Returns:
+             Number of clusters created
+         """
+         if not IGRAPH_AVAILABLE:
+             logger.warning("igraph/leidenalg not installed. Graph clustering disabled. Install with: pip3 install python-igraph leidenalg")
+             return 0
+         import igraph as ig
+         import leidenalg
+
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+         active_profile = self._get_active_profile()
+
+         try:
+             # Load edges for active profile's memories only
+             edges = cursor.execute('''
+                 SELECT ge.source_memory_id, ge.target_memory_id, ge.weight
+                 FROM graph_edges ge
+                 WHERE ge.source_memory_id IN (SELECT id FROM memories WHERE profile = ?)
+                   AND ge.target_memory_id IN (SELECT id FROM memories WHERE profile = ?)
+             ''', (active_profile, active_profile)).fetchall()
+
+             if not edges:
+                 logger.warning("No edges found - cannot build clusters")
+                 return 0
+
+             # Build memory ID mapping
+             memory_ids = set()
+             for source, target, _ in edges:
+                 memory_ids.add(source)
+                 memory_ids.add(target)
+
+             memory_ids = sorted(list(memory_ids))
+             memory_id_to_vertex = {mid: idx for idx, mid in enumerate(memory_ids)}
+             vertex_to_memory_id = {idx: mid for mid, idx in memory_id_to_vertex.items()}
+
+             # Create igraph graph
+             g = ig.Graph()
+             g.add_vertices(len(memory_ids))
+
+             edge_list = []
+             edge_weights = []
+             for source, target, weight in edges:
+                 edge_list.append((memory_id_to_vertex[source], memory_id_to_vertex[target]))
+                 edge_weights.append(weight)
+
+             g.add_edges(edge_list)
+
+             # Run Leiden algorithm
+             logger.info(f"Running Leiden on {len(memory_ids)} nodes, {len(edges)} edges")
+             partition = leidenalg.find_partition(
+                 g, leidenalg.ModularityVertexPartition,
+                 weights=edge_weights, n_iterations=100, seed=42
+             )
+
+             clusters_created = 0
+             for cluster_idx, community in enumerate(partition):
+                 if len(community) < 2:
+                     continue
+
+                 cluster_memory_ids = [vertex_to_memory_id[v] for v in community]
+                 avg_importance = self._get_avg_importance(cursor, cluster_memory_ids)
+                 cluster_name = self._generate_cluster_name(cursor, cluster_memory_ids)
+
+                 result = cursor.execute('''
+                     INSERT INTO graph_clusters (name, member_count, avg_importance)
+                     VALUES (?, ?, ?)
+                 ''', (cluster_name, len(cluster_memory_ids), avg_importance))
+
+                 cluster_id = result.lastrowid
+                 cursor.executemany('''
+                     UPDATE memories SET cluster_id = ? WHERE id = ?
+                 ''', [(cluster_id, mid) for mid in cluster_memory_ids])
+
+                 clusters_created += 1
+                 logger.info(f"Cluster {cluster_id}: '{cluster_name}' ({len(cluster_memory_ids)} members)")
+
+             conn.commit()
+             logger.info(f"Created {clusters_created} clusters")
+             return clusters_created
+
+         except Exception as e:
+             logger.error(f"Community detection failed: {e}")
+             conn.rollback()
+             return 0
+         finally:
+             conn.close()
+
+     def _get_avg_importance(self, cursor, memory_ids: List[int]) -> float:
+         """Calculate average importance for cluster."""
+         placeholders = ','.join('?' * len(memory_ids))
+         result = cursor.execute(f'''
+             SELECT AVG(importance) FROM memories WHERE id IN ({placeholders})
+         ''', memory_ids).fetchone()
+         return result[0] if result and result[0] else 5.0
+
+     def _generate_cluster_name(self, cursor, memory_ids: List[int]) -> str:
+         """Generate cluster name from member entities (TF-IDF approach)."""
+         placeholders = ','.join('?' * len(memory_ids))
+         nodes = cursor.execute(f'''
+             SELECT entities FROM graph_nodes WHERE memory_id IN ({placeholders})
+         ''', memory_ids).fetchall()
+
+         all_entities = []
+         for node in nodes:
+             if node[0]:
+                 all_entities.extend(json.loads(node[0]))
+
+         if not all_entities:
+             return f"Cluster (ID auto-assigned)"
+
+         entity_counts = Counter(all_entities)
+         top_entities = [e for e, _ in entity_counts.most_common(3)]
+
+         if len(top_entities) >= 2:
+             name = f"{top_entities[0].title()} & {top_entities[1].title()}"
+         elif len(top_entities) == 1:
+             name = f"{top_entities[0].title()} Contexts"
+         else:
+             name = "Mixed Contexts"
+
+         return name[:100]
+
+     def hierarchical_cluster(self, min_subcluster_size: int = 5, max_depth: int = 3) -> Dict[str, any]:
+         """
+         Run recursive Leiden clustering -- cluster the clusters.
+
+         Delegates to the hierarchical module.
+
+         Args:
+             min_subcluster_size: Minimum members to attempt sub-clustering (default 5)
+             max_depth: Maximum recursion depth (default 3)
+
+         Returns:
+             Dictionary with hierarchical clustering statistics
+         """
+         return _hierarchical_cluster(
+             self.db_path,
+             get_avg_importance_fn=self._get_avg_importance,
+             generate_cluster_name_fn=self._generate_cluster_name,
+             min_subcluster_size=min_subcluster_size,
+             max_depth=max_depth,
+         )
+
+     def generate_cluster_summaries(self) -> int:
+         """Generate TF-IDF structured summaries for all clusters."""
+         return _generate_summaries(self.db_path)
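
The key mechanical step in `detect_communities` is the ID remapping: igraph requires vertices numbered 0..n-1, so arbitrary memory IDs are mapped to vertex indices before Leiden runs and mapped back when writing clusters. A standalone sketch of just that step, with made-up edge data (requires `python-igraph` and `leidenalg`):

```python
# Standalone sketch of the vertex-mapping step, with hypothetical edge data.
import igraph as ig
import leidenalg

edges = [(3, 17, 0.9), (17, 42, 0.8), (3, 42, 0.7), (99, 120, 0.6)]  # (src, dst, weight)

# Map arbitrary memory IDs onto contiguous vertex indices 0..n-1.
memory_ids = sorted({m for s, t, _ in edges for m in (s, t)})
to_vertex = {mid: i for i, mid in enumerate(memory_ids)}

g = ig.Graph()
g.add_vertices(len(memory_ids))
g.add_edges([(to_vertex[s], to_vertex[t]) for s, t, _ in edges])

partition = leidenalg.find_partition(
    g, leidenalg.ModularityVertexPartition,
    weights=[w for _, _, w in edges], seed=42,
)

# Map vertex indices back to memory IDs, as the cluster builder does.
for idx, community in enumerate(partition):
    print(f"cluster {idx}: memories {[memory_ids[v] for v in community]}")
```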