superlocalmemory 2.7.5 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. package/CHANGELOG.md +120 -155
  2. package/README.md +115 -89
  3. package/api_server.py +2 -12
  4. package/docs/PATTERN-LEARNING.md +64 -199
  5. package/docs/example_graph_usage.py +4 -6
  6. package/install.ps1 +226 -0
  7. package/install.sh +59 -0
  8. package/mcp_server.py +83 -7
  9. package/package.json +3 -10
  10. package/scripts/generate-thumbnails.py +3 -5
  11. package/skills/slm-build-graph/SKILL.md +1 -1
  12. package/skills/slm-list-recent/SKILL.md +1 -1
  13. package/skills/slm-recall/SKILL.md +1 -1
  14. package/skills/slm-remember/SKILL.md +1 -1
  15. package/skills/slm-show-patterns/SKILL.md +1 -1
  16. package/skills/slm-status/SKILL.md +1 -1
  17. package/skills/slm-switch-profile/SKILL.md +1 -1
  18. package/src/agent_registry.py +7 -18
  19. package/src/auth_middleware.py +3 -5
  20. package/src/auto_backup.py +3 -7
  21. package/src/behavioral/__init__.py +49 -0
  22. package/src/behavioral/behavioral_listener.py +203 -0
  23. package/src/behavioral/behavioral_patterns.py +275 -0
  24. package/src/behavioral/cross_project_transfer.py +206 -0
  25. package/src/behavioral/outcome_inference.py +194 -0
  26. package/src/behavioral/outcome_tracker.py +193 -0
  27. package/src/behavioral/tests/__init__.py +4 -0
  28. package/src/behavioral/tests/test_behavioral_integration.py +108 -0
  29. package/src/behavioral/tests/test_behavioral_patterns.py +150 -0
  30. package/src/behavioral/tests/test_cross_project_transfer.py +142 -0
  31. package/src/behavioral/tests/test_mcp_behavioral.py +139 -0
  32. package/src/behavioral/tests/test_mcp_report_outcome.py +117 -0
  33. package/src/behavioral/tests/test_outcome_inference.py +107 -0
  34. package/src/behavioral/tests/test_outcome_tracker.py +96 -0
  35. package/src/cache_manager.py +4 -6
  36. package/src/compliance/__init__.py +48 -0
  37. package/src/compliance/abac_engine.py +149 -0
  38. package/src/compliance/abac_middleware.py +116 -0
  39. package/src/compliance/audit_db.py +215 -0
  40. package/src/compliance/audit_logger.py +148 -0
  41. package/src/compliance/retention_manager.py +289 -0
  42. package/src/compliance/retention_scheduler.py +186 -0
  43. package/src/compliance/tests/__init__.py +4 -0
  44. package/src/compliance/tests/test_abac_enforcement.py +95 -0
  45. package/src/compliance/tests/test_abac_engine.py +124 -0
  46. package/src/compliance/tests/test_abac_mcp_integration.py +118 -0
  47. package/src/compliance/tests/test_audit_db.py +123 -0
  48. package/src/compliance/tests/test_audit_logger.py +98 -0
  49. package/src/compliance/tests/test_mcp_audit.py +128 -0
  50. package/src/compliance/tests/test_mcp_retention_policy.py +125 -0
  51. package/src/compliance/tests/test_retention_manager.py +131 -0
  52. package/src/compliance/tests/test_retention_scheduler.py +99 -0
  53. package/src/db_connection_manager.py +2 -12
  54. package/src/embedding_engine.py +61 -669
  55. package/src/embeddings/__init__.py +47 -0
  56. package/src/embeddings/cache.py +70 -0
  57. package/src/embeddings/cli.py +113 -0
  58. package/src/embeddings/constants.py +47 -0
  59. package/src/embeddings/database.py +91 -0
  60. package/src/embeddings/engine.py +247 -0
  61. package/src/embeddings/model_loader.py +145 -0
  62. package/src/event_bus.py +3 -13
  63. package/src/graph/__init__.py +36 -0
  64. package/src/graph/build_helpers.py +74 -0
  65. package/src/graph/cli.py +87 -0
  66. package/src/graph/cluster_builder.py +188 -0
  67. package/src/graph/cluster_summary.py +148 -0
  68. package/src/graph/constants.py +47 -0
  69. package/src/graph/edge_builder.py +162 -0
  70. package/src/graph/entity_extractor.py +95 -0
  71. package/src/graph/graph_core.py +226 -0
  72. package/src/graph/graph_search.py +231 -0
  73. package/src/graph/hierarchical.py +207 -0
  74. package/src/graph/schema.py +99 -0
  75. package/src/graph_engine.py +45 -1451
  76. package/src/hnsw_index.py +3 -7
  77. package/src/hybrid_search.py +36 -683
  78. package/src/learning/__init__.py +27 -12
  79. package/src/learning/adaptive_ranker.py +50 -12
  80. package/src/learning/cross_project_aggregator.py +2 -12
  81. package/src/learning/engagement_tracker.py +2 -12
  82. package/src/learning/feature_extractor.py +175 -43
  83. package/src/learning/feedback_collector.py +7 -12
  84. package/src/learning/learning_db.py +180 -12
  85. package/src/learning/project_context_manager.py +2 -12
  86. package/src/learning/source_quality_scorer.py +2 -12
  87. package/src/learning/synthetic_bootstrap.py +2 -12
  88. package/src/learning/tests/__init__.py +2 -0
  89. package/src/learning/tests/test_adaptive_ranker.py +2 -6
  90. package/src/learning/tests/test_adaptive_ranker_v28.py +60 -0
  91. package/src/learning/tests/test_aggregator.py +2 -6
  92. package/src/learning/tests/test_auto_retrain_v28.py +35 -0
  93. package/src/learning/tests/test_e2e_ranking_v28.py +82 -0
  94. package/src/learning/tests/test_feature_extractor_v28.py +93 -0
  95. package/src/learning/tests/test_feedback_collector.py +2 -6
  96. package/src/learning/tests/test_learning_db.py +2 -6
  97. package/src/learning/tests/test_learning_db_v28.py +110 -0
  98. package/src/learning/tests/test_learning_init_v28.py +48 -0
  99. package/src/learning/tests/test_outcome_signals.py +48 -0
  100. package/src/learning/tests/test_project_context.py +2 -6
  101. package/src/learning/tests/test_schema_migration.py +319 -0
  102. package/src/learning/tests/test_signal_inference.py +11 -13
  103. package/src/learning/tests/test_source_quality.py +2 -6
  104. package/src/learning/tests/test_synthetic_bootstrap.py +3 -7
  105. package/src/learning/tests/test_workflow_miner.py +2 -6
  106. package/src/learning/workflow_pattern_miner.py +2 -12
  107. package/src/lifecycle/__init__.py +54 -0
  108. package/src/lifecycle/bounded_growth.py +239 -0
  109. package/src/lifecycle/compaction_engine.py +226 -0
  110. package/src/lifecycle/lifecycle_engine.py +302 -0
  111. package/src/lifecycle/lifecycle_evaluator.py +225 -0
  112. package/src/lifecycle/lifecycle_scheduler.py +130 -0
  113. package/src/lifecycle/retention_policy.py +285 -0
  114. package/src/lifecycle/tests/__init__.py +4 -0
  115. package/src/lifecycle/tests/test_bounded_growth.py +193 -0
  116. package/src/lifecycle/tests/test_compaction.py +179 -0
  117. package/src/lifecycle/tests/test_lifecycle_engine.py +137 -0
  118. package/src/lifecycle/tests/test_lifecycle_evaluation.py +177 -0
  119. package/src/lifecycle/tests/test_lifecycle_scheduler.py +127 -0
  120. package/src/lifecycle/tests/test_lifecycle_search.py +109 -0
  121. package/src/lifecycle/tests/test_mcp_compact.py +149 -0
  122. package/src/lifecycle/tests/test_mcp_lifecycle_status.py +114 -0
  123. package/src/lifecycle/tests/test_retention_policy.py +162 -0
  124. package/src/mcp_tools_v28.py +280 -0
  125. package/src/memory-profiles.py +2 -12
  126. package/src/memory-reset.py +2 -12
  127. package/src/memory_compression.py +2 -12
  128. package/src/memory_store_v2.py +76 -20
  129. package/src/migrate_v1_to_v2.py +2 -12
  130. package/src/pattern_learner.py +29 -975
  131. package/src/patterns/__init__.py +24 -0
  132. package/src/patterns/analyzers.py +247 -0
  133. package/src/patterns/learner.py +267 -0
  134. package/src/patterns/scoring.py +167 -0
  135. package/src/patterns/store.py +223 -0
  136. package/src/patterns/terminology.py +138 -0
  137. package/src/provenance_tracker.py +4 -14
  138. package/src/query_optimizer.py +4 -6
  139. package/src/rate_limiter.py +2 -6
  140. package/src/search/__init__.py +20 -0
  141. package/src/search/cli.py +77 -0
  142. package/src/search/constants.py +26 -0
  143. package/src/search/engine.py +239 -0
  144. package/src/search/fusion.py +122 -0
  145. package/src/search/index_loader.py +112 -0
  146. package/src/search/methods.py +162 -0
  147. package/src/search_engine_v2.py +4 -6
  148. package/src/setup_validator.py +7 -13
  149. package/src/subscription_manager.py +2 -12
  150. package/src/tree/__init__.py +59 -0
  151. package/src/tree/builder.py +183 -0
  152. package/src/tree/nodes.py +196 -0
  153. package/src/tree/queries.py +252 -0
  154. package/src/tree/schema.py +76 -0
  155. package/src/tree_manager.py +10 -711
  156. package/src/trust/__init__.py +45 -0
  157. package/src/trust/constants.py +66 -0
  158. package/src/trust/queries.py +157 -0
  159. package/src/trust/schema.py +95 -0
  160. package/src/trust/scorer.py +299 -0
  161. package/src/trust/signals.py +95 -0
  162. package/src/trust_scorer.py +39 -697
  163. package/src/webhook_dispatcher.py +2 -12
  164. package/ui/app.js +1 -1
  165. package/ui/index.html +3 -0
  166. package/ui/js/agents.js +1 -1
  167. package/ui/js/core.js +21 -5
  168. package/ui/js/profiles.js +29 -7
  169. package/ui_server.py +2 -14
  170. package/ATTRIBUTION.md +0 -140
  171. package/docs/ARCHITECTURE-V2.5.md +0 -190
  172. package/docs/GRAPH-ENGINE.md +0 -503
  173. package/docs/architecture-diagram.drawio +0 -405
  174. package/docs/plans/2026-02-13-benchmark-suite.md +0 -1349
@@ -0,0 +1,77 @@
1
+ #!/usr/bin/env python3
2
+ # SPDX-License-Identifier: MIT
3
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
+ """CLI interface for testing the hybrid search engine.
5
+ """
6
+ import sys
7
+ from pathlib import Path
8
+
9
+ from search.engine import HybridSearchEngine
10
+
11
+
12
def main():
    """Run the hybrid search CLI demo.

    Builds a ``HybridSearchEngine`` over ``~/.claude-memory/memory.db``,
    prints index statistics, then runs a query through the "bm25" and
    "hybrid" methods and prints the top results plus timing/cache stats.

    The query is taken from the command-line arguments (joined with
    spaces); when none are given a built-in sample query is used.

    Exits with status 1 if the database file does not exist.
    """
    separator = "=" * 60
    preview_chars = 100  # maximum number of content characters shown per hit

    print("Hybrid Search Engine - Demo")
    print(separator)

    # Only the default per-user database location is consulted.
    db_path = Path.home() / ".claude-memory" / "memory.db"

    if not db_path.exists():
        print(f"Error: Database not found at {db_path}")
        print("Please run memory_store_v2.py to create database first.")
        sys.exit(1)

    # Initialize hybrid search
    print("\nInitializing hybrid search engine...")
    print(f"Database: {db_path}")

    hybrid = HybridSearchEngine(db_path, enable_cache=True)

    stats = hybrid.get_stats()
    print(f"\nIndexed {stats['bm25']['num_documents']} memories")
    print(f" Vocabulary: {stats['bm25']['vocabulary_size']} terms")
    print(f" TF-IDF: {'Available' if stats['tfidf_available'] else 'Not available'}")
    print(f" Graph: {'Available' if stats['graph_available'] else 'Not available'}")

    # Query comes from argv when provided, otherwise a sample query
    if len(sys.argv) > 1:
        query = ' '.join(sys.argv[1:])
    else:
        query = "python web development"

    print("\n" + separator)
    print(f"Search Query: '{query}'")
    print(separator)

    # Exercise one single-method search and the fused search
    for method in ("bm25", "hybrid"):
        print(f"\nMethod: {method.upper()}")
        results = hybrid.search(query, limit=5, method=method)

        print(f" Found {len(results)} results in {hybrid.last_search_time*1000:.2f}ms")

        for i, mem in enumerate(results, 1):
            print(f"\n [{i}] Score: {mem['score']:.3f} | ID: {mem['id']}")
            if mem.get('category'):
                print(f" Category: {mem['category']}")
            if mem.get('tags'):
                # str() guards against non-string tag values in stored JSON
                print(f" Tags: {', '.join(str(tag) for tag in mem['tags'][:3])}")

            # Tolerate NULL content and only show an ellipsis when the
            # preview was actually cut short.
            content = mem['content'] or ''
            ellipsis = '...' if len(content) > preview_chars else ''
            print(f" Content: {content[:preview_chars]}{ellipsis}")

    # Display final stats
    print("\n" + separator)
    print("Performance Summary:")
    print(separator)

    final_stats = hybrid.get_stats()
    print(f" Last search time: {final_stats['last_search_time_ms']:.2f}ms")
    print(f" Last fusion time: {final_stats['last_fusion_time_ms']:.2f}ms")
    print(" Target: <50ms for 1K memories")

    # The 'cache' entry is only present when the engine has a cache
    if 'cache' in final_stats:
        cache_stats = final_stats['cache']
        print(f"\n Cache hit rate: {cache_stats['hit_rate']*100:.1f}%")
        print(f" Cache size: {cache_stats['current_size']}/{cache_stats['max_size']}")
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env python3
2
+ # SPDX-License-Identifier: MIT
3
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
+ """SuperLocalMemory V2 - Hybrid Search System
5
+
6
+ Solution Architect & Original Creator
7
+
8
+ (see LICENSE file)
9
+
10
+ ATTRIBUTION REQUIRED: This notice must be preserved in all copies.
11
+ """
12
+ """
13
+ Shared imports and constants for the hybrid search package.
14
+ """
15
+
16
+ import time
17
+ import math
18
+ import json
19
+ import sqlite3
20
+ from collections import defaultdict
21
+ from typing import List, Dict, Tuple, Optional, Any, Set
22
+ from pathlib import Path
23
+
24
+ from search_engine_v2 import BM25SearchEngine
25
+ from query_optimizer import QueryOptimizer
26
+ from cache_manager import CacheManager
@@ -0,0 +1,239 @@
1
+ #!/usr/bin/env python3
2
+ # SPDX-License-Identifier: MIT
3
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
+ """HybridSearchEngine - Main orchestrator for multi-method retrieval fusion.
5
+ """
6
+ import time
7
+ import json
8
+ import sqlite3
9
+ from pathlib import Path
10
+ from typing import List, Dict, Tuple, Optional, Any
11
+
12
+ from search_engine_v2 import BM25SearchEngine
13
+ from query_optimizer import QueryOptimizer
14
+ from cache_manager import CacheManager
15
+
16
+ from search.index_loader import IndexLoaderMixin
17
+ from search.methods import SearchMethodsMixin
18
+ from search.fusion import FusionMixin
19
+
20
+
21
class HybridSearchEngine(IndexLoaderMixin, SearchMethodsMixin, FusionMixin):
    """
    Hybrid search combining BM25, graph traversal, and semantic search.

    Provides flexible retrieval strategies based on query type and
    available resources. Index loading comes from IndexLoaderMixin and
    score fusion from FusionMixin; the per-method ``search_bm25`` /
    ``search_semantic`` / ``search_graph`` calls are presumably supplied
    by SearchMethodsMixin (not visible here -- confirm).
    """

    # Methods whose results do not depend on the fusion weights.
    _SINGLE_METHODS = ("bm25", "semantic", "graph")

    def __init__(
        self,
        db_path: Path,
        bm25_engine: Optional[BM25SearchEngine] = None,
        query_optimizer: Optional[QueryOptimizer] = None,
        cache_manager: Optional[CacheManager] = None,
        enable_cache: bool = True
    ):
        """
        Initialize hybrid search engine.

        Args:
            db_path: Path to memory database
            bm25_engine: Pre-configured BM25 engine (will create if None)
            query_optimizer: Query optimizer instance (will create if None)
            cache_manager: Cache manager instance (will create if None)
            enable_cache: Enable result caching; when False no cache is
                used even if ``cache_manager`` is supplied
        """
        self.db_path = db_path

        # Initialize components. Explicit ``is None`` checks so that a
        # supplied-but-falsy instance (e.g. an empty container-like
        # object) is never silently replaced.
        self.bm25 = bm25_engine if bm25_engine is not None else BM25SearchEngine()
        self.optimizer = (
            query_optimizer if query_optimizer is not None else QueryOptimizer()
        )

        # Honour the documented "will create if None" contract: previously
        # enable_cache=True without an explicit manager left caching off.
        if not enable_cache:
            self.cache = None
        elif cache_manager is not None:
            self.cache = cache_manager
        else:
            self.cache = CacheManager()

        # Graph engine (lazy load to avoid circular dependencies)
        self._graph_engine = None

        # TF-IDF fallback (from memory_store_v2)
        self._tfidf_vectorizer = None
        self._tfidf_vectors = None
        self._memory_ids = []

        # Performance tracking (seconds)
        self.last_search_time = 0.0
        self.last_fusion_time = 0.0

        # Load index
        self._load_index()

    def search(
        self,
        query: str,
        limit: int = 10,
        method: str = "hybrid",
        weights: Optional[Dict[str, float]] = None,
        use_cache: bool = True
    ) -> List[Dict[str, Any]]:
        """
        Hybrid search with multiple retrieval methods.

        Args:
            query: Search query
            limit: Maximum results
            method: Fusion method ("hybrid", "weighted", "rrf", "bm25",
                "semantic", "graph"); any other value is treated like
                "hybrid"/"weighted"
            weights: Custom weights for weighted fusion (default: balanced).
                For "rrf" the weights only decide which methods take part.
            use_cache: Use cache for results

        Returns:
            List of memory dictionaries with scores and match details
        """
        start_time = time.time()

        # The cache key is (query, limit, method) only. A fused search with
        # caller-supplied weights must therefore bypass the cache, otherwise
        # it could return (or store) results computed with other weights.
        cacheable = (
            use_cache
            and self.cache is not None
            and (weights is None or method in self._SINGLE_METHODS)
        )

        # Check cache
        if cacheable:
            cached = self.cache.get(query, limit=limit, method=method)
            if cached is not None:
                self.last_search_time = time.time() - start_time
                return cached

        # Default weights
        if weights is None:
            weights = {
                'bm25': 0.4,
                'semantic': 0.3,
                'graph': 0.3
            }

        # Single method search
        if method == "bm25":
            raw_results = self.search_bm25(query, limit)
        elif method == "semantic":
            raw_results = self.search_semantic(query, limit)
        elif method == "graph":
            raw_results = self.search_graph(query, limit)

        # Multi-method fusion
        else:
            fusion_start = time.time()

            # Get results from every method with a positive weight;
            # over-fetch (2x) so fusion has candidates beyond the top-N.
            results_dict = {}

            if weights.get('bm25', 0) > 0:
                results_dict['bm25'] = self.search_bm25(query, limit=limit*2)

            if weights.get('semantic', 0) > 0:
                results_dict['semantic'] = self.search_semantic(query, limit=limit*2)

            if weights.get('graph', 0) > 0:
                results_dict['graph'] = self.search_graph(query, limit=limit*2)

            # Fusion
            if method == "rrf":
                raw_results = self._reciprocal_rank_fusion(list(results_dict.values()))
            else:  # weighted or hybrid
                raw_results = self._weighted_fusion(results_dict, weights)

            self.last_fusion_time = time.time() - fusion_start

        # Limit results
        raw_results = raw_results[:limit]

        # Fetch full memory details
        results = self._fetch_memory_details(raw_results, query)

        # Cache results
        if cacheable:
            self.cache.put(query, results, limit=limit, method=method)

        self.last_search_time = time.time() - start_time

        return results

    def _fetch_memory_details(
        self,
        raw_results: List[Tuple[int, float]],
        query: str
    ) -> List[Dict[str, Any]]:
        """
        Fetch full memory details for result IDs.

        Args:
            raw_results: List of (memory_id, score) tuples
            query: Original query (for context; not used by the lookup)

        Returns:
            List of memory dictionaries with full details, sorted by score
            descending. IDs with no row in the database are omitted.
        """
        if not raw_results:
            return []

        memory_ids = [mem_id for mem_id, _ in raw_results]
        id_to_score = {mem_id: score for mem_id, score in raw_results}

        # Only '?' placeholders are interpolated into the SQL text; the
        # IDs themselves are bound as parameters.
        placeholders = ','.join(['?'] * len(memory_ids))

        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute(f'''
                SELECT id, content, summary, project_path, project_name, tags,
                       category, parent_id, tree_path, depth, memory_type,
                       importance, created_at, cluster_id, last_accessed, access_count
                FROM memories
                WHERE id IN ({placeholders})
            ''', memory_ids)
            rows = cursor.fetchall()
        finally:
            # Always release the connection, even if the query fails.
            conn.close()

        # Build result dictionaries
        results = []
        for row in rows:
            mem_id = row[0]

            # Tags are stored as JSON text; a malformed value should not
            # abort the whole search (index loading tolerates it as well).
            try:
                tags = json.loads(row[5]) if row[5] else []
            except (ValueError, TypeError):
                tags = []

            results.append({
                'id': mem_id,
                'content': row[1],
                'summary': row[2],
                'project_path': row[3],
                'project_name': row[4],
                'tags': tags,
                'category': row[6],
                'parent_id': row[7],
                'tree_path': row[8],
                'depth': row[9],
                'memory_type': row[10],
                'importance': row[11],
                'created_at': row[12],
                'cluster_id': row[13],
                'last_accessed': row[14],
                'access_count': row[15],
                'score': id_to_score.get(mem_id, 0.0),
                'match_type': 'hybrid'
            })

        # Sort by score (rows come back in database order, not rank order)
        results.sort(key=lambda x: x['score'], reverse=True)

        return results

    def get_stats(self) -> Dict[str, Any]:
        """
        Get hybrid search statistics.

        Returns:
            Dictionary with performance stats. Timings are reported in
            milliseconds. ``graph_available`` is True only once the graph
            engine has been loaded, and the ``cache`` entry is present
            only when a cache is configured.
        """
        stats = {
            'bm25': self.bm25.get_stats(),
            'optimizer': self.optimizer.get_stats(),
            'last_search_time_ms': self.last_search_time * 1000,
            'last_fusion_time_ms': self.last_fusion_time * 1000,
            'tfidf_available': self._tfidf_vectorizer is not None,
            'graph_available': self._graph_engine is not None
        }

        if self.cache is not None:
            stats['cache'] = self.cache.get_stats()

        return stats
@@ -0,0 +1,122 @@
1
+ #!/usr/bin/env python3
2
+ # SPDX-License-Identifier: MIT
3
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
+ """Score fusion strategies for combining multi-method search results.
5
+ """
6
+ from collections import defaultdict
7
+ from typing import List, Dict, Tuple
8
+
9
+
10
class FusionMixin:
    """
    Mixin providing score normalization and fusion strategies.

    No external dependencies -- operates purely on (id, score) tuples.
    """

    def _normalize_scores(
        self,
        results: List[Tuple[int, float]]
    ) -> List[Tuple[int, float]]:
        """
        Normalize scores to [0, 1] range using min-max normalization.

        The lowest-scoring entry maps to 0.0 and the highest to 1.0. When
        every score is identical, all entries receive 1.0.

        Args:
            results: List of (id, score) tuples

        Returns:
            Normalized results, in the same order as the input
        """
        if not results:
            return []

        scores = [score for _, score in results]
        min_score = min(scores)
        max_score = max(scores)

        if max_score == min_score:
            # All scores equal - return uniform scores (avoids dividing by 0)
            return [(mem_id, 1.0) for mem_id, _ in results]

        score_range = max_score - min_score
        return [
            (mem_id, (score - min_score) / score_range)
            for mem_id, score in results
        ]

    def _reciprocal_rank_fusion(
        self,
        results_list: List[List[Tuple[int, float]]],
        k: int = 60
    ) -> List[Tuple[int, float]]:
        """
        Combine multiple result lists using Reciprocal Rank Fusion.

        RRF formula: score(d) = sum 1 / (k + rank(d))

        RRF is rank-based and doesn't depend on score magnitudes,
        making it robust to different scoring scales. The rank of an
        entry is its 1-based position in its list, so each input list
        is expected to be ordered best-first.

        Args:
            results_list: List of result lists from different methods
            k: RRF constant (default: 60, standard value)

        Returns:
            Fused results sorted by RRF score
        """
        rrf_scores = defaultdict(float)

        for results in results_list:
            for rank, (mem_id, _) in enumerate(results, start=1):
                rrf_scores[mem_id] += 1.0 / (k + rank)

        # Stable sort: ties keep first-seen order
        return sorted(rrf_scores.items(), key=lambda item: item[1], reverse=True)

    def _weighted_fusion(
        self,
        results_dict: Dict[str, List[Tuple[int, float]]],
        weights: Dict[str, float]
    ) -> List[Tuple[int, float]]:
        """
        Combine results using weighted score fusion.

        Normalizes scores from each method then combines with weights.
        Each document's weighted sum is divided by the total weight of
        the methods that actually returned it, so a document is not
        penalized for being absent from a method's result list.

        Args:
            results_dict: Dictionary mapping method name to results
            weights: Dictionary mapping method name to weight; methods
                missing from this mapping count with weight 0.0

        Returns:
            Fused results sorted by combined score
        """
        combined_scores = defaultdict(float)
        weight_totals = defaultdict(float)  # Sum of weights seen per doc

        for method, results in results_dict.items():
            weight = weights.get(method, 0.0)

            for mem_id, score in self._normalize_scores(results):
                combined_scores[mem_id] += weight * score
                weight_totals[mem_id] += weight

        # Normalize by actual weights (some docs may not appear in all methods)
        fused = []
        for mem_id, score in combined_scores.items():
            total = weight_totals[mem_id]
            # 0.0 (a float, per the return type) when no positive weight applied
            fused.append((mem_id, score / total if total > 0 else 0.0))

        fused.sort(key=lambda item: item[1], reverse=True)

        return fused
@@ -0,0 +1,112 @@
1
+ #!/usr/bin/env python3
2
+ # SPDX-License-Identifier: MIT
3
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
+ """Index loading and graph engine lazy-loading for hybrid search.
5
+ """
6
+ import json
7
+ import sqlite3
8
+ from pathlib import Path
9
+ from typing import Optional
10
+
11
+ from search_engine_v2 import BM25SearchEngine
12
+ from query_optimizer import QueryOptimizer
13
+
14
+
15
class IndexLoaderMixin:
    """
    Mixin that provides index loading and graph engine lazy-loading.

    Expects the host class to have:
    - self.db_path: Path
    - self.bm25: BM25SearchEngine
    - self.optimizer: QueryOptimizer
    - self._graph_engine: Optional[GraphEngine]
    - self._tfidf_vectorizer
    - self._tfidf_vectors
    - self._memory_ids: list
    """

    @staticmethod
    def _parse_tags(tags_json):
        """Return the tags stored in a JSON column as a list of strings.

        Empty, malformed, or non-list values yield an empty list so that a
        single bad row cannot break index construction.
        """
        if not tags_json:
            return []
        try:
            tags = json.loads(tags_json)
        except (ValueError, TypeError):
            return []
        if not isinstance(tags, list):
            return []
        return [str(tag) for tag in tags]

    def _load_index(self):
        """
        Load documents from database and build search indexes.

        Builds the BM25 index, seeds the query optimizer's vocabulary and
        co-occurrence matrix, and -- when scikit-learn is importable --
        fits a TF-IDF model. Does nothing when the ``memories`` table is
        empty.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            # Fetch all memories
            rows = conn.execute('''
                SELECT id, content, summary, tags
                FROM memories
                ORDER BY id
            ''').fetchall()
        finally:
            # Release the connection even if the query fails
            conn.close()

        if not rows:
            return

        doc_ids = []
        documents = []

        for mem_id, content, summary, tags_json in rows:
            # Combine content + summary + tags for indexing
            text_parts = [content or '']  # tolerate NULL content

            if summary:
                text_parts.append(summary)

            text_parts.extend(self._parse_tags(tags_json))

            doc_ids.append(mem_id)
            documents.append(' '.join(text_parts))

        # Tokenize each document once; the tokens feed both the
        # spell-correction vocabulary and the co-occurrence matrix.
        # list() keeps the tokens reusable whatever iterable _tokenize returns.
        tokenized_docs = [list(self.bm25._tokenize(doc)) for doc in documents]

        vocabulary = set()
        for tokens in tokenized_docs:
            vocabulary.update(tokens)

        # Index with BM25
        self.bm25.index_documents(documents, doc_ids)
        self._memory_ids = doc_ids

        # Initialize optimizer with vocabulary
        self.optimizer.vocabulary = vocabulary

        # Build co-occurrence for query expansion
        self.optimizer.build_cooccurrence_matrix(tokenized_docs)

        # Try to load TF-IDF (optional semantic search)
        try:
            from sklearn.feature_extraction.text import TfidfVectorizer
            # Not used here; imported so that TF-IDF is only enabled when
            # these are importable too (presumably needed by the semantic
            # search code elsewhere -- confirm).
            from sklearn.metrics.pairwise import cosine_similarity  # noqa: F401
            import numpy as np  # noqa: F401
        except ImportError:
            # sklearn not available - skip semantic search
            return

        vectorizer = TfidfVectorizer(
            max_features=5000,
            stop_words='english',
            ngram_range=(1, 2)
        )
        try:
            vectors = vectorizer.fit_transform(documents)
        except ValueError:
            # scikit-learn raises ValueError when the corpus yields an
            # empty vocabulary (e.g. only stop words); leave TF-IDF off
            # rather than failing engine construction.
            return

        # Assign together so the two attributes are never out of sync
        self._tfidf_vectorizer = vectorizer
        self._tfidf_vectors = vectors

    def _load_graph_engine(self):
        """Lazy load graph engine to avoid circular imports.

        Returns:
            The cached or newly created graph engine, or None when the
            ``graph_engine`` module cannot be imported.
        """
        if self._graph_engine is None:
            try:
                from graph_engine import GraphEngine
                self._graph_engine = GraphEngine(self.db_path)
            except ImportError:
                # Graph engine not available
                pass
        return self._graph_engine