superlocalmemory 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. package/ATTRIBUTION.md +140 -0
  2. package/CHANGELOG.md +1749 -0
  3. package/LICENSE +21 -0
  4. package/README.md +600 -0
  5. package/bin/aider-smart +72 -0
  6. package/bin/slm +202 -0
  7. package/bin/slm-npm +73 -0
  8. package/bin/slm.bat +195 -0
  9. package/bin/slm.cmd +10 -0
  10. package/bin/superlocalmemoryv2:list +3 -0
  11. package/bin/superlocalmemoryv2:profile +3 -0
  12. package/bin/superlocalmemoryv2:recall +3 -0
  13. package/bin/superlocalmemoryv2:remember +3 -0
  14. package/bin/superlocalmemoryv2:reset +3 -0
  15. package/bin/superlocalmemoryv2:status +3 -0
  16. package/completions/slm.bash +58 -0
  17. package/completions/slm.zsh +76 -0
  18. package/configs/antigravity-mcp.json +13 -0
  19. package/configs/chatgpt-desktop-mcp.json +7 -0
  20. package/configs/claude-desktop-mcp.json +15 -0
  21. package/configs/codex-mcp.toml +13 -0
  22. package/configs/cody-commands.json +29 -0
  23. package/configs/continue-mcp.yaml +14 -0
  24. package/configs/continue-skills.yaml +26 -0
  25. package/configs/cursor-mcp.json +15 -0
  26. package/configs/gemini-cli-mcp.json +11 -0
  27. package/configs/jetbrains-mcp.json +11 -0
  28. package/configs/opencode-mcp.json +12 -0
  29. package/configs/perplexity-mcp.json +9 -0
  30. package/configs/vscode-copilot-mcp.json +12 -0
  31. package/configs/windsurf-mcp.json +16 -0
  32. package/configs/zed-mcp.json +12 -0
  33. package/docs/ARCHITECTURE.md +877 -0
  34. package/docs/CLI-COMMANDS-REFERENCE.md +425 -0
  35. package/docs/COMPETITIVE-ANALYSIS.md +210 -0
  36. package/docs/COMPRESSION-README.md +390 -0
  37. package/docs/GRAPH-ENGINE.md +503 -0
  38. package/docs/MCP-MANUAL-SETUP.md +720 -0
  39. package/docs/MCP-TROUBLESHOOTING.md +787 -0
  40. package/docs/PATTERN-LEARNING.md +363 -0
  41. package/docs/PROFILES-GUIDE.md +453 -0
  42. package/docs/RESET-GUIDE.md +353 -0
  43. package/docs/SEARCH-ENGINE-V2.2.0.md +748 -0
  44. package/docs/SEARCH-INTEGRATION-GUIDE.md +502 -0
  45. package/docs/UI-SERVER.md +254 -0
  46. package/docs/UNIVERSAL-INTEGRATION.md +432 -0
  47. package/docs/V2.2.0-OPTIONAL-SEARCH.md +666 -0
  48. package/docs/WINDOWS-INSTALL-README.txt +34 -0
  49. package/docs/WINDOWS-POST-INSTALL.txt +45 -0
  50. package/docs/example_graph_usage.py +148 -0
  51. package/hooks/memory-list-skill.js +130 -0
  52. package/hooks/memory-profile-skill.js +284 -0
  53. package/hooks/memory-recall-skill.js +109 -0
  54. package/hooks/memory-remember-skill.js +127 -0
  55. package/hooks/memory-reset-skill.js +274 -0
  56. package/install-skills.sh +436 -0
  57. package/install.ps1 +417 -0
  58. package/install.sh +755 -0
  59. package/mcp_server.py +585 -0
  60. package/package.json +94 -0
  61. package/requirements-core.txt +24 -0
  62. package/requirements.txt +10 -0
  63. package/scripts/postinstall.js +126 -0
  64. package/scripts/preuninstall.js +57 -0
  65. package/skills/slm-build-graph/SKILL.md +423 -0
  66. package/skills/slm-list-recent/SKILL.md +348 -0
  67. package/skills/slm-recall/SKILL.md +325 -0
  68. package/skills/slm-remember/SKILL.md +194 -0
  69. package/skills/slm-status/SKILL.md +363 -0
  70. package/skills/slm-switch-profile/SKILL.md +442 -0
  71. package/src/__pycache__/cache_manager.cpython-312.pyc +0 -0
  72. package/src/__pycache__/embedding_engine.cpython-312.pyc +0 -0
  73. package/src/__pycache__/graph_engine.cpython-312.pyc +0 -0
  74. package/src/__pycache__/hnsw_index.cpython-312.pyc +0 -0
  75. package/src/__pycache__/hybrid_search.cpython-312.pyc +0 -0
  76. package/src/__pycache__/memory-profiles.cpython-312.pyc +0 -0
  77. package/src/__pycache__/memory-reset.cpython-312.pyc +0 -0
  78. package/src/__pycache__/memory_compression.cpython-312.pyc +0 -0
  79. package/src/__pycache__/memory_store_v2.cpython-312.pyc +0 -0
  80. package/src/__pycache__/migrate_v1_to_v2.cpython-312.pyc +0 -0
  81. package/src/__pycache__/pattern_learner.cpython-312.pyc +0 -0
  82. package/src/__pycache__/query_optimizer.cpython-312.pyc +0 -0
  83. package/src/__pycache__/search_engine_v2.cpython-312.pyc +0 -0
  84. package/src/__pycache__/setup_validator.cpython-312.pyc +0 -0
  85. package/src/__pycache__/tree_manager.cpython-312.pyc +0 -0
  86. package/src/cache_manager.py +520 -0
  87. package/src/embedding_engine.py +671 -0
  88. package/src/graph_engine.py +970 -0
  89. package/src/hnsw_index.py +626 -0
  90. package/src/hybrid_search.py +693 -0
  91. package/src/memory-profiles.py +518 -0
  92. package/src/memory-reset.py +485 -0
  93. package/src/memory_compression.py +999 -0
  94. package/src/memory_store_v2.py +1088 -0
  95. package/src/migrate_v1_to_v2.py +638 -0
  96. package/src/pattern_learner.py +898 -0
  97. package/src/query_optimizer.py +513 -0
  98. package/src/search_engine_v2.py +403 -0
  99. package/src/setup_validator.py +479 -0
  100. package/src/tree_manager.py +720 -0
package/src/hybrid_search.py
@@ -0,0 +1,693 @@
+ #!/usr/bin/env python3
+ """
+ SuperLocalMemory V2 - Hybrid Search System
+
+ Copyright (c) 2026 Varun Pratap Bhardwaj
+ Solution Architect & Original Creator
+
+ Licensed under MIT License (see LICENSE file)
+ Repository: https://github.com/varun369/SuperLocalMemoryV2
+
+ ATTRIBUTION REQUIRED: This notice must be preserved in all copies.
+ """
+
+ """
+ Hybrid Search System - Multi-Method Retrieval Fusion
+
+ Combines multiple search methods for optimal retrieval quality:
+
+ 1. BM25 Keyword Search: Lexical matching with relevance ranking
+    - Fast exact term matching
+    - Good for technical queries with specific terms
+    - Weight: 0.4 (40%)
+
+ 2. Graph-Based Traversal: Relationship-aware search
+    - Finds related memories via knowledge graph
+    - Good for conceptual/thematic queries
+    - Weight: 0.3 (30%)
+
+ 3. TF-IDF Semantic Search: Distributional similarity
+    - Captures semantic relationships
+    - Good for natural language queries
+    - Weight: 0.3 (30%)
+
+ 4. Optional Embedding Search: Dense vector similarity
+    - Best semantic understanding (if available)
+    - Requires sentence-transformers
+    - Can replace or augment TF-IDF
+
+ Fusion Methods:
+ - Reciprocal Rank Fusion (RRF): Rank-based combination
+ - Weighted Score Fusion: Normalized score combination
+ - Hybrid: Adaptive based on query characteristics
+
+ Performance Target: <50ms for 1K memories (hybrid mode)
+
+ Usage:
+     hybrid = HybridSearchEngine(db_path)
+     results = hybrid.search(
+         query="authentication bug",
+         method="weighted",
+         weights={'bm25': 0.4, 'graph': 0.3, 'semantic': 0.3}
+     )
+ """
+
+ import json
+ import sqlite3
+ import time
+ from collections import defaultdict, deque
+ from pathlib import Path
+ from typing import List, Dict, Tuple, Optional, Any
+
+ # Import local modules
+ from search_engine_v2 import BM25SearchEngine
+ from query_optimizer import QueryOptimizer
+ from cache_manager import CacheManager
+
+
+ class HybridSearchEngine:
+     """
+     Hybrid search combining BM25, graph traversal, and semantic search.
+
+     Provides flexible retrieval strategies based on query type and
+     available resources.
+     """
+
+     def __init__(
+         self,
+         db_path: Path,
+         bm25_engine: Optional[BM25SearchEngine] = None,
+         query_optimizer: Optional[QueryOptimizer] = None,
+         cache_manager: Optional[CacheManager] = None,
+         enable_cache: bool = True
+     ):
+         """
+         Initialize hybrid search engine.
+
+         Args:
+             db_path: Path to memory database
+             bm25_engine: Pre-configured BM25 engine (created if None)
+             query_optimizer: Query optimizer instance (created if None)
+             cache_manager: Cache manager instance (used only when enable_cache is True)
+             enable_cache: Enable result caching
+         """
+         self.db_path = db_path
+
+         # Initialize components
+         self.bm25 = bm25_engine or BM25SearchEngine()
+         self.optimizer = query_optimizer or QueryOptimizer()
+         self.cache = cache_manager if enable_cache else None
+
+         # Graph engine (lazy-loaded to avoid circular dependencies)
+         self._graph_engine = None
+
+         # TF-IDF fallback (from memory_store_v2)
+         self._tfidf_vectorizer = None
+         self._tfidf_vectors = None
+         self._memory_ids = []
+
+         # Performance tracking
+         self.last_search_time = 0.0
+         self.last_fusion_time = 0.0
+
+         # Load index
+         self._load_index()
+
+     def _load_index(self):
+         """
+         Load documents from database and build search indexes.
+         """
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+
+         # Fetch all memories
+         cursor.execute('''
+             SELECT id, content, summary, tags
+             FROM memories
+             ORDER BY id
+         ''')
+
+         rows = cursor.fetchall()
+         conn.close()
+
+         if not rows:
+             return
+
+         # Build BM25 index
+         doc_ids = [row[0] for row in rows]
+         documents = []
+         vocabulary = set()
+
+         for row in rows:
+             # Combine content + summary + tags for indexing
+             text_parts = [row[1]]  # content
+
+             if row[2]:  # summary
+                 text_parts.append(row[2])
+
+             if row[3]:  # tags (JSON)
+                 try:
+                     tags = json.loads(row[3])
+                     text_parts.extend(tags)
+                 except (json.JSONDecodeError, TypeError):
+                     pass
+
+             doc_text = ' '.join(text_parts)
+             documents.append(doc_text)
+
+             # Build vocabulary for spell correction
+             tokens = self.bm25._tokenize(doc_text)
+             vocabulary.update(tokens)
+
+         # Index with BM25
+         self.bm25.index_documents(documents, doc_ids)
+         self._memory_ids = doc_ids
+
+         # Initialize optimizer with vocabulary
+         self.optimizer.vocabulary = vocabulary
+
+         # Build co-occurrence for query expansion
+         tokenized_docs = [self.bm25._tokenize(doc) for doc in documents]
+         self.optimizer.build_cooccurrence_matrix(tokenized_docs)
+
+         # Try to load TF-IDF (optional semantic search)
+         try:
+             from sklearn.feature_extraction.text import TfidfVectorizer
+
+             self._tfidf_vectorizer = TfidfVectorizer(
+                 max_features=5000,
+                 stop_words='english',
+                 ngram_range=(1, 2)
+             )
+             self._tfidf_vectors = self._tfidf_vectorizer.fit_transform(documents)
+
+         except ImportError:
+             # sklearn not available - skip semantic search
+             pass
+
+     def _load_graph_engine(self):
+         """Lazy-load graph engine to avoid circular imports."""
+         if self._graph_engine is None:
+             try:
+                 from graph_engine import GraphEngine
+                 self._graph_engine = GraphEngine(self.db_path)
+             except ImportError:
+                 # Graph engine not available
+                 pass
+         return self._graph_engine
+
+     def search_bm25(
+         self,
+         query: str,
+         limit: int = 10,
+         score_threshold: float = 0.0
+     ) -> List[Tuple[int, float]]:
+         """
+         Search using BM25 keyword matching.
+
+         Args:
+             query: Search query
+             limit: Maximum results
+             score_threshold: Minimum score threshold
+
+         Returns:
+             List of (memory_id, score) tuples
+         """
+         # Optimize query
+         optimized = self.optimizer.optimize(
+             query,
+             enable_spell_correction=True,
+             enable_expansion=False  # Expansion can hurt precision
+         )
+
+         # Search with BM25
+         results = self.bm25.search(optimized, limit, score_threshold)
+
+         return results
+
+     def search_semantic(
+         self,
+         query: str,
+         limit: int = 10,
+         score_threshold: float = 0.05
+     ) -> List[Tuple[int, float]]:
+         """
+         Search using TF-IDF semantic similarity.
+
+         Args:
+             query: Search query
+             limit: Maximum results
+             score_threshold: Minimum similarity threshold
+
+         Returns:
+             List of (memory_id, score) tuples
+         """
+         if self._tfidf_vectorizer is None or self._tfidf_vectors is None:
+             return []
+
+         try:
+             from sklearn.metrics.pairwise import cosine_similarity
+
+             # Vectorize query
+             query_vec = self._tfidf_vectorizer.transform([query])
+
+             # Calculate similarities
+             similarities = cosine_similarity(query_vec, self._tfidf_vectors).flatten()
+
+             # Get top results above threshold
+             results = []
+             for idx, score in enumerate(similarities):
+                 if score >= score_threshold:
+                     memory_id = self._memory_ids[idx]
+                     results.append((memory_id, float(score)))
+
+             # Sort by score and limit
+             results.sort(key=lambda x: x[1], reverse=True)
+             return results[:limit]
+
+         except Exception:
+             # Fall back gracefully
+             return []
+
+     def search_graph(
+         self,
+         query: str,
+         limit: int = 10,
+         max_depth: int = 2
+     ) -> List[Tuple[int, float]]:
+         """
+         Search using graph traversal from initial matches.
+
+         Strategy:
+         1. Get seed memories from BM25
+         2. Traverse graph to find related memories
+         3. Score by distance from seed nodes
+
+         Args:
+             query: Search query
+             limit: Maximum results
+             max_depth: Maximum graph traversal depth
+
+         Returns:
+             List of (memory_id, score) tuples
+         """
+         graph = self._load_graph_engine()
+         if graph is None:
+             return []
+
+         # Get seed memories from BM25
+         seed_results = self.search_bm25(query, limit=5)
+         if not seed_results:
+             return []
+
+         seed_ids = [mem_id for mem_id, _ in seed_results]
+
+         # Traverse graph from seed nodes
+         visited = set(seed_ids)
+         in_results = set()
+         results = []
+
+         # BFS traversal over (id, score, depth) triples
+         queue = deque((mem_id, 1.0, 0) for mem_id in seed_ids)
+
+         while queue and len(results) < limit:
+             current_id, current_score, depth = queue.popleft()
+
+             if depth > max_depth:
+                 continue
+
+             # Add to results (deduplicated via set lookup)
+             if current_id not in in_results:
+                 in_results.add(current_id)
+                 results.append((current_id, current_score))
+
+             # Get related memories from graph
+             try:
+                 related = graph.get_related_memories(current_id, limit=5)
+
+                 for rel_id, similarity in related:
+                     if rel_id not in visited:
+                         visited.add(rel_id)
+                         # Decay score by depth
+                         new_score = current_score * similarity * (0.7 ** depth)
+                         queue.append((rel_id, new_score, depth + 1))
+
+             except Exception:
+                 # Graph operation failed - skip this node
+                 continue
+
+         return results[:limit]
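
To make the decay concrete: a node expanded at depth d passes on current_score * similarity * 0.7**d, so relevance falls off geometrically with distance from the seeds. A small sketch with illustrative similarity values:

    seed_score = 1.0
    # Hop 1: neighbor with similarity 0.9, expanded from a seed at depth 0
    hop1 = seed_score * 0.9 * (0.7 ** 0)   # 0.9
    # Hop 2: that node's neighbor with similarity 0.8, expanded at depth 1
    hop2 = hop1 * 0.8 * (0.7 ** 1)         # 0.504
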
+
+     def _normalize_scores(
+         self,
+         results: List[Tuple[int, float]]
+     ) -> List[Tuple[int, float]]:
+         """
+         Normalize scores to [0, 1] range using min-max normalization.
+
+         Args:
+             results: List of (id, score) tuples
+
+         Returns:
+             Normalized results
+         """
+         if not results:
+             return []
+
+         scores = [score for _, score in results]
+         min_score = min(scores)
+         max_score = max(scores)
+
+         if max_score == min_score:
+             # All scores equal - return uniform scores
+             return [(mem_id, 1.0) for mem_id, _ in results]
+
+         normalized = []
+         for mem_id, score in results:
+             norm_score = (score - min_score) / (max_score - min_score)
+             normalized.append((mem_id, norm_score))
+
+         return normalized
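
For instance, raw scores of 2.0, 5.0 and 8.0 map to 0.0, 0.5 and 1.0, which puts BM25 and TF-IDF outputs on the same scale before fusion (a sketch with made-up IDs and scores):

    raw = [(101, 2.0), (102, 5.0), (103, 8.0)]
    lo, hi = 2.0, 8.0
    normalized = [(i, (s - lo) / (hi - lo)) for i, s in raw]
    # [(101, 0.0), (102, 0.5), (103, 1.0)]
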
+
+     def _reciprocal_rank_fusion(
+         self,
+         results_list: List[List[Tuple[int, float]]],
+         k: int = 60
+     ) -> List[Tuple[int, float]]:
+         """
+         Combine multiple result lists using Reciprocal Rank Fusion.
+
+         RRF formula: score(d) = Σ 1 / (k + rank(d))
+
+         RRF is rank-based and doesn't depend on score magnitudes,
+         making it robust to different scoring scales.
+
+         Args:
+             results_list: List of result lists from different methods
+             k: RRF constant (default: 60, the standard value)
+
+         Returns:
+             Fused results sorted by RRF score
+         """
+         # Accumulate each method's reciprocal-rank contribution per document
+         rrf_scores = defaultdict(float)
+
+         for results in results_list:
+             for rank, (mem_id, _) in enumerate(results, start=1):
+                 rrf_scores[mem_id] += 1.0 / (k + rank)
+
+         # Convert to sorted list
+         fused = [(mem_id, score) for mem_id, score in rrf_scores.items()]
+         fused.sort(key=lambda x: x[1], reverse=True)
+
+         return fused
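
A quick worked case of the formula above: with k = 60, a memory ranked 1st in the BM25 list and 3rd in the semantic list scores 1/61 + 1/63 ā‰ˆ 0.0323, while one ranked 10th in a single list scores only 1/70 ā‰ˆ 0.0143 (ranks here are illustrative):

    k = 60
    both_lists = 1.0 / (k + 1) + 1.0 / (k + 3)   # ā‰ˆ 0.0323
    one_list = 1.0 / (k + 10)                    # ā‰ˆ 0.0143
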
+
+     def _weighted_fusion(
+         self,
+         results_dict: Dict[str, List[Tuple[int, float]]],
+         weights: Dict[str, float]
+     ) -> List[Tuple[int, float]]:
+         """
+         Combine results using weighted score fusion.
+
+         Normalizes scores from each method, then combines them with weights.
+
+         Args:
+             results_dict: Dictionary mapping method name to results
+             weights: Dictionary mapping method name to weight
+
+         Returns:
+             Fused results sorted by combined score
+         """
+         # Normalize scores for each method
+         normalized = {}
+         for method, results in results_dict.items():
+             normalized[method] = self._normalize_scores(results)
+
+         # Combine with weights
+         combined_scores = defaultdict(float)
+         weight_sums = defaultdict(float)  # Sum of weights of methods each doc appeared in
+
+         for method, results in normalized.items():
+             weight = weights.get(method, 0.0)
+
+             for mem_id, score in results:
+                 combined_scores[mem_id] += weight * score
+                 weight_sums[mem_id] += weight
+
+         # Normalize by actual weights (some docs may not appear in all methods)
+         fused = []
+         for mem_id, score in combined_scores.items():
+             normalized_score = score / weight_sums[mem_id] if weight_sums[mem_id] > 0 else 0.0
+             fused.append((mem_id, normalized_score))
+
+         fused.sort(key=lambda x: x[1], reverse=True)
+
+         return fused
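
Dividing by the per-document weight sum means a memory is scored only against the methods that actually returned it. With the default weights, a hypothetical memory found by BM25 (normalized score 0.8) and semantic search (0.5) but not by the graph comes out at about 0.671, rather than being dragged down by the missing 0.3 graph weight:

    combined = 0.4 * 0.8 + 0.3 * 0.5    # 0.47
    weight_sum = 0.4 + 0.3              # 0.7 (graph contributed nothing)
    final = combined / weight_sum       # ā‰ˆ 0.671
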
+
+     def search(
+         self,
+         query: str,
+         limit: int = 10,
+         method: str = "hybrid",
+         weights: Optional[Dict[str, float]] = None,
+         use_cache: bool = True
+     ) -> List[Dict[str, Any]]:
+         """
+         Hybrid search with multiple retrieval methods.
+
+         Args:
+             query: Search query
+             limit: Maximum results
+             method: Fusion method ("hybrid", "weighted", "rrf", "bm25", "semantic", "graph")
+             weights: Custom weights for weighted fusion (default: balanced)
+             use_cache: Use cache for results
+
+         Returns:
+             List of memory dictionaries with scores and match details
+         """
+         start_time = time.time()
+
+         # Check cache
+         if use_cache and self.cache:
+             cached = self.cache.get(query, limit=limit, method=method)
+             if cached is not None:
+                 self.last_search_time = time.time() - start_time
+                 return cached
+
+         # Default weights
+         if weights is None:
+             weights = {
+                 'bm25': 0.4,
+                 'semantic': 0.3,
+                 'graph': 0.3
+             }
+
+         # Single-method search
+         if method == "bm25":
+             raw_results = self.search_bm25(query, limit)
+         elif method == "semantic":
+             raw_results = self.search_semantic(query, limit)
+         elif method == "graph":
+             raw_results = self.search_graph(query, limit)
+
+         # Multi-method fusion
+         else:
+             fusion_start = time.time()
+
+             # Get results from all methods (fetch extra candidates for fusion)
+             results_dict = {}
+
+             if weights.get('bm25', 0) > 0:
+                 results_dict['bm25'] = self.search_bm25(query, limit=limit * 2)
+
+             if weights.get('semantic', 0) > 0:
+                 results_dict['semantic'] = self.search_semantic(query, limit=limit * 2)
+
+             if weights.get('graph', 0) > 0:
+                 results_dict['graph'] = self.search_graph(query, limit=limit * 2)
+
+             # Fusion
+             if method == "rrf":
+                 raw_results = self._reciprocal_rank_fusion(list(results_dict.values()))
+             else:  # weighted or hybrid
+                 raw_results = self._weighted_fusion(results_dict, weights)
+
+             self.last_fusion_time = time.time() - fusion_start
+
+         # Limit results
+         raw_results = raw_results[:limit]
+
+         # Fetch full memory details
+         results = self._fetch_memory_details(raw_results, query)
+
+         # Cache results
+         if use_cache and self.cache:
+             self.cache.put(query, results, limit=limit, method=method)
+
+         self.last_search_time = time.time() - start_time
+
+         return results
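
Because a weight of zero skips that retriever before it runs, callers can prune expensive methods per query. A short sketch, reusing the hybrid instance from the earlier usage example (queries and weights are illustrative):

    # Pure keyword lookup, no fusion overhead
    fast = hybrid.search("sqlite WAL mode", method="bm25")

    # Weighted fusion with graph traversal disabled entirely
    no_graph = hybrid.search(
        "refactoring ideas",
        method="weighted",
        weights={'bm25': 0.5, 'semantic': 0.5, 'graph': 0.0},
    )
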
+
+     def _fetch_memory_details(
+         self,
+         raw_results: List[Tuple[int, float]],
+         query: str
+     ) -> List[Dict[str, Any]]:
+         """
+         Fetch full memory details for result IDs.
+
+         Args:
+             raw_results: List of (memory_id, score) tuples
+             query: Original query (for context)
+
+         Returns:
+             List of memory dictionaries with full details
+         """
+         if not raw_results:
+             return []
+
+         memory_ids = [mem_id for mem_id, _ in raw_results]
+         id_to_score = {mem_id: score for mem_id, score in raw_results}
+
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+
+         # Fetch memories
+         placeholders = ','.join(['?'] * len(memory_ids))
+         cursor.execute(f'''
+             SELECT id, content, summary, project_path, project_name, tags,
+                    category, parent_id, tree_path, depth, memory_type,
+                    importance, created_at, cluster_id, last_accessed, access_count
+             FROM memories
+             WHERE id IN ({placeholders})
+         ''', memory_ids)
+
+         rows = cursor.fetchall()
+         conn.close()
+
+         # Build result dictionaries
+         results = []
+         for row in rows:
+             mem_id = row[0]
+             results.append({
+                 'id': mem_id,
+                 'content': row[1],
+                 'summary': row[2],
+                 'project_path': row[3],
+                 'project_name': row[4],
+                 'tags': json.loads(row[5]) if row[5] else [],
+                 'category': row[6],
+                 'parent_id': row[7],
+                 'tree_path': row[8],
+                 'depth': row[9],
+                 'memory_type': row[10],
+                 'importance': row[11],
+                 'created_at': row[12],
+                 'cluster_id': row[13],
+                 'last_accessed': row[14],
+                 'access_count': row[15],
+                 'score': id_to_score.get(mem_id, 0.0),
+                 'match_type': 'hybrid'
+             })
+
+         # Sort by score
+         results.sort(key=lambda x: x['score'], reverse=True)
+
+         return results
+
+     def get_stats(self) -> Dict[str, Any]:
+         """
+         Get hybrid search statistics.
+
+         Returns:
+             Dictionary with performance stats
+         """
+         stats = {
+             'bm25': self.bm25.get_stats(),
+             'optimizer': self.optimizer.get_stats(),
+             'last_search_time_ms': self.last_search_time * 1000,
+             'last_fusion_time_ms': self.last_fusion_time * 1000,
+             'tfidf_available': self._tfidf_vectorizer is not None,
+             'graph_available': self._graph_engine is not None
+         }
+
+         if self.cache:
+             stats['cache'] = self.cache.get_stats()
+
+         return stats
+
+
+ # CLI interface for testing
+ if __name__ == "__main__":
+     import sys
+
+     print("Hybrid Search Engine - Demo")
+     print("=" * 60)
+
+     # Use the default database location
+     db_path = Path.home() / ".claude-memory" / "memory.db"
+
+     if not db_path.exists():
+         print(f"Error: Database not found at {db_path}")
+         print("Please run memory_store_v2.py to create the database first.")
+         sys.exit(1)
+
+     # Initialize hybrid search
+     print("\nInitializing hybrid search engine...")
+     print(f"Database: {db_path}")
+
+     hybrid = HybridSearchEngine(db_path, enable_cache=True)
+
+     stats = hybrid.get_stats()
+     print(f"\nāœ“ Indexed {stats['bm25']['num_documents']} memories")
+     print(f"  Vocabulary: {stats['bm25']['vocabulary_size']} terms")
+     print(f"  TF-IDF: {'Available' if stats['tfidf_available'] else 'Not available'}")
+     print(f"  Graph: {'Available' if stats['graph_available'] else 'Not available'}")
+
+     # Test search
+     if len(sys.argv) > 1:
+         query = ' '.join(sys.argv[1:])
+     else:
+         query = "python web development"
+
+     print("\n" + "=" * 60)
+     print(f"Search Query: '{query}'")
+     print("=" * 60)
+
+     # Test different methods
+     methods = ["bm25", "hybrid"]
+
+     for method in methods:
+         print(f"\nMethod: {method.upper()}")
+         results = hybrid.search(query, limit=5, method=method)
+
+         print(f"  Found {len(results)} results in {hybrid.last_search_time*1000:.2f}ms")
+
+         for i, mem in enumerate(results, 1):
+             print(f"\n  [{i}] Score: {mem['score']:.3f} | ID: {mem['id']}")
+             if mem.get('category'):
+                 print(f"      Category: {mem['category']}")
+             if mem.get('tags'):
+                 print(f"      Tags: {', '.join(mem['tags'][:3])}")
+             print(f"      Content: {mem['content'][:100]}...")
+
+     # Display final stats
+     print("\n" + "=" * 60)
+     print("Performance Summary:")
+     print("=" * 60)
+
+     final_stats = hybrid.get_stats()
+     print(f"  Last search time: {final_stats['last_search_time_ms']:.2f}ms")
+     print(f"  Last fusion time: {final_stats['last_fusion_time_ms']:.2f}ms")
+     print(f"  Target: <50ms for 1K memories {'āœ“' if final_stats['last_search_time_ms'] < 50 else 'āœ—'}")
+
+     if 'cache' in final_stats:
+         cache_stats = final_stats['cache']
+         print(f"\n  Cache hit rate: {cache_stats['hit_rate']*100:.1f}%")
+         print(f"  Cache size: {cache_stats['current_size']}/{cache_stats['max_size']}")