superlocalmemory 3.3.20 → 3.3.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/pyproject.toml +9 -1
- package/src/superlocalmemory/cli/commands.py +138 -22
- package/src/superlocalmemory/cli/daemon.py +372 -0
- package/src/superlocalmemory/cli/main.py +8 -0
- package/src/superlocalmemory/cli/pending_store.py +158 -0
- package/src/superlocalmemory/cli/setup_wizard.py +39 -6
- package/src/superlocalmemory/code_graph/__init__.py +46 -0
- package/src/superlocalmemory/code_graph/blast_radius.py +177 -0
- package/src/superlocalmemory/code_graph/bridge/__init__.py +36 -0
- package/src/superlocalmemory/code_graph/bridge/entity_resolver.py +464 -0
- package/src/superlocalmemory/code_graph/bridge/event_listeners.py +195 -0
- package/src/superlocalmemory/code_graph/bridge/fact_enricher.py +159 -0
- package/src/superlocalmemory/code_graph/bridge/hebbian_linker.py +170 -0
- package/src/superlocalmemory/code_graph/bridge/temporal_checker.py +152 -0
- package/src/superlocalmemory/code_graph/changes.py +363 -0
- package/src/superlocalmemory/code_graph/communities.py +299 -0
- package/src/superlocalmemory/code_graph/config.py +88 -0
- package/src/superlocalmemory/code_graph/database.py +482 -0
- package/src/superlocalmemory/code_graph/extractors/__init__.py +78 -0
- package/src/superlocalmemory/code_graph/extractors/python.py +413 -0
- package/src/superlocalmemory/code_graph/extractors/typescript.py +556 -0
- package/src/superlocalmemory/code_graph/flows.py +350 -0
- package/src/superlocalmemory/code_graph/git_hooks.py +226 -0
- package/src/superlocalmemory/code_graph/graph_engine.py +295 -0
- package/src/superlocalmemory/code_graph/graph_store.py +158 -0
- package/src/superlocalmemory/code_graph/incremental.py +200 -0
- package/src/superlocalmemory/code_graph/models.py +130 -0
- package/src/superlocalmemory/code_graph/parser.py +507 -0
- package/src/superlocalmemory/code_graph/resolver.py +321 -0
- package/src/superlocalmemory/code_graph/search.py +460 -0
- package/src/superlocalmemory/code_graph/service.py +95 -0
- package/src/superlocalmemory/code_graph/watcher.py +207 -0
- package/src/superlocalmemory/core/embedding_worker.py +4 -2
- package/src/superlocalmemory/core/embeddings.py +8 -2
- package/src/superlocalmemory/core/engine.py +32 -0
- package/src/superlocalmemory/core/engine_wiring.py +5 -0
- package/src/superlocalmemory/core/store_pipeline.py +23 -1
- package/src/superlocalmemory/encoding/fact_extractor.py +68 -7
- package/src/superlocalmemory/infra/event_bus.py +5 -0
- package/src/superlocalmemory/mcp/server.py +23 -0
- package/src/superlocalmemory/mcp/tools_code_graph.py +1592 -0
- package/src/superlocalmemory/retrieval/engine.py +137 -2
- package/src/superlocalmemory/retrieval/semantic_channel.py +6 -2
- package/src/superlocalmemory/retrieval/spreading_activation.py +5 -3
- package/src/superlocalmemory/retrieval/strategy.py +16 -0
- package/src/superlocalmemory/server/api.py +4 -2
- package/src/superlocalmemory/server/ui.py +5 -2
- package/src/superlocalmemory/storage/schema_code_graph.py +239 -0
- package/src/superlocalmemory/ui/index.html +1879 -0
- package/src/superlocalmemory/ui/js/agents.js +192 -0
- package/src/superlocalmemory/ui/js/auto-settings.js +399 -0
- package/src/superlocalmemory/ui/js/behavioral.js +276 -0
- package/src/superlocalmemory/ui/js/clusters.js +206 -0
- package/src/superlocalmemory/ui/js/compliance.js +252 -0
- package/src/superlocalmemory/ui/js/core.js +246 -0
- package/src/superlocalmemory/ui/js/dashboard.js +110 -0
- package/src/superlocalmemory/ui/js/events.js +178 -0
- package/src/superlocalmemory/ui/js/fact-detail.js +92 -0
- package/src/superlocalmemory/ui/js/feedback.js +333 -0
- package/src/superlocalmemory/ui/js/graph-core.js +447 -0
- package/src/superlocalmemory/ui/js/graph-filters.js +220 -0
- package/src/superlocalmemory/ui/js/graph-interactions.js +351 -0
- package/src/superlocalmemory/ui/js/graph-ui.js +214 -0
- package/src/superlocalmemory/ui/js/ide-status.js +102 -0
- package/src/superlocalmemory/ui/js/init.js +45 -0
- package/src/superlocalmemory/ui/js/learning.js +435 -0
- package/src/superlocalmemory/ui/js/lifecycle.js +298 -0
- package/src/superlocalmemory/ui/js/math-health.js +98 -0
- package/src/superlocalmemory/ui/js/memories.js +264 -0
- package/src/superlocalmemory/ui/js/modal.js +357 -0
- package/src/superlocalmemory/ui/js/patterns.js +93 -0
- package/src/superlocalmemory/ui/js/profiles.js +236 -0
- package/src/superlocalmemory/ui/js/recall-lab.js +292 -0
- package/src/superlocalmemory/ui/js/search.js +59 -0
- package/src/superlocalmemory/ui/js/settings.js +224 -0
- package/src/superlocalmemory/ui/js/timeline.js +32 -0
- package/src/superlocalmemory/ui/js/trust-dashboard.js +73 -0
|
@@ -0,0 +1,460 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory v3.4 — CodeGraph Module
|
|
4
|
+
|
|
5
|
+
"""HybridSearch — FTS5 + vec0 + Reciprocal Rank Fusion.
|
|
6
|
+
|
|
7
|
+
Provides text search (FTS5 BM25), optional semantic search (vec0 cosine),
|
|
8
|
+
and hybrid fusion via RRF (k=60). Kind boosting for functions/methods.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
import re
|
|
15
|
+
from collections import defaultdict
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
from superlocalmemory.code_graph.database import CodeGraphDatabase
|
|
20
|
+
from superlocalmemory.code_graph.models import NodeKind
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
# Result dataclass
|
|
27
|
+
# ---------------------------------------------------------------------------
|
|
28
|
+
|
|
29
|
+
@dataclass(frozen=True)
class SearchResult:
    """One ranked hit returned by the search entry points.

    Instances are immutable (``frozen=True``); the field order below is
    also the positional constructor order.
    """

    # Identifier of the matched row in ``graph_nodes``.
    node_id: str
    # Qualified name stored for the node.
    qualified_name: str
    # Short (unqualified) name stored for the node.
    name: str
    # Node kind as stored in the database.
    kind: str
    # File path stored for the node.
    file_path: str
    # Starting line stored for the node.
    line_start: int
    # Ranking score; higher sorts first. The scale depends on which
    # search path produced the result.
    score: float
    # Which channel produced the hit.
    match_source: str  # "fts5", "vector", "both", "keyword"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# ---------------------------------------------------------------------------
|
|
43
|
+
# Constants
|
|
44
|
+
# ---------------------------------------------------------------------------
|
|
45
|
+
|
|
46
|
+
# Damping constant "k" of Reciprocal Rank Fusion.
_RRF_K = 60

# Characters that carry special meaning in FTS5 query syntax.
_FTS5_META_CHARS = re.compile(r'([*"():\^{}+|])')

# Upper bound, in characters, on a sanitized FTS5 query.
_MAX_QUERY_LEN = 500

# Kind boost multipliers: score factor per node kind. Values above 1.0
# promote the kind, values below 1.0 demote it; kinds not listed here
# are left unscaled by the lookup in HybridSearch.
_KIND_BOOST: dict[str, float] = {
    node_kind.value: multiplier
    for node_kind, multiplier in (
        (NodeKind.FUNCTION, 1.3),
        (NodeKind.METHOD, 1.3),
        (NodeKind.CLASS, 1.1),
        (NodeKind.FILE, 0.8),
        (NodeKind.MODULE, 0.9),
    )
}
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# ---------------------------------------------------------------------------
|
|
61
|
+
# HybridSearch
|
|
62
|
+
# ---------------------------------------------------------------------------
|
|
63
|
+
|
|
64
|
+
class HybridSearch:
    """FTS5 + vec0 hybrid search over graph nodes.

    Falls back gracefully:
    - If vec0 unavailable: FTS5-only
    - If FTS5 returns nothing: keyword LIKE fallback
    - If graph is empty: returns []

    Every database call is best-effort: a failing query is logged at
    debug level and treated as "no results" instead of raising.
    """

    def __init__(self, db: CodeGraphDatabase) -> None:
        """Bind the search helper to *db*.

        Args:
            db: Database wrapper; only its ``execute(sql, params)`` method
                is used, and the returned rows are read by column name.
        """
        self._db = db

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def search(self, query: str, limit: int = 20) -> list[SearchResult]:
        """FTS5 text search on graph_nodes_fts.

        Args:
            query: Search query string.
            limit: Maximum results to return.

        Returns:
            List of SearchResult sorted by BM25 rank (best first). When
            FTS5 yields nothing, the LIKE-based keyword fallback is used
            instead. A blank query returns [].
        """
        if not query or not query.strip():
            return []

        fts_results = self._fts5_search(query)
        if not fts_results:
            return self._keyword_fallback(query, limit)

        return self._build_results(
            scores=fts_results,
            fts_ids=set(fts_results),
            vec_ids=set(),
            limit=limit,
        )

    def search_semantic(
        self, query_embedding: list[float], limit: int = 20
    ) -> list[SearchResult]:
        """vec0 similarity search (if embeddings exist).

        Args:
            query_embedding: Pre-computed query embedding vector.
            limit: Maximum results to return.

        Returns:
            List of SearchResult sorted by similarity (best first), or []
            when the vector query fails or matches nothing.
        """
        vec_results = self._vec0_search(query_embedding)
        if not vec_results:
            return []

        return self._build_results(
            scores=vec_results,
            fts_ids=set(),
            vec_ids=set(vec_results),
            limit=limit,
        )

    def search_hybrid(
        self,
        query: str,
        limit: int = 20,
        query_embedding: list[float] | None = None,
    ) -> list[SearchResult]:
        """FTS5 + vec0 merged via Reciprocal Rank Fusion (k=60).

        Args:
            query: Text query for FTS5.
            limit: Maximum results to return.
            query_embedding: Optional embedding for vec0 search.

        Returns:
            List of SearchResult with RRF-fused, kind-boosted scores. When
            neither channel matches, the keyword fallback is used. A blank
            query returns [].
        """
        if not query or not query.strip():
            return []

        fts_results = self._fts5_search(query)
        vec_results: dict[str, float] = {}
        if query_embedding is not None:
            vec_results = self._vec0_search(query_embedding)

        # If both empty, fall back to keyword
        if not fts_results and not vec_results:
            return self._keyword_fallback(query, limit)

        fused = self._rrf_fuse(fts_results, vec_results)
        fused = self._apply_kind_boost(fused, query)

        return self._build_results(
            scores=fused,
            fts_ids=set(fts_results),
            vec_ids=set(vec_results),
            limit=limit,
        )

    # ------------------------------------------------------------------
    # Internal: FTS5
    # ------------------------------------------------------------------

    def _fts5_search(self, query: str) -> dict[str, float]:
        """Run FTS5 BM25 search. Returns {node_id: score}.

        Returns an empty dict when the sanitized query is empty or the
        database call raises.
        """
        sanitized = _sanitize_fts_query(query)
        if not sanitized:
            return {}

        try:
            rows = self._db.execute(
                """SELECT gn.node_id, gn.name, gn.kind, fts.rank
                FROM graph_nodes_fts fts
                JOIN graph_nodes gn ON gn.rowid = fts.rowid
                WHERE graph_nodes_fts MATCH ?
                ORDER BY fts.rank
                LIMIT 100""",
                (sanitized,),
            )
        except Exception as exc:
            logger.debug("FTS5 search failed: %s", exc)
            return {}

        # rank is negative in FTS5 (lower = better), negate for higher=better
        return {row["node_id"]: -row["rank"] for row in rows}

    # ------------------------------------------------------------------
    # Internal: vec0
    # ------------------------------------------------------------------

    def _vec0_search(self, embedding: list[float]) -> dict[str, float]:
        """Run vec0 nearest-neighbour search. Returns {node_id: similarity}.

        The embedding is sent to the database as a JSON array string.
        Returns an empty dict when the database call raises.
        """
        import json

        try:
            rows = self._db.execute(
                """SELECT node_id, distance
                FROM code_node_embeddings
                WHERE embedding MATCH ?
                ORDER BY distance
                LIMIT 100""",
                (json.dumps(embedding),),
            )
        except Exception as exc:
            logger.debug("vec0 search failed (expected if no embeddings): %s", exc)
            return {}

        # similarity = 1.0 - distance. NOTE(review): this treats `distance`
        # as cosine distance (0=identical, 2=opposite) - confirm the table
        # is declared with a cosine metric.
        return {row["node_id"]: 1.0 - row["distance"] for row in rows}

    # ------------------------------------------------------------------
    # Internal: Keyword fallback
    # ------------------------------------------------------------------

    @staticmethod
    def _escape_like(word: str) -> str:
        r"""Escape LIKE wildcards in *word* for use with ``ESCAPE '\'``."""
        return (
            word.replace("\\", "\\\\")
            .replace("%", "\\%")
            .replace("_", "\\_")
        )

    def _keyword_fallback(self, query: str, limit: int) -> list[SearchResult]:
        """LIKE-based keyword search as last resort.

        Every query word (at most the first 10) must occur as a literal
        substring of the node's name or qualified name. ``%`` and ``_`` in
        the query are escaped so they match themselves instead of acting
        as wildcards - important because ``_`` is part of most snake_case
        identifiers.

        Args:
            query: Raw user query.
            limit: Maximum results to return; non-positive yields [].

        Returns:
            Results scored by ``_keyword_score``, best first.
        """
        words = query.lower().split()[:10]  # Cap at 10 words
        if not words or limit <= 0:
            return []

        condition = (
            "(LOWER(name) LIKE ? ESCAPE '\\'"
            " OR LOWER(qualified_name) LIKE ? ESCAPE '\\')"
        )
        params: list[str] = []
        for word in words:
            pattern = f"%{self._escape_like(word)}%"
            params.extend([pattern, pattern])
        where_clause = " AND ".join(condition for _ in words)

        try:
            rows = self._db.execute(
                f"""SELECT node_id, name, qualified_name, kind, file_path, line_start
                FROM graph_nodes
                WHERE {where_clause}
                LIMIT 100""",
                tuple(params),
            )
        except Exception as exc:
            logger.debug("Keyword fallback failed: %s", exc)
            return []

        results = [
            SearchResult(
                node_id=row["node_id"],
                qualified_name=row["qualified_name"],
                name=row["name"],
                kind=row["kind"],
                file_path=row["file_path"],
                line_start=row["line_start"],
                score=_keyword_score(row["name"], row["qualified_name"], words),
                match_source="keyword",
            )
            for row in rows
        ]
        results.sort(key=lambda r: -r.score)
        return results[:limit]

    # ------------------------------------------------------------------
    # Internal: RRF fusion
    # ------------------------------------------------------------------

    def _rrf_fuse(
        self,
        fts_results: dict[str, float],
        vec_results: dict[str, float],
    ) -> dict[str, float]:
        """Reciprocal Rank Fusion with k=60.

        Each channel is ranked by its own score (descending) and every
        node receives ``1 / (k + rank)`` per channel it appears in, with
        rank starting at 1. Only rank positions matter, so the two
        channels need not share a score scale.
        """
        rrf_scores: dict[str, float] = defaultdict(float)

        for channel in (fts_results, vec_results):
            ranked = sorted(channel.items(), key=lambda item: -item[1])
            for rank_pos, (node_id, _) in enumerate(ranked):
                rrf_scores[node_id] += 1.0 / (_RRF_K + rank_pos + 1)

        return dict(rrf_scores)

    # ------------------------------------------------------------------
    # Internal: Kind boosting
    # ------------------------------------------------------------------

    def _apply_kind_boost(
        self, scores: dict[str, float], query: str
    ) -> dict[str, float]:
        """Apply kind-based score boosting.

        Two multipliers may apply to a node:
        - the static per-kind factor from ``_KIND_BOOST``;
        - an extra 1.5x when the query's spelling hints at the kind:
          a PascalCase query boosts classes, a snake_case query boosts
          functions and methods.

        The spelling test runs on the whitespace-stripped query so that
        padding around an identifier does not disable the boost.

        Returns:
            A new dict of boosted scores; *scores* itself is not modified
            (an empty *scores* is returned as-is).
        """
        if not scores:
            return scores

        # Load kind info for scored nodes
        kind_map = self._load_node_kinds(list(scores))

        shape = query.strip()
        is_pascal = bool(re.match(r'^[A-Z][a-z]+([A-Z][a-z]+)+$', shape))
        is_snake = bool(re.match(r'^[a-z]+(_[a-z]+)+$', shape))
        callable_kinds = (NodeKind.FUNCTION.value, NodeKind.METHOD.value)

        boosted: dict[str, float] = {}
        for node_id, score in scores.items():
            kind = kind_map.get(node_id)
            if kind:
                score = score * _KIND_BOOST.get(kind, 1.0)
            if is_pascal and kind == NodeKind.CLASS.value:
                score *= 1.5
            if is_snake and kind in callable_kinds:
                score *= 1.5
            boosted[node_id] = score
        return boosted

    def _load_node_kinds(self, node_ids: list[str]) -> dict[str, str]:
        """Load kind for a list of node IDs.

        Returns:
            {node_id: kind} for the IDs found; {} when *node_ids* is empty
            or the query fails.
        """
        if not node_ids:
            return {}

        placeholders = ",".join("?" for _ in node_ids)
        try:
            rows = self._db.execute(
                f"SELECT node_id, kind FROM graph_nodes WHERE node_id IN ({placeholders})",
                tuple(node_ids),
            )
        except Exception as exc:
            # Best-effort: without kinds the caller simply applies no boost.
            logger.debug("Loading node kinds failed: %s", exc)
            return {}

        return {row["node_id"]: row["kind"] for row in rows}

    # ------------------------------------------------------------------
    # Internal: Build results
    # ------------------------------------------------------------------

    def _build_results(
        self,
        scores: dict[str, float],
        fts_ids: set[str],
        vec_ids: set[str],
        limit: int,
    ) -> list[SearchResult]:
        """Load node data and build SearchResult list.

        Args:
            scores: {node_id: score}; higher is better.
            fts_ids: Node IDs that came from the FTS5 channel.
            vec_ids: Node IDs that came from the vec0 channel.
            limit: Maximum results to return; non-positive yields [].

        Returns:
            Results ordered by score, best first. Node IDs whose details
            cannot be loaded are skipped.
        """
        # Guard non-positive limits explicitly: a negative slice bound
        # would drop items from the end instead of returning nothing.
        if not scores or limit <= 0:
            return []

        # Sort by score descending
        ranked = sorted(scores.items(), key=lambda item: -item[1])[:limit]

        # Load node details
        node_map = self._load_node_details([nid for nid, _ in ranked])

        results: list[SearchResult] = []
        for node_id, score in ranked:
            node = node_map.get(node_id)
            if node is None:
                continue

            in_fts = node_id in fts_ids
            in_vec = node_id in vec_ids
            if in_fts and in_vec:
                match_source = "both"
            elif in_fts:
                match_source = "fts5"
            elif in_vec:
                match_source = "vector"
            else:
                match_source = "keyword"

            results.append(SearchResult(
                node_id=node_id,
                qualified_name=node["qualified_name"],
                name=node["name"],
                kind=node["kind"],
                file_path=node["file_path"],
                line_start=node["line_start"],
                score=score,
                match_source=match_source,
            ))

        return results

    def _load_node_details(self, node_ids: list[str]) -> dict[str, dict[str, Any]]:
        """Load details for a set of node IDs.

        Returns:
            {node_id: row-as-dict} for the IDs found; {} when *node_ids*
            is empty or the query fails.
        """
        if not node_ids:
            return {}

        placeholders = ",".join("?" for _ in node_ids)
        try:
            rows = self._db.execute(
                f"""SELECT node_id, name, qualified_name, kind, file_path, line_start
                FROM graph_nodes WHERE node_id IN ({placeholders})""",
                tuple(node_ids),
            )
        except Exception as exc:
            # Best-effort: the caller turns missing details into no results.
            logger.debug("Loading node details failed: %s", exc)
            return {}

        return {row["node_id"]: dict(row) for row in rows}
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
# ---------------------------------------------------------------------------
|
|
425
|
+
# Module-level helpers
|
|
426
|
+
# ---------------------------------------------------------------------------
|
|
427
|
+
|
|
428
|
+
def _sanitize_fts_query(
    query: str,
    *,
    max_len: int | None = None,
    max_tokens: int = 20,
) -> str:
    """Sanitize query for FTS5 MATCH.

    Each whitespace-separated token becomes one double-quoted FTS5 string,
    so query operators and punctuation inside it (``*``, ``:``, ``(``,
    ``AND`` ...) are taken literally. Inside such a string only the double
    quote is special; it is escaped by doubling it.

    Whitespace control characters (tab, newline, ...) separate tokens;
    other control characters are removed.

    Args:
        query: Raw user query.
        max_len: Maximum length of the returned string. Defaults to
            ``_MAX_QUERY_LEN``.
        max_tokens: Maximum number of tokens taken from the query.

    Returns:
        Space-separated quoted tokens, or "" when nothing usable remains.
        Tokens that would push the result past *max_len* are dropped whole,
        so the result never ends inside an unterminated quoted string.
    """
    cleaned = "".join(
        " " if ch.isspace() else ch
        for ch in query
        if ch >= " " or ch.isspace()
    )
    tokens = cleaned.split()
    if not tokens:
        return ""

    budget = _MAX_QUERY_LEN if max_len is None else max_len

    quoted: list[str] = []
    used = 0
    for token in tokens[:max_tokens]:
        piece = '"' + token.replace('"', '""') + '"'
        cost = len(piece) + (1 if quoted else 0)  # +1 for the joining space
        if used + cost > budget:
            break
        quoted.append(piece)
        used += cost
    return " ".join(quoted)
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
def _keyword_score(name: str, qualified_name: str, words: list[str]) -> float:
    """Score how well *words* match a node's names.

    Per word, the best applicable name match counts once:
    3.0 if the word equals the name, 2.0 if the name starts with it,
    1.0 if the name merely contains it. Independently, 0.5 is added when
    the qualified name contains the word. Names are compared lowercased;
    *words* are used exactly as given.
    """
    short = name.lower()
    full = qualified_name.lower()

    total = 0.0
    for term in words:
        if term == short:
            name_points = 3.0
        elif short.startswith(term):
            name_points = 2.0
        elif term in short:
            name_points = 1.0
        else:
            name_points = 0.0
        qualified_points = 0.5 if term in full else 0.0
        total += name_points + qualified_points
    return total
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory v3.4 — CodeGraph Module
|
|
4
|
+
|
|
5
|
+
"""CodeGraphService — the main orchestrator.
|
|
6
|
+
|
|
7
|
+
Lazy initialization. DB created on first access.
|
|
8
|
+
Other phases flesh out the methods.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
from superlocalmemory.code_graph.config import CodeGraphConfig
|
|
18
|
+
from superlocalmemory.code_graph.database import CodeGraphDatabase
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class CodeGraphNotEnabledError(Exception):
    """Signals a code graph operation attempted while config.enabled is False."""
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class CodeGraphService:
    """Facade over the CodeGraph module.

    Holds the configuration and opens the database only when it is first
    needed; the sub-modules (parser, graph_engine, search, bridge) are
    meant to be reached through this object as they are implemented.
    """

    def __init__(
        self,
        config: CodeGraphConfig,
        slm_base_dir: Path | None = None,
    ) -> None:
        """Store the settings; no database is opened here.

        Args:
            config: Code graph configuration.
            slm_base_dir: Optional base directory handed to
                ``config.get_db_path`` when the DB path is resolved.
        """
        self._db: CodeGraphDatabase | None = None
        self._slm_base_dir = slm_base_dir
        self._config = config

    @property
    def config(self) -> CodeGraphConfig:
        """The configuration this service was created with."""
        return self._config

    @property
    def db(self) -> CodeGraphDatabase:
        """Database handle, opened on first access and then reused."""
        if self._db is not None:
            return self._db

        location = self._config.get_db_path(self._slm_base_dir)
        self._db = CodeGraphDatabase(location)
        logger.info("CodeGraph DB initialized at %s", location)
        return self._db

    def ensure_enabled(self) -> None:
        """Raise CodeGraphNotEnabledError unless the config enables the graph."""
        if self._config.enabled:
            return
        raise CodeGraphNotEnabledError(
            "Code graph not enabled. Set code_graph.enabled = true in config."
        )

    def get_stats(self) -> dict[str, Any]:
        """Return graph statistics without creating a database file.

        When no database is loaded and none exists on disk, a zeroed
        summary with ``built=False`` is returned. Otherwise the database's
        own stats are returned, extended with ``built``, ``db_path`` and
        ``repo_root``.
        """
        if self._db is None:
            location = self._config.get_db_path(self._slm_base_dir)
            if not location.exists():
                return {"nodes": 0, "edges": 0, "files": 0, "built": False}

        # Reading self.db opens the existing file if it is not loaded yet.
        database = self.db
        summary = database.get_stats()
        has_content = summary["nodes"] > 0 or summary["files"] > 0
        summary["built"] = has_content
        summary["db_path"] = str(database.db_path)
        summary["repo_root"] = str(self._config.repo_root)
        return summary
|
|
77
|
+
|
|
78
|
+
# ------------------------------------------------------------------
|
|
79
|
+
# Placeholder methods for future phases
|
|
80
|
+
# ------------------------------------------------------------------
|
|
81
|
+
|
|
82
|
+
# Phase 1: Parser
|
|
83
|
+
# def build(self, repo_path: Path | None = None) -> dict: ...
|
|
84
|
+
# def update(self, changed_files: list[str] | None = None) -> dict: ...
|
|
85
|
+
|
|
86
|
+
# Phase 2: Graph Engine
|
|
87
|
+
# def get_blast_radius(self, changed_files: list[str], ...) -> dict: ...
|
|
88
|
+
# def query(self, pattern: str, target: str, ...) -> dict: ...
|
|
89
|
+
|
|
90
|
+
# Phase 3: Search & Analysis
|
|
91
|
+
# def search(self, query: str, ...) -> dict: ...
|
|
92
|
+
# def detect_changes(self, ...) -> dict: ...
|
|
93
|
+
|
|
94
|
+
# Phase 4: Bridge
|
|
95
|
+
# def resolve_entities(self, fact_text: str, ...) -> list: ...
|