codemesh 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codemesh/__init__.py +5 -0
- codemesh/__main__.py +8 -0
- codemesh/cli/__init__.py +3 -0
- codemesh/cli/init.py +208 -0
- codemesh/cli/install_cmd.py +208 -0
- codemesh/cli/main.py +469 -0
- codemesh/context/__init__.py +3 -0
- codemesh/context/builder.py +388 -0
- codemesh/db/__init__.py +3 -0
- codemesh/db/connection.py +66 -0
- codemesh/db/queries.py +696 -0
- codemesh/db/schema.py +125 -0
- codemesh/embedding/__init__.py +3 -0
- codemesh/extraction/__init__.py +7 -0
- codemesh/extraction/languages/__init__.py +95 -0
- codemesh/extraction/languages/c_family.py +614 -0
- codemesh/extraction/languages/go.py +397 -0
- codemesh/extraction/languages/java.py +603 -0
- codemesh/extraction/languages/python.py +718 -0
- codemesh/extraction/languages/rust.py +435 -0
- codemesh/extraction/languages/swift.py +464 -0
- codemesh/extraction/languages/typescript.py +1222 -0
- codemesh/extraction/orchestrator.py +218 -0
- codemesh/graph/__init__.py +8 -0
- codemesh/graph/query_manager.py +117 -0
- codemesh/graph/traverser.py +107 -0
- codemesh/indexer.py +240 -0
- codemesh/mcp/__init__.py +3 -0
- codemesh/mcp/server.py +60 -0
- codemesh/mcp/tools.py +605 -0
- codemesh/querier.py +269 -0
- codemesh/resolution/__init__.py +7 -0
- codemesh/resolution/frameworks/__init__.py +15 -0
- codemesh/resolution/frameworks/django.py +30 -0
- codemesh/resolution/frameworks/fastapi.py +23 -0
- codemesh/resolution/import_resolver.py +69 -0
- codemesh/resolution/name_matcher.py +30 -0
- codemesh/resolution/resolver.py +268 -0
- codemesh/retrieval/__init__.py +7 -0
- codemesh/search/__init__.py +3 -0
- codemesh/sync/__init__.py +3 -0
- codemesh/sync/watcher.py +135 -0
- codemesh/types.py +148 -0
- codemesh/viz/__init__.py +0 -0
- codemesh/viz/graph_builder.py +162 -0
- codemesh/viz/server.py +122 -0
- codemesh/viz/templates/index.html +359 -0
- codemesh-0.1.1.dist-info/METADATA +337 -0
- codemesh-0.1.1.dist-info/RECORD +52 -0
- codemesh-0.1.1.dist-info/WHEEL +4 -0
- codemesh-0.1.1.dist-info/entry_points.txt +2 -0
- codemesh-0.1.1.dist-info/licenses/LICENSE +21 -0
codemesh/db/queries.py
ADDED
|
@@ -0,0 +1,696 @@
|
|
|
1
|
+
"""Database query helpers: node CRUD, FTS5 search with BM25 scoring."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import contextlib
|
|
6
|
+
import re
|
|
7
|
+
import sqlite3
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from codemesh.types import Language, Node, NodeKind
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def row_to_node(row: sqlite3.Row) -> Node:
    """Convert a ``nodes`` table row into a :class:`Node`.

    Args:
        row: A row that supports lookup by column name and ``keys()``
            (i.e. produced with ``row_factory = sqlite3.Row``).

    Returns:
        The reconstructed node. NULL ``docstring`` / ``signature`` become
        ``""`` and a NULL ``visibility`` becomes ``"public"``. Each boolean
        flag defaults to ``False`` when its column is absent from the row.
    """
    # ``"col" in row`` on a sqlite3.Row iterates the row's *values*, not its
    # column names, so column presence must be checked against ``keys()``.
    columns = set(row.keys())

    def flag(column: str) -> bool:
        return bool(row[column]) if column in columns else False

    return Node(
        id=row["id"],
        kind=NodeKind(row["kind"]),
        name=row["name"],
        qualified_name=row["qualified_name"],
        file_path=Path(row["file_path"]),
        language=Language(row["language"]),
        start_line=row["start_line"],
        end_line=row["end_line"],
        start_column=row["start_column"],
        end_column=row["end_column"],
        docstring=row["docstring"] or "",
        signature=row["signature"] or "",
        visibility=row["visibility"] or "public",
        parent_id=row["parent_id"],
        is_exported=flag("is_exported"),
        is_async=flag("is_async"),
        is_static=flag("is_static"),
        is_abstract=flag("is_abstract"),
    )
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def get_node(conn: sqlite3.Connection, node_id: str) -> Node | None:
    """Look up a single node by primary key.

    Returns the matching :class:`Node`, or ``None`` when no row has that ID.
    """
    cursor = conn.execute("SELECT * FROM nodes WHERE id = ?", (node_id,))
    record = cursor.fetchone()
    if not record:
        return None
    return row_to_node(record)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def get_all_nodes(conn: sqlite3.Connection) -> list[Node]:
|
|
44
|
+
"""Fetch all nodes."""
|
|
45
|
+
rows = conn.execute("SELECT * FROM nodes").fetchall()
|
|
46
|
+
return [row_to_node(r) for r in rows]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def get_all_node_names(conn: sqlite3.Connection) -> list[str]:
    """Return each distinct node name once (input for the fuzzy fallback)."""
    cursor = conn.execute("SELECT DISTINCT name FROM nodes")
    return [record[0] for record in cursor.fetchall()]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _rows_to_edges(rows) -> list:
    """Convert ``edges`` table rows into ``Edge`` objects.

    Conversion is best effort: a row that cannot be turned into an ``Edge``
    (for example because its ``kind`` is rejected by ``EdgeKind``) is skipped
    rather than aborting the whole fetch.
    """
    # Imported lazily, as the original per-function imports did.
    from codemesh.types import Edge, EdgeKind

    edges = []
    for r in rows:
        with contextlib.suppress(Exception):
            edges.append(
                Edge(
                    id=r["id"],
                    source_id=r["source_id"],
                    target_id=r["target_id"],
                    kind=EdgeKind(r["kind"]),
                    confidence=r["confidence"],
                    weight_source=r["weight_source"],
                    line=r["line"],
                    column=r["column"],
                )
            )
    return edges


def get_all_edges(conn: sqlite3.Connection) -> list:
    """Fetch all edges; rows that cannot be converted are skipped."""
    rows = conn.execute("SELECT * FROM edges").fetchall()
    return _rows_to_edges(rows)


def get_edges_by_source(conn: sqlite3.Connection, source_id: str) -> list:
    """Fetch the edges whose ``source_id`` equals *source_id*.

    Rows that cannot be converted to an ``Edge`` are skipped.
    """
    rows = conn.execute("SELECT * FROM edges WHERE source_id = ?", (source_id,)).fetchall()
    return _rows_to_edges(rows)


def get_edges_by_target(conn: sqlite3.Connection, target_id: str) -> list:
    """Fetch the edges whose ``target_id`` equals *target_id*.

    Rows that cannot be converted to an ``Edge`` are skipped.
    """
    rows = conn.execute("SELECT * FROM edges WHERE target_id = ?", (target_id,)).fetchall()
    return _rows_to_edges(rows)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def count_nodes(conn: sqlite3.Connection) -> int:
    """Return the number of rows in the ``nodes`` table."""
    # Fall back to a one-element tuple holding 0 if no row comes back.
    (total,) = conn.execute("SELECT COUNT(*) FROM nodes").fetchone() or (0,)
    return total
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def count_edges(conn: sqlite3.Connection) -> int:
    """Return the number of rows in the ``edges`` table."""
    # Fall back to a one-element tuple holding 0 if no row comes back.
    (total,) = conn.execute("SELECT COUNT(*) FROM edges").fetchone() or (0,)
    return total
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def insert_node(conn: sqlite3.Connection, node: Node) -> None:
    """Write *node* to the ``nodes`` table, replacing any row with the same ID.

    The ``content_hash`` column is taken from ``node.metadata["content_hash"]``
    (``""`` when the metadata is empty or lacks the key).  The ``metadata``
    column itself is always stored as the literal ``"{}"``.
    """
    meta = node.metadata
    digest = meta.get("content_hash", "") if meta else ""

    # Values in the same order as the column list below.
    values = (
        node.id,
        node.kind.value,
        node.name,
        node.qualified_name,
        str(node.file_path),
        node.language.value,
        node.start_line,
        node.end_line,
        node.start_column,
        node.end_column,
        node.docstring,
        node.signature,
        node.visibility,
        node.parent_id,
        "{}",
        int(node.is_exported),
        int(node.is_async),
        int(node.is_static),
        int(node.is_abstract),
        digest,
    )
    conn.execute(
        """
        INSERT OR REPLACE INTO nodes
        (id, kind, name, qualified_name, file_path, language,
        start_line, end_line, start_column, end_column,
        docstring, signature, visibility, parent_id, metadata,
        is_exported, is_async, is_static, is_abstract, content_hash)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
        values,
    )
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def insert_edge(conn: sqlite3.Connection, edge) -> None:
    """Write *edge* to the ``edges`` table, replacing any row with the same ID.

    Only ``Edge`` instances are persisted; any other argument is accepted
    for backward compatibility with legacy callers and silently ignored.
    The ``metadata`` column is always stored as ``"{}"``; ``resolved_target``
    and ``type_context`` are stored as ``""`` when missing or falsy.
    """
    from codemesh.types import Edge

    if not isinstance(edge, Edge):
        # Backward-compatible: treat as edge_id (legacy callers)
        return

    resolved_target = getattr(edge, "resolved_target", None) or ""
    type_context = getattr(edge, "type_context", None) or ""
    values = (
        edge.id,
        edge.source_id,
        edge.target_id,
        edge.kind.value,
        edge.confidence,
        edge.weight_source,
        edge.line,
        edge.column,
        "{}",
        resolved_target,
        type_context,
    )
    conn.execute(
        """
        INSERT OR REPLACE INTO edges
        (id, source_id, target_id, kind, confidence,
        weight_source, line, column, metadata,
        resolved_target, type_context)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
        values,
    )
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
# ── FTS5 BM25 Search ─────────────────────────────────────────────────────────
|
|
206
|
+
|
|
207
|
+
# Stop words to filter from search queries
|
|
208
|
+
_STOP_WORDS = {
|
|
209
|
+
"the",
|
|
210
|
+
"a",
|
|
211
|
+
"an",
|
|
212
|
+
"and",
|
|
213
|
+
"or",
|
|
214
|
+
"but",
|
|
215
|
+
"in",
|
|
216
|
+
"on",
|
|
217
|
+
"at",
|
|
218
|
+
"to",
|
|
219
|
+
"for",
|
|
220
|
+
"of",
|
|
221
|
+
"with",
|
|
222
|
+
"by",
|
|
223
|
+
"from",
|
|
224
|
+
"is",
|
|
225
|
+
"it",
|
|
226
|
+
"that",
|
|
227
|
+
"this",
|
|
228
|
+
"are",
|
|
229
|
+
"was",
|
|
230
|
+
"be",
|
|
231
|
+
"has",
|
|
232
|
+
"had",
|
|
233
|
+
"have",
|
|
234
|
+
"do",
|
|
235
|
+
"does",
|
|
236
|
+
"did",
|
|
237
|
+
"will",
|
|
238
|
+
"would",
|
|
239
|
+
"could",
|
|
240
|
+
"should",
|
|
241
|
+
"may",
|
|
242
|
+
"might",
|
|
243
|
+
"can",
|
|
244
|
+
"shall",
|
|
245
|
+
"not",
|
|
246
|
+
"no",
|
|
247
|
+
"all",
|
|
248
|
+
"each",
|
|
249
|
+
"every",
|
|
250
|
+
"how",
|
|
251
|
+
"what",
|
|
252
|
+
"where",
|
|
253
|
+
"when",
|
|
254
|
+
"who",
|
|
255
|
+
"which",
|
|
256
|
+
"why",
|
|
257
|
+
"i",
|
|
258
|
+
"me",
|
|
259
|
+
"my",
|
|
260
|
+
"we",
|
|
261
|
+
"our",
|
|
262
|
+
"you",
|
|
263
|
+
"your",
|
|
264
|
+
"he",
|
|
265
|
+
"she",
|
|
266
|
+
"they",
|
|
267
|
+
"show",
|
|
268
|
+
"give",
|
|
269
|
+
"tell",
|
|
270
|
+
"been",
|
|
271
|
+
"done",
|
|
272
|
+
"made",
|
|
273
|
+
"used",
|
|
274
|
+
"using",
|
|
275
|
+
"work",
|
|
276
|
+
"works",
|
|
277
|
+
"found",
|
|
278
|
+
"also",
|
|
279
|
+
"into",
|
|
280
|
+
"then",
|
|
281
|
+
"than",
|
|
282
|
+
"just",
|
|
283
|
+
"more",
|
|
284
|
+
"some",
|
|
285
|
+
"such",
|
|
286
|
+
"over",
|
|
287
|
+
"only",
|
|
288
|
+
"out",
|
|
289
|
+
"its",
|
|
290
|
+
"so",
|
|
291
|
+
"up",
|
|
292
|
+
"as",
|
|
293
|
+
"if",
|
|
294
|
+
"look",
|
|
295
|
+
"need",
|
|
296
|
+
"needs",
|
|
297
|
+
"want",
|
|
298
|
+
"happen",
|
|
299
|
+
"happens",
|
|
300
|
+
"affect",
|
|
301
|
+
"affected",
|
|
302
|
+
"break",
|
|
303
|
+
"breaks",
|
|
304
|
+
"failing",
|
|
305
|
+
"implemented",
|
|
306
|
+
"implement",
|
|
307
|
+
"code",
|
|
308
|
+
"file",
|
|
309
|
+
"files",
|
|
310
|
+
"function",
|
|
311
|
+
"method",
|
|
312
|
+
"class",
|
|
313
|
+
"type",
|
|
314
|
+
"fix",
|
|
315
|
+
"bug",
|
|
316
|
+
"called",
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def _split_camel_case(text: str) -> str:
|
|
321
|
+
"""Split camelCase/PascalCase into space-separated words.
|
|
322
|
+
E.g., 'getUserName' → 'get User Name'
|
|
323
|
+
"""
|
|
324
|
+
# Insert space between lowercase and uppercase
|
|
325
|
+
text = re.sub(r"([a-z])([A-Z])", r"\1 \2", text)
|
|
326
|
+
# Insert space between consecutive uppercase and uppercase+lowercase
|
|
327
|
+
text = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1 \2", text)
|
|
328
|
+
return text
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def _get_stem_variants(term: str) -> list[str]:
|
|
332
|
+
"""Generate stem variants for FTS prefix matching.
|
|
333
|
+
E.g., 'caching' → ['cach', 'cache'], 'eviction' → ['evict']
|
|
334
|
+
"""
|
|
335
|
+
variants = []
|
|
336
|
+
t = term.lower()
|
|
337
|
+
if len(t) <= 3:
|
|
338
|
+
return variants
|
|
339
|
+
|
|
340
|
+
# -ing: caching→cach/cache, handling→handl/handle
|
|
341
|
+
if t.endswith("ing") and len(t) > 5:
|
|
342
|
+
base = t[:-3]
|
|
343
|
+
variants.append(base)
|
|
344
|
+
variants.append(base + "e")
|
|
345
|
+
if len(base) >= 2 and base[-1] == base[-2]:
|
|
346
|
+
variants.append(base[:-1])
|
|
347
|
+
|
|
348
|
+
# -tion/-sion: eviction→evict
|
|
349
|
+
if (t.endswith("tion") or t.endswith("sion")) and len(t) > 5:
|
|
350
|
+
variants.append(t[:-3])
|
|
351
|
+
|
|
352
|
+
# -ment: management→manage
|
|
353
|
+
if t.endswith("ment") and len(t) > 6:
|
|
354
|
+
variants.append(t[:-4])
|
|
355
|
+
|
|
356
|
+
# -ies: entries→entry
|
|
357
|
+
if t.endswith("ies") and len(t) > 4:
|
|
358
|
+
variants.append(t[:-3] + "y")
|
|
359
|
+
# -es: processes→process
|
|
360
|
+
elif t.endswith("es") and len(t) > 4:
|
|
361
|
+
variants.append(t[:-2])
|
|
362
|
+
# -s: errors→error (skip -ss endings)
|
|
363
|
+
elif t.endswith("s") and not t.endswith("ss") and len(t) > 4:
|
|
364
|
+
variants.append(t[:-1])
|
|
365
|
+
|
|
366
|
+
# -ed: handled→handle
|
|
367
|
+
if t.endswith("ed") and not t.endswith("eed") and len(t) > 4:
|
|
368
|
+
variants.append(t[:-1])
|
|
369
|
+
variants.append(t[:-2])
|
|
370
|
+
if t.endswith("ied") and len(t) > 5:
|
|
371
|
+
variants.append(t[:-3] + "y")
|
|
372
|
+
|
|
373
|
+
# -er: builder→build
|
|
374
|
+
if t.endswith("er") and len(t) > 4:
|
|
375
|
+
base = t[:-2]
|
|
376
|
+
variants.append(base)
|
|
377
|
+
variants.append(base + "e")
|
|
378
|
+
if len(base) >= 2 and base[-1] == base[-2]:
|
|
379
|
+
variants.append(base[:-1])
|
|
380
|
+
|
|
381
|
+
return [v for v in variants if len(v) >= 3 and v != t]
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def _extract_search_terms(query: str) -> list[str]:
    """Extract meaningful search terms from a natural language query.

    Splits camelCase, PascalCase, snake_case and dotted names into individual
    lower-cased tokens, drops tokens shorter than three characters and stop
    words, and adds stem variants for FTS prefix matching.  Whole camelCase
    identifiers are also kept (lower-cased) as compound terms.

    Returns:
        The unique terms in sorted order, so the same query always yields
        the same term list regardless of string-hash randomisation.
    """
    tokens: set[str] = set()

    # Preserve compound identifiers before splitting
    for m in re.finditer(r"\b([a-zA-Z][a-zA-Z0-9]*(?:[A-Z][a-z]+)+)\b", query):
        compound = m.group(1)
        if compound and len(compound) >= 3:
            tokens.add(compound.lower())

    # Split camelCase/PascalCase
    camel_split = _split_camel_case(query)
    # Replace underscores and dots with spaces
    normalised = re.sub(r"[_.]+", " ", camel_split)
    # Split on non-alphanumeric
    words = re.split(r"[^a-zA-Z0-9]+", normalised)

    for word in words:
        lower = word.lower()
        if len(lower) < 3 or lower in _STOP_WORDS:
            continue
        tokens.add(lower)

    # Generate stem variants; collected separately so ``tokens`` is not
    # mutated while it is being iterated.
    stems: set[str] = set()
    for token in tokens:
        for variant in _get_stem_variants(token):
            if variant not in tokens and variant not in _STOP_WORDS:
                stems.add(variant)
    tokens |= stems

    return sorted(tokens)
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
def _build_fts_query(terms: list[str]) -> str:
|
|
423
|
+
"""Build FTS5 query string with prefix matching.
|
|
424
|
+
E.g., ['auth', 'service'] → '"auth"* OR "service"*'
|
|
425
|
+
"""
|
|
426
|
+
parts = []
|
|
427
|
+
for term in terms:
|
|
428
|
+
# Escape special FTS5 characters
|
|
429
|
+
cleaned = re.sub(r"['\"*():^]", "", term)
|
|
430
|
+
if cleaned:
|
|
431
|
+
parts.append(f'"{cleaned}"*')
|
|
432
|
+
return " OR ".join(parts) if parts else ""
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
def _bounded_edit_distance(a: str, b: str, max_dist: int) -> int:
|
|
436
|
+
"""Damerau-Levenshtein bounded edit distance. Returns max_dist+1 if exceeded."""
|
|
437
|
+
if a == b:
|
|
438
|
+
return 0
|
|
439
|
+
al, bl = len(a), len(b)
|
|
440
|
+
if abs(al - bl) > max_dist:
|
|
441
|
+
return max_dist + 1
|
|
442
|
+
if al == 0:
|
|
443
|
+
return bl
|
|
444
|
+
if bl == 0:
|
|
445
|
+
return al
|
|
446
|
+
|
|
447
|
+
prev = list(range(bl + 1))
|
|
448
|
+
cur = [0] * (bl + 1)
|
|
449
|
+
|
|
450
|
+
for i in range(1, al + 1):
|
|
451
|
+
cur[0] = i
|
|
452
|
+
row_min = cur[0]
|
|
453
|
+
for j in range(1, bl + 1):
|
|
454
|
+
cost = 0 if a[i - 1] == b[j - 1] else 1
|
|
455
|
+
cur[j] = min(cur[j - 1] + 1, prev[j] + 1, prev[j - 1] + cost)
|
|
456
|
+
if cur[j] < row_min:
|
|
457
|
+
row_min = cur[j]
|
|
458
|
+
if row_min > max_dist:
|
|
459
|
+
return max_dist + 1
|
|
460
|
+
prev, cur = cur, prev
|
|
461
|
+
|
|
462
|
+
return prev[bl]
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
def _kind_bonus(kind: str) -> int:
|
|
466
|
+
"""Kind-based bonus for search ranking."""
|
|
467
|
+
bonuses = {
|
|
468
|
+
"function": 10,
|
|
469
|
+
"method": 10,
|
|
470
|
+
"class": 8,
|
|
471
|
+
"interface": 9,
|
|
472
|
+
"type_alias": 6,
|
|
473
|
+
"struct": 6,
|
|
474
|
+
"trait": 9,
|
|
475
|
+
"enum": 5,
|
|
476
|
+
"property": 3,
|
|
477
|
+
"field": 3,
|
|
478
|
+
"variable": 2,
|
|
479
|
+
"constant": 3,
|
|
480
|
+
"import": 1,
|
|
481
|
+
"file": 0,
|
|
482
|
+
}
|
|
483
|
+
return bonuses.get(kind, 0)
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
def _exported_bonus(node) -> int:
|
|
487
|
+
"""Bonus for exported/public symbols."""
|
|
488
|
+
if hasattr(node, "is_exported") and node.is_exported:
|
|
489
|
+
return 5
|
|
490
|
+
if hasattr(node, "visibility") and node.visibility == "private":
|
|
491
|
+
return -3
|
|
492
|
+
return 0
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
def _name_match_bonus(node_name: str, query: str) -> int:
|
|
496
|
+
"""Bonus when a node's name matches the search query."""
|
|
497
|
+
name_lower = node_name.lower()
|
|
498
|
+
query_lower = query.lower()
|
|
499
|
+
|
|
500
|
+
# Exact match
|
|
501
|
+
if name_lower == query_lower:
|
|
502
|
+
return 80
|
|
503
|
+
|
|
504
|
+
# Name starts with query
|
|
505
|
+
if name_lower.startswith(query_lower):
|
|
506
|
+
ratio = len(query_lower) / len(name_lower) if name_lower else 0
|
|
507
|
+
return round(10 + 30 * ratio)
|
|
508
|
+
|
|
509
|
+
# Name contains query
|
|
510
|
+
if name_lower.find(query_lower) != -1:
|
|
511
|
+
return 10
|
|
512
|
+
|
|
513
|
+
return 0
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
def _escape_like(text: str) -> str:
    """Escape backslash, ``%`` and ``_`` so *text* is literal in a LIKE pattern."""
    return text.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")


def _fts_rows(conn: sqlite3.Connection, terms: list[str], limit: int) -> list:
    """Tier 1: run the FTS5 prefix query; return ``[]`` if it cannot run."""
    fts_query_str = _build_fts_query(terms)
    if not fts_query_str:
        return []
    try:
        return conn.execute(
            """
            SELECT nodes.*, bm25(nodes_fts, 0, 20, 5, 1, 2) as score
            FROM nodes_fts
            JOIN nodes ON nodes_fts.id = nodes.id
            WHERE nodes_fts MATCH ?
            ORDER BY score LIMIT ?
            """,
            (fts_query_str, max(limit * 5, 100)),
        ).fetchall()
    except sqlite3.Error:
        # FTS query failed; the caller falls through to the next tier.
        return []


def _like_rows(conn: sqlite3.Connection, text: str, seen_ids: set[str], limit: int) -> list:
    """Tier 2: substring match on name / qualified name, skipping *seen_ids*."""
    needle = _escape_like(text)
    prefix = f"{needle}%"
    contains = f"%{needle}%"

    # ``id NOT IN (NULL)`` evaluates to NULL for every row and would filter
    # everything out, so the exclusion clause is only added when needed.
    excluded = list(seen_ids)
    exclusion = ""
    if excluded:
        exclusion = "AND id NOT IN ({})".format(",".join("?" * len(excluded)))

    sql = f"""
        SELECT nodes.*,
        CASE
            WHEN lower(name) = lower(?) THEN 1.0
            WHEN lower(name) LIKE lower(?) ESCAPE '\\' THEN 0.9
            WHEN lower(qualified_name) LIKE lower(?) ESCAPE '\\' THEN 0.7
            ELSE 0.5
        END as score
        FROM nodes
        WHERE (lower(name) LIKE lower(?) ESCAPE '\\'
               OR lower(qualified_name) LIKE lower(?) ESCAPE '\\')
        {exclusion}
        ORDER BY score DESC, length(name) ASC LIMIT ?
    """
    # Parameter order: exact name, name prefix, qualified-name substring
    # (CASE), then name substring and qualified-name substring (WHERE).
    params = [text, prefix, contains, contains, contains, *excluded, limit * 3]
    try:
        return conn.execute(sql, params).fetchall()
    except sqlite3.Error:
        return []


def _fuzzy_name_candidates(
    conn: sqlite3.Connection, query_lower: str, max_dist: int
) -> list[tuple[str, int]]:
    """Tier 3 helper: node names within *max_dist* edits, closest first."""
    candidates = []
    for name in get_all_node_names(conn):
        dist = _bounded_edit_distance(name.lower(), query_lower, max_dist)
        if dist <= max_dist:
            candidates.append((name, dist))
    candidates.sort(key=lambda item: item[1])
    return candidates


def search_nodes_fts(
    conn: sqlite3.Connection, query: str, limit: int = 10
) -> list[tuple[Node, float]]:
    """Full-text search with BM25 + multi-signal scoring.

    3-tier search strategy (a later tier only runs while fewer than *limit*
    results have been collected):
      1. FTS5 with prefix matching and BM25 column weights
      2. LIKE-based substring fallback (for camelCase matching)
      3. Fuzzy edit-distance fallback (for typos; queries of 3+ characters)

    Post-hoc scoring adds: kind bonus, name match bonus, exported bonus.

    Args:
        conn: Connection whose rows support lookup by column name.
        query: Free-text or identifier query.
        limit: Maximum number of results to return.

    Returns:
        Up to *limit* ``(node, score)`` pairs, highest score first.  Empty
        when the query is blank or yields no search terms.  A tier whose SQL
        fails with ``sqlite3.Error`` contributes nothing.
    """
    text = query.strip() if query else ""
    if not text:
        return []

    terms = _extract_search_terms(query)
    if not terms:
        return []

    results: list[tuple[Node, float]] = []
    seen_ids: set[str] = set()

    def add(node: Node, score: float) -> None:
        results.append((node, score))
        seen_ids.add(node.id)

    # ── Tier 1: FTS5 with prefix matching ─────────────────────────────────
    for row in _fts_rows(conn, terms, limit):
        add(row_to_node(row), abs(row["score"]))  # bm25 returns negative scores

    # ── Tier 2: LIKE-based substring search ───────────────────────────────
    if len(results) < limit:
        for row in _like_rows(conn, text, seen_ids, limit):
            add(row_to_node(row), row["score"])

    # ── Tier 3: Fuzzy edit-distance fallback ──────────────────────────────
    if len(results) < limit and len(text) >= 3:
        query_lower = text.lower()
        max_dist = 1 if len(query_lower) <= 4 else 2
        candidates = _fuzzy_name_candidates(conn, query_lower, max_dist)
        fuzzy_limit = max(limit * 2, 50)
        for name, dist in candidates[:fuzzy_limit]:
            if len(results) >= limit:
                break
            try:
                rows = conn.execute(
                    "SELECT * FROM nodes WHERE name = ? LIMIT 5",
                    (name,),
                ).fetchall()
            except sqlite3.Error:
                continue
            for row in rows:
                if row["id"] not in seen_ids:
                    add(row_to_node(row), 1.0 / (1 + dist))

    # ── Post-hoc scoring ──────────────────────────────────────────────────
    scored = [
        (
            node,
            score
            + _kind_bonus(node.kind.value)
            + _name_match_bonus(node.name, text)
            + _exported_bonus(node),
        )
        for node, score in results
    ]
    scored.sort(key=lambda item: item[1], reverse=True)
    return scored[:limit]
|
|
641
|
+
|
|
642
|
+
|
|
643
|
+
# — Delta indexing helpers —
|
|
644
|
+
|
|
645
|
+
|
|
646
|
+
def get_nodes_by_file(conn: sqlite3.Connection, file_path: str) -> list:
|
|
647
|
+
"""Fetch all nodes for a given file path."""
|
|
648
|
+
rows = conn.execute("SELECT * FROM nodes WHERE file_path = ?", (file_path,)).fetchall()
|
|
649
|
+
return [row_to_node(r) for r in rows]
|
|
650
|
+
|
|
651
|
+
|
|
652
|
+
def get_node_by_qualified_name(conn: sqlite3.Connection, qualified_name: str) -> Node | None:
    """Look up one node by its qualified name.

    Returns the first matching :class:`Node`, or ``None`` when no row matches.
    """
    cursor = conn.execute("SELECT * FROM nodes WHERE qualified_name = ?", (qualified_name,))
    record = cursor.fetchone()
    if not record:
        return None
    return row_to_node(record)
|
|
656
|
+
|
|
657
|
+
|
|
658
|
+
def delete_node_and_edges(conn: sqlite3.Connection, node_id: str) -> None:
    """Remove a node together with every edge that starts or ends at it."""
    # Edges are deleted first, then the node row itself.
    statements = (
        ("DELETE FROM edges WHERE source_id = ? OR target_id = ?", (node_id, node_id)),
        ("DELETE FROM nodes WHERE id = ?", (node_id,)),
    )
    for sql, params in statements:
        conn.execute(sql, params)
|
|
662
|
+
|
|
663
|
+
|
|
664
|
+
def delete_edges_by_source(conn: sqlite3.Connection, node_id: str) -> None:
    """Remove every edge whose ``source_id`` is *node_id*."""
    sql = "DELETE FROM edges WHERE source_id = ?"
    params = (node_id,)
    conn.execute(sql, params)
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
def get_incoming_edges_to_node(conn: sqlite3.Connection, node_id: str) -> list:
    """Return the raw ``edges`` rows whose ``target_id`` is *node_id*.

    The rows are returned as fetched; they are not converted to ``Edge``
    objects.
    """
    cursor = conn.execute("SELECT * FROM edges WHERE target_id = ?", (node_id,))
    return cursor.fetchall()
|
|
673
|
+
|
|
674
|
+
|
|
675
|
+
def insert_file_node_dep(conn: sqlite3.Connection, file_path: str, node_id: str) -> None:
    """Record that *file_path* depends on *node_id*.

    Uses ``INSERT OR IGNORE``, so a row that would violate a table
    constraint is skipped instead of raising.
    """
    sql = "INSERT OR IGNORE INTO file_node_deps (file_path, node_id) VALUES (?, ?)"
    conn.execute(sql, (file_path, node_id))
|
|
681
|
+
|
|
682
|
+
|
|
683
|
+
def get_files_referencing_node(conn: sqlite3.Connection, node_id: str) -> list[str]:
    """Return the ``file_path`` of every ``file_node_deps`` row for *node_id*."""
    cursor = conn.execute("SELECT file_path FROM file_node_deps WHERE node_id = ?", (node_id,))
    return [path for (path,) in cursor.fetchall()]
|
|
689
|
+
|
|
690
|
+
|
|
691
|
+
def count_ghost_edges(conn: sqlite3.Connection) -> int:
    """Count edges whose ``target_id`` matches no node ID (integrity check)."""
    cursor = conn.execute(
        "SELECT COUNT(*) FROM edges WHERE target_id NOT IN (SELECT id FROM nodes)"
    )
    # Fall back to a one-element tuple holding 0 if no row comes back.
    (dangling,) = cursor.fetchone() or (0,)
    return dangling
|