codemesh 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. codemesh/__init__.py +5 -0
  2. codemesh/__main__.py +8 -0
  3. codemesh/cli/__init__.py +3 -0
  4. codemesh/cli/init.py +208 -0
  5. codemesh/cli/install_cmd.py +208 -0
  6. codemesh/cli/main.py +469 -0
  7. codemesh/context/__init__.py +3 -0
  8. codemesh/context/builder.py +388 -0
  9. codemesh/db/__init__.py +3 -0
  10. codemesh/db/connection.py +66 -0
  11. codemesh/db/queries.py +696 -0
  12. codemesh/db/schema.py +125 -0
  13. codemesh/embedding/__init__.py +3 -0
  14. codemesh/extraction/__init__.py +7 -0
  15. codemesh/extraction/languages/__init__.py +95 -0
  16. codemesh/extraction/languages/c_family.py +614 -0
  17. codemesh/extraction/languages/go.py +397 -0
  18. codemesh/extraction/languages/java.py +603 -0
  19. codemesh/extraction/languages/python.py +718 -0
  20. codemesh/extraction/languages/rust.py +435 -0
  21. codemesh/extraction/languages/swift.py +464 -0
  22. codemesh/extraction/languages/typescript.py +1222 -0
  23. codemesh/extraction/orchestrator.py +218 -0
  24. codemesh/graph/__init__.py +8 -0
  25. codemesh/graph/query_manager.py +117 -0
  26. codemesh/graph/traverser.py +107 -0
  27. codemesh/indexer.py +240 -0
  28. codemesh/mcp/__init__.py +3 -0
  29. codemesh/mcp/server.py +60 -0
  30. codemesh/mcp/tools.py +605 -0
  31. codemesh/querier.py +269 -0
  32. codemesh/resolution/__init__.py +7 -0
  33. codemesh/resolution/frameworks/__init__.py +15 -0
  34. codemesh/resolution/frameworks/django.py +30 -0
  35. codemesh/resolution/frameworks/fastapi.py +23 -0
  36. codemesh/resolution/import_resolver.py +69 -0
  37. codemesh/resolution/name_matcher.py +30 -0
  38. codemesh/resolution/resolver.py +268 -0
  39. codemesh/retrieval/__init__.py +7 -0
  40. codemesh/search/__init__.py +3 -0
  41. codemesh/sync/__init__.py +3 -0
  42. codemesh/sync/watcher.py +135 -0
  43. codemesh/types.py +148 -0
  44. codemesh/viz/__init__.py +0 -0
  45. codemesh/viz/graph_builder.py +162 -0
  46. codemesh/viz/server.py +122 -0
  47. codemesh/viz/templates/index.html +359 -0
  48. codemesh-0.1.1.dist-info/METADATA +337 -0
  49. codemesh-0.1.1.dist-info/RECORD +52 -0
  50. codemesh-0.1.1.dist-info/WHEEL +4 -0
  51. codemesh-0.1.1.dist-info/entry_points.txt +2 -0
  52. codemesh-0.1.1.dist-info/licenses/LICENSE +21 -0
codemesh/db/queries.py ADDED
@@ -0,0 +1,696 @@
1
+ """Database query helpers: node CRUD, FTS5 search with BM25 scoring."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ import re
7
+ import sqlite3
8
+ from pathlib import Path
9
+
10
+ from codemesh.types import Language, Node, NodeKind
11
+
12
+
13
def row_to_node(row: sqlite3.Row) -> Node:
    """Convert a ``nodes`` table row into a :class:`Node`.

    Args:
        row: A row addressable by column name (``sqlite3.Row``). It must
            provide the core columns read below; the four boolean flag
            columns (``is_exported``, ``is_async``, ``is_static``,
            ``is_abstract``) are optional.

    Returns:
        The Node built from the row. NULL ``docstring``/``signature`` become
        ``""``, NULL ``visibility`` becomes ``"public"`` and a missing flag
        column is read as ``False``.
    """
    # ``"col" in row`` on a sqlite3.Row iterates over the row's *values*, not
    # its column names, so column presence must be checked against keys().
    columns = set(row.keys())

    def flag(column: str) -> bool:
        """Read an optional 0/1 column as a bool, defaulting to False."""
        return bool(row[column]) if column in columns else False

    return Node(
        id=row["id"],
        kind=NodeKind(row["kind"]),
        name=row["name"],
        qualified_name=row["qualified_name"],
        file_path=Path(row["file_path"]),
        language=Language(row["language"]),
        start_line=row["start_line"],
        end_line=row["end_line"],
        start_column=row["start_column"],
        end_column=row["end_column"],
        docstring=row["docstring"] or "",
        signature=row["signature"] or "",
        visibility=row["visibility"] or "public",
        parent_id=row["parent_id"],
        is_exported=flag("is_exported"),
        is_async=flag("is_async"),
        is_static=flag("is_static"),
        is_abstract=flag("is_abstract"),
    )
35
+
36
+
37
def get_node(conn: sqlite3.Connection, node_id: str) -> Node | None:
    """Look up one node by its ``id``; return ``None`` when no row matches."""
    cursor = conn.execute("SELECT * FROM nodes WHERE id = ?", (node_id,))
    record = cursor.fetchone()
    if not record:
        return None
    return row_to_node(record)
41
+
42
+
43
def get_all_nodes(conn: sqlite3.Connection) -> list[Node]:
    """Load every row of the ``nodes`` table as a Node."""
    nodes = []
    for record in conn.execute("SELECT * FROM nodes").fetchall():
        nodes.append(row_to_node(record))
    return nodes
47
+
48
+
49
def get_all_node_names(conn: sqlite3.Connection) -> list[str]:
    """Return each distinct node name once (input for the fuzzy fallback)."""
    cursor = conn.execute("SELECT DISTINCT name FROM nodes")
    names = []
    for record in cursor.fetchall():
        names.append(record[0])
    return names
53
+
54
+
55
def _rows_to_edges(rows) -> list:
    """Build Edge objects from ``edges`` table rows, skipping unconvertible rows.

    Shared by the edge readers below so all of them apply the same
    column mapping and the same best-effort policy.
    """
    from codemesh.types import Edge, EdgeKind

    edges = []
    for r in rows:
        # Best-effort read: any exception raised while building one Edge
        # drops that row only; the remaining rows are still returned.
        with contextlib.suppress(Exception):
            edges.append(
                Edge(
                    id=r["id"],
                    source_id=r["source_id"],
                    target_id=r["target_id"],
                    kind=EdgeKind(r["kind"]),
                    confidence=r["confidence"],
                    weight_source=r["weight_source"],
                    line=r["line"],
                    column=r["column"],
                )
            )
    return edges


def get_all_edges(conn: sqlite3.Connection) -> list:
    """Fetch all edges as Edge objects (rows that fail to convert are skipped)."""
    rows = conn.execute("SELECT * FROM edges").fetchall()
    return _rows_to_edges(rows)


def get_edges_by_source(conn: sqlite3.Connection, source_id: str) -> list:
    """Fetch the edges whose ``source_id`` equals ``source_id``."""
    rows = conn.execute("SELECT * FROM edges WHERE source_id = ?", (source_id,)).fetchall()
    return _rows_to_edges(rows)


def get_edges_by_target(conn: sqlite3.Connection, target_id: str) -> list:
    """Fetch the edges whose ``target_id`` equals ``target_id``."""
    rows = conn.execute("SELECT * FROM edges WHERE target_id = ?", (target_id,)).fetchall()
    return _rows_to_edges(rows)
122
+
123
+
124
def count_nodes(conn: sqlite3.Connection) -> int:
    """Return the number of rows in the ``nodes`` table."""
    record = conn.execute("SELECT COUNT(*) FROM nodes").fetchone()
    if not record:
        return 0
    return record[0]
128
+
129
+
130
def count_edges(conn: sqlite3.Connection) -> int:
    """Return the number of rows in the ``edges`` table."""
    record = conn.execute("SELECT COUNT(*) FROM edges").fetchone()
    if not record:
        return 0
    return record[0]
134
+
135
+
136
def insert_node(conn: sqlite3.Connection, node: Node) -> None:
    """Write ``node`` to the ``nodes`` table, replacing any row with the same id.

    The ``metadata`` column is always stored as the literal ``"{}"``; only the
    ``content_hash`` entry of ``node.metadata`` is persisted, in its own column.
    """
    meta = node.metadata
    digest = meta.get("content_hash", "") if meta else ""

    identity = (
        node.id,
        node.kind.value,
        node.name,
        node.qualified_name,
        str(node.file_path),
        node.language.value,
    )
    span = (node.start_line, node.end_line, node.start_column, node.end_column)
    details = (node.docstring, node.signature, node.visibility, node.parent_id, "{}")
    # Boolean flags are stored as 0/1 integers.
    flags = tuple(
        int(value)
        for value in (node.is_exported, node.is_async, node.is_static, node.is_abstract)
    )

    conn.execute(
        """
        INSERT OR REPLACE INTO nodes
        (id, kind, name, qualified_name, file_path, language,
        start_line, end_line, start_column, end_column,
        docstring, signature, visibility, parent_id, metadata,
        is_exported, is_async, is_static, is_abstract, content_hash)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
        identity + span + details + flags + (digest,),
    )
171
+
172
+
173
def insert_edge(conn: sqlite3.Connection, edge) -> None:
    """Write an Edge to the ``edges`` table, replacing any row with the same id.

    Anything that is not an ``Edge`` instance (the legacy edge-id calling
    form) is accepted for backward compatibility and silently ignored.
    """
    from codemesh.types import Edge

    if not isinstance(edge, Edge):
        # Backward-compatible: legacy callers passed an edge_id; nothing is written.
        return

    # Optional attributes: absent or falsy values are stored as "".
    resolved = getattr(edge, "resolved_target", None) or ""
    type_ctx = getattr(edge, "type_context", None) or ""
    values = (
        edge.id,
        edge.source_id,
        edge.target_id,
        edge.kind.value,
        edge.confidence,
        edge.weight_source,
        edge.line,
        edge.column,
        "{}",
        resolved,
        type_ctx,
    )
    conn.execute(
        """
        INSERT OR REPLACE INTO edges
        (id, source_id, target_id, kind, confidence,
        weight_source, line, column, metadata,
        resolved_target, type_context)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
        values,
    )
203
+
204
+
205
+ # ── FTS5 BM25 Search ─────────────────────────────────────────────────────────
206
+
207
+ # Stop words to filter from search queries
208
+ _STOP_WORDS = {
209
+ "the",
210
+ "a",
211
+ "an",
212
+ "and",
213
+ "or",
214
+ "but",
215
+ "in",
216
+ "on",
217
+ "at",
218
+ "to",
219
+ "for",
220
+ "of",
221
+ "with",
222
+ "by",
223
+ "from",
224
+ "is",
225
+ "it",
226
+ "that",
227
+ "this",
228
+ "are",
229
+ "was",
230
+ "be",
231
+ "has",
232
+ "had",
233
+ "have",
234
+ "do",
235
+ "does",
236
+ "did",
237
+ "will",
238
+ "would",
239
+ "could",
240
+ "should",
241
+ "may",
242
+ "might",
243
+ "can",
244
+ "shall",
245
+ "not",
246
+ "no",
247
+ "all",
248
+ "each",
249
+ "every",
250
+ "how",
251
+ "what",
252
+ "where",
253
+ "when",
254
+ "who",
255
+ "which",
256
+ "why",
257
+ "i",
258
+ "me",
259
+ "my",
260
+ "we",
261
+ "our",
262
+ "you",
263
+ "your",
264
+ "he",
265
+ "she",
266
+ "they",
267
+ "show",
268
+ "give",
269
+ "tell",
270
+ "been",
271
+ "done",
272
+ "made",
273
+ "used",
274
+ "using",
275
+ "work",
276
+ "works",
277
+ "found",
278
+ "also",
279
+ "into",
280
+ "then",
281
+ "than",
282
+ "just",
283
+ "more",
284
+ "some",
285
+ "such",
286
+ "over",
287
+ "only",
288
+ "out",
289
+ "its",
290
+ "so",
291
+ "up",
292
+ "as",
293
+ "if",
294
+ "look",
295
+ "need",
296
+ "needs",
297
+ "want",
298
+ "happen",
299
+ "happens",
300
+ "affect",
301
+ "affected",
302
+ "break",
303
+ "breaks",
304
+ "failing",
305
+ "implemented",
306
+ "implement",
307
+ "code",
308
+ "file",
309
+ "files",
310
+ "function",
311
+ "method",
312
+ "class",
313
+ "type",
314
+ "fix",
315
+ "bug",
316
+ "called",
317
+ }
318
+
319
+
320
+ def _split_camel_case(text: str) -> str:
321
+ """Split camelCase/PascalCase into space-separated words.
322
+ E.g., 'getUserName' → 'get User Name'
323
+ """
324
+ # Insert space between lowercase and uppercase
325
+ text = re.sub(r"([a-z])([A-Z])", r"\1 \2", text)
326
+ # Insert space between consecutive uppercase and uppercase+lowercase
327
+ text = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1 \2", text)
328
+ return text
329
+
330
+
331
+ def _get_stem_variants(term: str) -> list[str]:
332
+ """Generate stem variants for FTS prefix matching.
333
+ E.g., 'caching' → ['cach', 'cache'], 'eviction' → ['evict']
334
+ """
335
+ variants = []
336
+ t = term.lower()
337
+ if len(t) <= 3:
338
+ return variants
339
+
340
+ # -ing: caching→cach/cache, handling→handl/handle
341
+ if t.endswith("ing") and len(t) > 5:
342
+ base = t[:-3]
343
+ variants.append(base)
344
+ variants.append(base + "e")
345
+ if len(base) >= 2 and base[-1] == base[-2]:
346
+ variants.append(base[:-1])
347
+
348
+ # -tion/-sion: eviction→evict
349
+ if (t.endswith("tion") or t.endswith("sion")) and len(t) > 5:
350
+ variants.append(t[:-3])
351
+
352
+ # -ment: management→manage
353
+ if t.endswith("ment") and len(t) > 6:
354
+ variants.append(t[:-4])
355
+
356
+ # -ies: entries→entry
357
+ if t.endswith("ies") and len(t) > 4:
358
+ variants.append(t[:-3] + "y")
359
+ # -es: processes→process
360
+ elif t.endswith("es") and len(t) > 4:
361
+ variants.append(t[:-2])
362
+ # -s: errors→error (skip -ss endings)
363
+ elif t.endswith("s") and not t.endswith("ss") and len(t) > 4:
364
+ variants.append(t[:-1])
365
+
366
+ # -ed: handled→handle
367
+ if t.endswith("ed") and not t.endswith("eed") and len(t) > 4:
368
+ variants.append(t[:-1])
369
+ variants.append(t[:-2])
370
+ if t.endswith("ied") and len(t) > 5:
371
+ variants.append(t[:-3] + "y")
372
+
373
+ # -er: builder→build
374
+ if t.endswith("er") and len(t) > 4:
375
+ base = t[:-2]
376
+ variants.append(base)
377
+ variants.append(base + "e")
378
+ if len(base) >= 2 and base[-1] == base[-2]:
379
+ variants.append(base[:-1])
380
+
381
+ return [v for v in variants if len(v) >= 3 and v != t]
382
+
383
+
384
def _extract_search_terms(query: str) -> list[str]:
    """Turn a natural-language query into lowercase search terms.

    Compound identifiers (camelCase/PascalCase) are kept whole and also split
    into their parts; underscores and dots act as separators. Parts shorter
    than three characters and stop words are dropped, and stem variants of
    every term are added. The order of the returned list is unspecified.
    """
    terms: set[str] = set()

    # Keep compound identifiers intact (lowercased) before they are split.
    compound_pattern = r"\b([a-zA-Z][a-zA-Z0-9]*(?:[A-Z][a-z]+)+)\b"
    for match in re.finditer(compound_pattern, query):
        identifier = match.group(1)
        if identifier and len(identifier) >= 3:
            terms.add(identifier.lower())

    # Break the query into words: camelCase boundaries, then "_" / ".",
    # then anything that is not alphanumeric.
    flattened = re.sub(r"[_.]+", " ", _split_camel_case(query))
    for piece in re.split(r"[^a-zA-Z0-9]+", flattened):
        candidate = piece.lower()
        if len(candidate) >= 3 and candidate not in _STOP_WORDS:
            terms.add(candidate)

    # Add stems that are neither already present nor stop words.
    stems = {
        stem
        for term in terms
        for stem in _get_stem_variants(term)
        if stem not in terms and stem not in _STOP_WORDS
    }
    terms.update(stems)

    return list(terms)
420
+
421
+
422
+ def _build_fts_query(terms: list[str]) -> str:
423
+ """Build FTS5 query string with prefix matching.
424
+ E.g., ['auth', 'service'] → '"auth"* OR "service"*'
425
+ """
426
+ parts = []
427
+ for term in terms:
428
+ # Escape special FTS5 characters
429
+ cleaned = re.sub(r"['\"*():^]", "", term)
430
+ if cleaned:
431
+ parts.append(f'"{cleaned}"*')
432
+ return " OR ".join(parts) if parts else ""
433
+
434
+
435
+ def _bounded_edit_distance(a: str, b: str, max_dist: int) -> int:
436
+ """Damerau-Levenshtein bounded edit distance. Returns max_dist+1 if exceeded."""
437
+ if a == b:
438
+ return 0
439
+ al, bl = len(a), len(b)
440
+ if abs(al - bl) > max_dist:
441
+ return max_dist + 1
442
+ if al == 0:
443
+ return bl
444
+ if bl == 0:
445
+ return al
446
+
447
+ prev = list(range(bl + 1))
448
+ cur = [0] * (bl + 1)
449
+
450
+ for i in range(1, al + 1):
451
+ cur[0] = i
452
+ row_min = cur[0]
453
+ for j in range(1, bl + 1):
454
+ cost = 0 if a[i - 1] == b[j - 1] else 1
455
+ cur[j] = min(cur[j - 1] + 1, prev[j] + 1, prev[j - 1] + cost)
456
+ if cur[j] < row_min:
457
+ row_min = cur[j]
458
+ if row_min > max_dist:
459
+ return max_dist + 1
460
+ prev, cur = cur, prev
461
+
462
+ return prev[bl]
463
+
464
+
465
+ def _kind_bonus(kind: str) -> int:
466
+ """Kind-based bonus for search ranking."""
467
+ bonuses = {
468
+ "function": 10,
469
+ "method": 10,
470
+ "class": 8,
471
+ "interface": 9,
472
+ "type_alias": 6,
473
+ "struct": 6,
474
+ "trait": 9,
475
+ "enum": 5,
476
+ "property": 3,
477
+ "field": 3,
478
+ "variable": 2,
479
+ "constant": 3,
480
+ "import": 1,
481
+ "file": 0,
482
+ }
483
+ return bonuses.get(kind, 0)
484
+
485
+
486
+ def _exported_bonus(node) -> int:
487
+ """Bonus for exported/public symbols."""
488
+ if hasattr(node, "is_exported") and node.is_exported:
489
+ return 5
490
+ if hasattr(node, "visibility") and node.visibility == "private":
491
+ return -3
492
+ return 0
493
+
494
+
495
+ def _name_match_bonus(node_name: str, query: str) -> int:
496
+ """Bonus when a node's name matches the search query."""
497
+ name_lower = node_name.lower()
498
+ query_lower = query.lower()
499
+
500
+ # Exact match
501
+ if name_lower == query_lower:
502
+ return 80
503
+
504
+ # Name starts with query
505
+ if name_lower.startswith(query_lower):
506
+ ratio = len(query_lower) / len(name_lower) if name_lower else 0
507
+ return round(10 + 30 * ratio)
508
+
509
+ # Name contains query
510
+ if name_lower.find(query_lower) != -1:
511
+ return 10
512
+
513
+ return 0
514
+
515
+
516
def search_nodes_fts(
    conn: sqlite3.Connection, query: str, limit: int = 10
) -> list[tuple[Node, float]]:
    """Full-text search with BM25 + multi-signal scoring.

    3-tier search strategy; each later tier only runs while fewer than
    ``limit`` results have been collected:
    1. FTS5 with prefix matching and BM25 column weights
    2. LIKE-based substring fallback on name / qualified name
    3. Fuzzy edit-distance fallback on node names (for typos)

    Post-hoc scoring adds a kind bonus, a name match bonus and an
    exported/private adjustment, then keeps the ``limit`` best results.

    Args:
        conn: Open database connection. Rows must be addressable by column
            name (``row["score"]``), i.e. a ``sqlite3.Row``-style row factory.
        query: Free-text query or identifier.
        limit: Maximum number of results to return.

    Returns:
        ``(node, score)`` pairs sorted by descending score. Empty when the
        query is blank or contains only stop words / very short tokens.
        A tier whose SQL or row conversion fails is skipped (logged at debug
        level); results gathered up to that point are kept.
    """
    import logging

    log = logging.getLogger(__name__)

    if not query or not query.strip():
        return []
    text = query.strip()

    terms = _extract_search_terms(query)
    if not terms:
        return []

    results: list[tuple[Node, float]] = []
    seen_ids: set[str] = set()

    # ── Tier 1: FTS5 with prefix matching ─────────────────────────────────
    fts_query_str = _build_fts_query(terms)
    if fts_query_str:
        # Over-fetch so the post-hoc re-ranking has candidates to choose from.
        fts_limit = max(limit * 5, 100)
        try:
            # NOTE(review): the bm25 weights are positional per nodes_fts
            # column; they must match the FTS table definition — confirm
            # against the schema module.
            rows = conn.execute(
                """
                SELECT nodes.*, bm25(nodes_fts, 0, 20, 5, 1, 2) as score
                FROM nodes_fts
                JOIN nodes ON nodes_fts.id = nodes.id
                WHERE nodes_fts MATCH ?
                ORDER BY score LIMIT ?
                """,
                (fts_query_str, fts_limit),
            ).fetchall()
            for row in rows:
                node = row_to_node(row)
                score = abs(row["score"])  # bm25 returns negative scores
                results.append((node, score))
                seen_ids.add(node.id)
        except Exception:
            # Best-effort: fall through to the next tier, but leave a trace.
            log.debug("FTS5 search failed for %r", fts_query_str, exc_info=True)

    # ── Tier 2: LIKE-based substring search ───────────────────────────────
    if len(results) < limit:
        # Escape LIKE metacharacters so "_" and "%" typed by the user (very
        # common in identifiers) match literally instead of as wildcards.
        escaped = text.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        prefix_pattern = f"{escaped}%"
        contains_pattern = f"%{escaped}%"

        # Only emit the exclusion when there is something to exclude:
        # ``id NOT IN (NULL)`` evaluates to NULL for every row and would make
        # this tier return nothing exactly when tier 1 found nothing.
        excluded = list(seen_ids)
        exclusion = ""
        if excluded:
            exclusion = "AND id NOT IN ({})".format(",".join("?" * len(excluded)))

        # NOTE(review): the WHERE clause matches qualified_name by prefix
        # while the CASE scores it by substring; kept as in the original.
        sql = f"""
            SELECT nodes.*,
                   CASE
                       WHEN lower(name) = lower(?) THEN 1.0
                       WHEN lower(name) LIKE lower(?) ESCAPE '\\' THEN 0.9
                       WHEN lower(qualified_name) LIKE lower(?) ESCAPE '\\' THEN 0.7
                       ELSE 0.5
                   END as score
            FROM nodes
            WHERE (lower(name) LIKE lower(?) ESCAPE '\\'
                   OR lower(qualified_name) LIKE lower(?) ESCAPE '\\')
              {exclusion}
            ORDER BY score DESC, length(name) ASC LIMIT ?
            """
        params = [
            text,
            prefix_pattern,
            contains_pattern,
            contains_pattern,
            prefix_pattern,
            *excluded,
            limit * 3,
        ]
        try:
            rows = conn.execute(sql, params).fetchall()
            for row in rows:
                node = row_to_node(row)
                results.append((node, row["score"]))
                seen_ids.add(node.id)
        except Exception:
            log.debug("LIKE fallback search failed for %r", text, exc_info=True)

    # ── Tier 3: Fuzzy edit-distance fallback ──────────────────────────────
    if len(results) < limit and len(text) >= 3:
        query_lower = text.lower()
        # Short queries tolerate a single edit, longer ones two.
        max_dist = 1 if len(query_lower) <= 4 else 2
        candidates = []
        for name in get_all_node_names(conn):
            dist = _bounded_edit_distance(name.lower(), query_lower, max_dist)
            if dist <= max_dist:
                candidates.append((name, dist))
        candidates.sort(key=lambda x: x[1])

        fuzzy_limit = max(limit * 2, 50)
        for name, dist in candidates[:fuzzy_limit]:
            if len(results) >= limit:
                break
            try:
                rows = conn.execute(
                    "SELECT * FROM nodes WHERE name = ? LIMIT 5",
                    (name,),
                ).fetchall()
                for row in rows:
                    if row["id"] not in seen_ids:
                        node = row_to_node(row)
                        results.append((node, 1.0 / (1 + dist)))
                        seen_ids.add(node.id)
            except Exception:
                log.debug("Fuzzy lookup failed for name %r", name, exc_info=True)

    # ── Post-hoc scoring ──────────────────────────────────────────────────
    if results:
        scored = [
            (
                node,
                score
                + _kind_bonus(node.kind.value)
                + _name_match_bonus(node.name, text)
                + _exported_bonus(node),
            )
            for node, score in results
        ]
        scored.sort(key=lambda x: x[1], reverse=True)
        results = scored[:limit]

    return results
641
+
642
+
643
+ # — Delta indexing helpers —
644
+
645
+
646
def get_nodes_by_file(conn: sqlite3.Connection, file_path: str) -> list:
    """Return the nodes whose ``file_path`` column equals ``file_path``."""
    cursor = conn.execute("SELECT * FROM nodes WHERE file_path = ?", (file_path,))
    found = []
    for record in cursor.fetchall():
        found.append(row_to_node(record))
    return found
650
+
651
+
652
def get_node_by_qualified_name(conn: sqlite3.Connection, qualified_name: str) -> Node | None:
    """Look up one node by qualified name; return ``None`` when no row matches.

    If several rows share the qualified name, the first one returned by the
    database is used.
    """
    cursor = conn.execute("SELECT * FROM nodes WHERE qualified_name = ?", (qualified_name,))
    record = cursor.fetchone()
    if not record:
        return None
    return row_to_node(record)
656
+
657
+
658
def delete_node_and_edges(conn: sqlite3.Connection, node_id: str) -> None:
    """Remove a node together with every edge that starts or ends at it.

    Edges are deleted first, then the node row. No commit is issued here.
    """
    statements = (
        ("DELETE FROM edges WHERE source_id = ? OR target_id = ?", (node_id, node_id)),
        ("DELETE FROM nodes WHERE id = ?", (node_id,)),
    )
    for sql, params in statements:
        conn.execute(sql, params)
662
+
663
+
664
def delete_edges_by_source(conn: sqlite3.Connection, node_id: str) -> None:
    """Remove every edge whose ``source_id`` is ``node_id`` (its outgoing edges)."""
    sql = "DELETE FROM edges WHERE source_id = ?"
    params = (node_id,)
    conn.execute(sql, params)
667
+
668
+
669
def get_incoming_edges_to_node(conn: sqlite3.Connection, node_id: str) -> list:
    """Return the raw ``edges`` rows whose ``target_id`` is ``node_id``.

    Unlike the other edge readers, the rows are returned as fetched and are
    not converted to Edge objects.
    """
    cursor = conn.execute("SELECT * FROM edges WHERE target_id = ?", (node_id,))
    return cursor.fetchall()
673
+
674
+
675
def insert_file_node_dep(conn: sqlite3.Connection, file_path: str, node_id: str) -> None:
    """Record that ``file_path`` depends on ``node_id``.

    Uses ``INSERT OR IGNORE``, so a row that would violate a constraint on
    ``file_node_deps`` is skipped instead of raising.
    """
    sql = "INSERT OR IGNORE INTO file_node_deps (file_path, node_id) VALUES (?, ?)"
    mapping = (file_path, node_id)
    conn.execute(sql, mapping)
681
+
682
+
683
def get_files_referencing_node(conn: sqlite3.Connection, node_id: str) -> list[str]:
    """Return the file paths recorded in ``file_node_deps`` for ``node_id``."""
    cursor = conn.execute(
        "SELECT file_path FROM file_node_deps WHERE node_id = ?", (node_id,)
    )
    paths = []
    for record in cursor.fetchall():
        paths.append(record[0])
    return paths
689
+
690
+
691
def count_ghost_edges(conn: sqlite3.Connection) -> int:
    """Count edges whose ``target_id`` has no matching row in ``nodes``.

    Integrity check; only the target side of each edge is examined.
    """
    sql = "SELECT COUNT(*) FROM edges WHERE target_id NOT IN (SELECT id FROM nodes)"
    record = conn.execute(sql).fetchone()
    if record:
        return record[0]
    return 0