ontosight-codegraph 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ontosight_codegraph/__init__.py +38 -0
- ontosight_codegraph/cli.py +96 -0
- ontosight_codegraph/query.py +59 -0
- ontosight_codegraph/store.py +589 -0
- ontosight_codegraph/topology.py +262 -0
- ontosight_codegraph/topology_display.py +102 -0
- ontosight_codegraph/view.py +267 -0
- ontosight_codegraph-0.1.0.dist-info/METADATA +71 -0
- ontosight_codegraph-0.1.0.dist-info/RECORD +11 -0
- ontosight_codegraph-0.1.0.dist-info/WHEEL +4 -0
- ontosight_codegraph-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,589 @@
|
|
|
1
|
+
"""Read CodeGraph SQLite index and build call-graph subgraphs for OntoSight."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
import sqlite3
|
|
7
|
+
from collections import deque
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Callable, Iterable, List, Optional, Sequence, Set, Tuple
|
|
11
|
+
|
|
12
|
+
from pydantic import BaseModel, Field
|
|
13
|
+
|
|
14
|
+
# Node kinds filtered out (via `kind NOT IN (...)`) of seed lookup, symbol
# search and the call-graph adjacency built by CodeGraphStore.
EXCLUDED_NODE_KINDS = frozenset({"parameter", "import", "export"})
# Value of `edges.kind` that selects call relationships.
CALL_EDGE_KIND = "calls"
# Hint appended to the error raised when the index database is missing.
INIT_HINT = "Run: npx @colbymchenry/codegraph init -i"

# Column list for queries whose rows are converted by `_row_to_node`; callers
# append their own WHERE clause to it.
NODE_SELECT = """
SELECT id, kind, name, qualified_name, file_path, language,
start_line, end_line, signature, docstring
FROM nodes
"""
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class CodeGraphNotFoundError(FileNotFoundError):
    """Raised when `.codegraph/codegraph.db` is missing.

    Subclasses ``FileNotFoundError``, so callers that already handle a
    missing file also catch this error.
    """
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class CodeSymbolNode(BaseModel):
    """A code symbol from the CodeGraph index.

    Each field corresponds to the like-named column of the ``nodes`` table;
    instances are created from query rows by ``_row_to_node``.
    """

    id: str
    # Symbol kind; compared against EXCLUDED_NODE_KINDS when filtering.
    kind: str
    name: str
    qualified_name: str
    file_path: str
    language: str
    start_line: int
    # `_row_to_node` stores 0 when the column value is NULL (or 0).
    end_line: int = 0
    signature: Optional[str] = None
    docstring: Optional[str] = None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class CodeCallEdge(BaseModel):
    """A call edge between two symbols.

    ``source_id`` and ``target_id`` hold the ``source`` and ``target`` columns
    of an ``edges`` row whose kind is ``CALL_EDGE_KIND``.
    """

    # Built by the store as "call:<source>:<target>:<edge rowid>".
    id: str
    source_id: str
    target_id: str
    # Value of the `edges.line` column; presumably the call-site line -- TODO confirm.
    line: Optional[int] = None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass(frozen=True)
class SubgraphResult:
    """Extracted call subgraph plus metadata for CLI / viewer.

    Produced by ``load_call_subgraph``.  The dataclass is frozen, so fields
    cannot be reassigned after construction (the lists themselves remain
    ordinary mutable lists).
    """

    # Symbols kept in the subgraph.
    nodes: List[CodeSymbolNode]
    # Call edges whose two endpoints are both in `nodes`.
    edges: List[CodeCallEdge]
    # True when expansion stopped early because `max_nodes` was reached.
    truncated: bool
    # Human-readable description of the filters, from `_build_filter_summary`.
    filter_summary: str
    # Sorted, de-duplicated non-empty `language` values of `nodes`.
    languages: List[str]
    # Ids of the symbols the expansion started from.
    seed_ids: List[str]
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def resolve_codegraph_db(project_path: Path) -> Path:
    """Locate the CodeGraph database that belongs to *project_path*.

    The project path is resolved to an absolute path and the database is
    expected at ``<project>/.codegraph/codegraph.db``.

    Returns:
        The absolute path of the database file.

    Raises:
        CodeGraphNotFoundError: If no regular file exists at that location;
            the message includes the command that creates the index.
    """
    candidate = project_path.resolve().joinpath(".codegraph", "codegraph.db")
    if candidate.is_file():
        return candidate
    raise CodeGraphNotFoundError(
        f"CodeGraph index not found at {candidate}. {INIT_HINT}"
    )
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _row_to_node(row: sqlite3.Row) -> CodeSymbolNode:
    """Convert one ``nodes`` query row into a ``CodeSymbolNode``.

    The row must expose every column listed below by name.  A falsy
    ``end_line`` (NULL or 0) is stored as 0.
    """
    columns = (
        "id",
        "kind",
        "name",
        "qualified_name",
        "file_path",
        "language",
        "start_line",
        "end_line",
        "signature",
        "docstring",
    )
    values = {column: row[column] for column in columns}
    if not values["end_line"]:
        values["end_line"] = 0
    return CodeSymbolNode(**values)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _normalize_path_prefix(path_filter: Optional[str]) -> Optional[str]:
    """Return *path_filter* as a slash-separated prefix, or ``None`` if empty.

    Backslashes are converted to forward slashes and leading/trailing
    slashes are removed.  ``None``, an empty string, or a filter made only of
    slashes yields ``None``.
    """
    if path_filter:
        trimmed = path_filter.replace("\\", "/").strip("/")
        if trimmed:
            return trimmed
    return None
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _tokenize_task(task: str) -> List[str]:
    """Split a free-text task description into lowercase identifier tokens.

    The text is lowercased, identifier-like runs (a letter or underscore
    followed by letters, digits or underscores) are extracted in order, and
    single-character tokens are discarded.  Duplicates are kept.
    """
    words: List[str] = []
    for match in re.finditer(r"[A-Za-z_][A-Za-z0-9_]*|[A-Za-z]{3,}", task.lower()):
        word = match.group(0)
        if len(word) > 1:
            words.append(word)
    return words
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class CodeGraphStore:
    """Read-only access to a CodeGraph SQLite database.

    The store only issues SELECT statements against the ``nodes`` and
    ``edges`` tables.  It can be used as a context manager; leaving the
    ``with`` block closes the connection.
    """

    # Maximum number of ids bound into a single ``IN (...)`` list.  SQLite
    # limits the number of bound parameters per statement (999 by default in
    # older builds), so id sets are queried in slices of this size.
    _IN_CHUNK_SIZE = 500

    def __init__(self, db_path: Path):
        """Open a connection to *db_path* with name-addressable rows."""
        self.db_path = db_path
        self._conn = sqlite3.connect(str(db_path))
        self._conn.row_factory = sqlite3.Row

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self._conn.close()

    def __enter__(self) -> "CodeGraphStore":
        return self

    def __exit__(self, *args: object) -> None:
        self.close()

    @staticmethod
    def _escape_like(text: str) -> str:
        """Escape LIKE wildcards in *text* for use with ``ESCAPE '\\'``.

        Without this, ``_`` (very common in identifiers and paths) matches any
        single character and ``%`` matches any run of characters.
        """
        return text.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")

    @classmethod
    def _chunks(cls, ids: Iterable[str]) -> Iterable[List[str]]:
        """Yield *ids* in sorted slices of at most ``_IN_CHUNK_SIZE`` items."""
        ordered = sorted(ids)
        for start in range(0, len(ordered), cls._IN_CHUNK_SIZE):
            yield ordered[start : start + cls._IN_CHUNK_SIZE]

    def _path_clause(self, path_filter: Optional[str]) -> Tuple[str, List[str]]:
        """Return an SQL fragment (and its parameters) limiting ``file_path``.

        The normalized filter is matched as a literal prefix.  An empty filter
        yields an empty fragment and no parameters.
        """
        prefix = _normalize_path_prefix(path_filter)
        if not prefix:
            return "", []
        return (
            " AND file_path LIKE ? ESCAPE '\\'",
            [f"{self._escape_like(prefix)}%"],
        )

    def _eligible_node_clause(self) -> str:
        """Return the SQL fragment excluding ``EXCLUDED_NODE_KINDS``.

        The fragment contains one placeholder per excluded kind; callers bind
        ``list(EXCLUDED_NODE_KINDS)`` at the matching position.
        """
        placeholders = ",".join("?" * len(EXCLUDED_NODE_KINDS))
        return f" AND kind NOT IN ({placeholders})"

    def fetch_nodes_by_ids(self, node_ids: Set[str]) -> List[CodeSymbolNode]:
        """Load the nodes whose id is in *node_ids* (unknown ids are skipped)."""
        if not node_ids:
            return []
        nodes: List[CodeSymbolNode] = []
        # Query in slices so large id sets stay below SQLite's parameter limit.
        for chunk in self._chunks(node_ids):
            placeholders = ",".join("?" * len(chunk))
            query = f"{NODE_SELECT} WHERE id IN ({placeholders})"
            nodes.extend(_row_to_node(r) for r in self._conn.execute(query, chunk))
        return nodes

    def fetch_call_edges_for_nodes(self, node_ids: Set[str]) -> List[CodeCallEdge]:
        """Load call edges whose source and target are both in *node_ids*.

        Edges are returned ordered by their ``rowid`` in the ``edges`` table.
        """
        if not node_ids:
            return []
        wanted = set(node_ids)
        rows: List[sqlite3.Row] = []
        # The source side is filtered in SQL one slice at a time; the target
        # side is filtered here so each statement binds a bounded number of
        # parameters.
        for chunk in self._chunks(wanted):
            placeholders = ",".join("?" * len(chunk))
            query = f"""
                SELECT rowid AS edge_rowid, source, target, line
                FROM edges
                WHERE kind = ?
                  AND source IN ({placeholders})
            """
            params: List[object] = [CALL_EDGE_KIND, *chunk]
            rows.extend(
                row
                for row in self._conn.execute(query, params)
                if row["target"] in wanted
            )
        rows.sort(key=lambda row: row["edge_rowid"])
        return [
            CodeCallEdge(
                id=f"call:{row['source']}:{row['target']}:{row['edge_rowid']}",
                source_id=row["source"],
                target_id=row["target"],
                line=row["line"],
            )
            for row in rows
        ]

    def _load_adjacency(
        self, path_filter: Optional[str]
    ) -> Tuple[dict[str, Set[str]], dict[str, int]]:
        """Build undirected adjacency and in-degree for call edges within path filter.

        Only nodes that pass the kind and path filters take part.  Every such
        node gets an entry in both mappings, even when it has no call edges.
        The in-degree counts edge rows, so repeated calls between the same
        pair of symbols count more than once.
        """
        path_sql, path_params = self._path_clause(path_filter)
        kind_params = list(EXCLUDED_NODE_KINDS)
        node_query = f"""
            SELECT id FROM nodes
            WHERE 1=1{self._eligible_node_clause()}{path_sql}
        """
        allowed = {
            row["id"]
            for row in self._conn.execute(node_query, kind_params + path_params)
        }
        if not allowed:
            return {}, {}

        adjacency: dict[str, Set[str]] = {nid: set() for nid in allowed}
        in_degree: dict[str, int] = {nid: 0 for nid in allowed}

        # Membership is checked in Python instead of binding every eligible id
        # into two IN lists: the eligible set can be the entire index, which
        # would exceed SQLite's bound-parameter limit.
        edge_query = "SELECT source, target FROM edges WHERE kind = ?"
        for row in self._conn.execute(edge_query, [CALL_EDGE_KIND]):
            src, tgt = row["source"], row["target"]
            if src not in allowed or tgt not in allowed:
                continue
            adjacency[src].add(tgt)
            adjacency[tgt].add(src)
            in_degree[tgt] += 1

        return adjacency, in_degree

    def find_symbol_seeds(self, symbol: str, path_filter: Optional[str]) -> List[str]:
        """Return up to 10 node ids matching *symbol*, ordered by start line.

        Exact matches on ``name`` or ``qualified_name`` are preferred; only
        when there are none is a literal substring match attempted.
        """
        path_sql, path_params = self._path_clause(path_filter)
        kind_params = list(EXCLUDED_NODE_KINDS)
        exact_query = f"""
            SELECT id FROM nodes
            WHERE (name = ? OR qualified_name = ?)
            {self._eligible_node_clause()}{path_sql}
            ORDER BY start_line
            LIMIT 10
        """
        params: List[object] = [symbol, symbol, *kind_params, *path_params]
        rows = self._conn.execute(exact_query, params).fetchall()
        if rows:
            return [row["id"] for row in rows]

        like_query = f"""
            SELECT id FROM nodes
            WHERE (name LIKE ? ESCAPE '\\' OR qualified_name LIKE ? ESCAPE '\\')
            {self._eligible_node_clause()}{path_sql}
            ORDER BY start_line
            LIMIT 10
        """
        like = f"%{self._escape_like(symbol)}%"
        params = [like, like, *kind_params, *path_params]
        rows = self._conn.execute(like_query, params).fetchall()
        return [row["id"] for row in rows]

    def find_task_seeds(self, task: str, path_filter: Optional[str], limit: int = 5) -> List[str]:
        """Return up to *limit* node ids that best match a free-text task.

        The task is tokenized; each token selects up to 50 nodes whose name,
        qualified name or docstring contains it.  A selected node scores one
        point per task token found in that text, summed over every token
        query that selected it.  Ties are broken by node id.  When the task
        yields no tokens it is looked up as a symbol instead.
        """
        tokens = _tokenize_task(task)
        if not tokens:
            # Honor the caller's limit on the fallback path as well.
            return self.find_symbol_seeds(task, path_filter)[:limit]

        scores: dict[str, int] = {}
        path_sql, path_params = self._path_clause(path_filter)
        kind_params = list(EXCLUDED_NODE_KINDS)
        # The statement text does not depend on the token, so build it once.
        query = f"""
            SELECT id, name, qualified_name, docstring FROM nodes
            WHERE (LOWER(name) LIKE ? ESCAPE '\\'
                   OR LOWER(qualified_name) LIKE ? ESCAPE '\\'
                   OR LOWER(COALESCE(docstring, '')) LIKE ? ESCAPE '\\')
            {self._eligible_node_clause()}{path_sql}
            LIMIT 50
        """

        for token in tokens:
            pattern = f"%{self._escape_like(token)}%"
            params: List[object] = [pattern, pattern, pattern, *kind_params, *path_params]
            for row in self._conn.execute(query, params).fetchall():
                nid = row["id"]
                text = " ".join(
                    filter(
                        None,
                        [row["name"], row["qualified_name"], row["docstring"] or ""],
                    )
                ).lower()
                score = sum(1 for t in tokens if t in text)
                scores[nid] = scores.get(nid, 0) + score

        ranked = sorted(scores.items(), key=lambda item: (-item[1], item[0]))
        return [nid for nid, _ in ranked[:limit]]

    def pick_auto_seeds(self, path_filter: Optional[str], limit: int = 3) -> List[str]:
        """Pick seed nodes when the caller named neither a symbol nor a task.

        Returns the ids with the highest call in-degree (ties broken by id),
        keeping only those that are called at least once.  If none of the top
        candidates is called, the first-ranked node is returned alone.  An
        empty list means no node passed the filters.
        """
        _, in_degree = self._load_adjacency(path_filter)
        if not in_degree:
            return []
        ranked = sorted(in_degree.items(), key=lambda item: (-item[1], item[0]))
        return [nid for nid, deg in ranked[:limit] if deg > 0] or [ranked[0][0]]

    def search_symbols(
        self, query: str, path_filter: Optional[str], limit: int = 10
    ) -> List[CodeSymbolNode]:
        """Search symbols whose name, qualified name, signature or docstring
        contains *query* (surrounding whitespace ignored).

        Results are ordered: exact name match first, then names starting with
        the query, then everything else; within a group by start line.
        """
        path_sql, path_params = self._path_clause(path_filter)
        kind_params = list(EXCLUDED_NODE_KINDS)
        exact = query.strip()
        escaped = self._escape_like(exact)
        pattern = f"%{escaped}%"
        sql = f"""
            {NODE_SELECT}
            WHERE (name LIKE ? ESCAPE '\\' OR qualified_name LIKE ? ESCAPE '\\'
                   OR COALESCE(signature, '') LIKE ? ESCAPE '\\'
                   OR COALESCE(docstring, '') LIKE ? ESCAPE '\\')
            {self._eligible_node_clause()}{path_sql}
            ORDER BY
                CASE WHEN name = ? THEN 0
                     WHEN name LIKE ? ESCAPE '\\' THEN 1
                     ELSE 2 END,
                start_line
            LIMIT ?
        """
        params: List[object] = [
            pattern,
            pattern,
            pattern,
            pattern,
            *kind_params,
            *path_params,
            exact,
            f"{escaped}%",
            limit,
        ]
        rows = self._conn.execute(sql, params).fetchall()
        return [_row_to_node(r) for r in rows]

    def expand_subgraph(
        self,
        seed_ids: Sequence[str],
        *,
        path_filter: Optional[str],
        hops: int,
        max_nodes: int,
    ) -> Tuple[Set[str], bool]:
        """Breadth-first expansion from *seed_ids* over undirected call edges.

        Returns:
            ``(node_ids, truncated)``.  ``truncated`` is True when an
            unvisited neighbor had to be dropped because ``max_nodes`` ids
            were already collected.  When no seed is part of the filtered
            graph, the seeds themselves are returned unexpanded.
        """
        adjacency, _ = self._load_adjacency(path_filter)
        if not adjacency:
            return set(seed_ids), False

        visited: Set[str] = set()
        queue: deque[Tuple[str, int]] = deque()

        for seed in seed_ids:
            if seed in adjacency:
                visited.add(seed)
                queue.append((seed, 0))

        if not visited:
            # Seeds may be outside path filter; include them alone.
            return set(seed_ids), False

        truncated = False
        while queue:
            node_id, depth = queue.popleft()
            if depth >= hops:
                # Nodes at the hop limit are kept but not expanded further.
                continue
            for neighbor in adjacency.get(node_id, ()):
                if neighbor in visited:
                    continue
                if len(visited) >= max_nodes:
                    truncated = True
                    return visited, truncated
                visited.add(neighbor)
                queue.append((neighbor, depth + 1))

        return visited, truncated
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def _build_filter_summary(
    *,
    path_filter: Optional[str],
    symbol: Optional[str],
    task: Optional[str],
    seed_ids: Sequence[str],
) -> str:
    """Describe the active filters as a short comma-separated string.

    The path filter (if any) comes first, followed by exactly one selector:
    the symbol, else the task (shown with ``repr``), else the number of
    seeds, else the literal ``auto-seed``.
    """
    if symbol:
        selector = f"symbol={symbol}"
    elif task:
        selector = f"task={task!r}"
    elif seed_ids:
        selector = f"seeds={len(seed_ids)}"
    else:
        selector = "auto-seed"

    if path_filter:
        return f"path={path_filter}, {selector}"
    return selector
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def load_call_subgraph(
    project_path: Path,
    *,
    path_filter: Optional[str] = None,
    symbol: Optional[str] = None,
    task: Optional[str] = None,
    hops: int = 2,
    max_nodes: int = 200,
) -> SubgraphResult:
    """Load a bounded call-graph subgraph from CodeGraph.

    Seeds are chosen from ``symbol`` if given, else from ``task``, else
    automatically by the store.  The seeds are then expanded over call edges
    for up to ``hops`` steps, collecting at most ``max_nodes`` ids (seeds are
    always included).

    Raises:
        ValueError: If ``hops`` is negative, ``max_nodes`` is below 1, both
            ``symbol`` and ``task`` are given, or the requested symbol/task
            matches nothing in the index.
        CodeGraphNotFoundError: If the project has no CodeGraph database.
    """
    if hops < 0:
        raise ValueError("hops must be >= 0")
    if max_nodes < 1:
        raise ValueError("max_nodes must be >= 1")
    if symbol and task:
        raise ValueError("Specify only one of symbol or task")

    def summarize(seeds: Sequence[str]) -> str:
        # Same summary inputs for both the empty and the populated result.
        return _build_filter_summary(
            path_filter=path_filter,
            symbol=symbol,
            task=task,
            seed_ids=seeds,
        )

    with CodeGraphStore(resolve_codegraph_db(project_path)) as store:
        if symbol:
            seeds = store.find_symbol_seeds(symbol, path_filter)
            if not seeds:
                raise ValueError(f"No symbol matching {symbol!r} in CodeGraph index")
        elif task:
            seeds = store.find_task_seeds(task, path_filter)
            if not seeds:
                raise ValueError(f"No symbols matching task {task!r} in CodeGraph index")
        else:
            seeds = store.pick_auto_seeds(path_filter)

        if not seeds:
            # Nothing to start from: report an empty subgraph.
            return SubgraphResult(
                nodes=[],
                edges=[],
                truncated=False,
                filter_summary=summarize(seeds),
                languages=[],
                seed_ids=[],
            )

        reached, was_truncated = store.expand_subgraph(
            seeds,
            path_filter=path_filter,
            hops=hops,
            max_nodes=max_nodes,
        )
        # Seeds stay in the result even when the path filter kept them out of
        # the adjacency used for expansion.
        reached.update(seeds)

        fetched_nodes = store.fetch_nodes_by_ids(reached)
        call_edges = store.fetch_call_edges_for_nodes(reached)

        endpoints: Set[str] = set()
        for edge in call_edges:
            endpoints.add(edge.source_id)
            endpoints.add(edge.target_id)

        # Excluded kinds survive only when a call edge actually touches them.
        kept_nodes = [
            node
            for node in fetched_nodes
            if not (node.kind in EXCLUDED_NODE_KINDS and node.id not in endpoints)
        ]
        kept_ids = {node.id for node in kept_nodes}
        kept_edges = [
            edge
            for edge in call_edges
            if edge.source_id in kept_ids and edge.target_id in kept_ids
        ]
        used_languages = sorted({node.language for node in kept_nodes if node.language})

        return SubgraphResult(
            nodes=kept_nodes,
            edges=kept_edges,
            truncated=was_truncated,
            filter_summary=summarize(seeds),
            languages=used_languages,
            seed_ids=list(seeds),
        )
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
# Query-string keys that map onto differently named load_call_subgraph
# keyword arguments.  Keys are compared in lowercase; keys not listed here
# are used as-is (lowercased).
_QUERY_KEY_ALIASES = {
    "path": "path_filter",
    "max-nodes": "max_nodes",
    "max_nodes": "max_nodes",
}


def parse_codegraph_query(query: str) -> dict:
    """Parse CLI-style CodeGraph query strings into load_call_subgraph kwargs.

    Supported forms:
    symbol=load_call_subgraph path=hyperextract/ hops=3 max_nodes=500
    symbol:load_call_subgraph task="fix parser bug"

    A query is a whitespace-separated list of ``key=value`` or ``key:value``
    pairs.  Keys are case-insensitive.  A value is either a bare token that
    runs to the next whitespace (so it may itself contain ``:`` or ``=``, as
    in ``symbol=Foo::bar`` or ``path=C:/src``) or a single- or double-quoted
    string.  Inside a quoted string, a backslash before the quote character
    yields a literal quote; any other backslash sequence is kept verbatim.
    ``hops`` and ``max_nodes`` values are converted to ``int``.

    Returns:
        A dict of parsed values; empty when *query* is empty or blank.

    Raises:
        ValueError: If a key is not followed by ``=`` or ``:``, an integer
            value cannot be parsed, or both ``symbol`` and ``task`` appear.
    """
    if not query or not query.strip():
        return {}

    text = query.strip()
    result: dict = {}
    i = 0
    length = len(text)

    def skip_ws() -> None:
        nonlocal i
        while i < length and text[i].isspace():
            i += 1

    def read_quoted() -> str:
        nonlocal i
        quote = text[i]
        i += 1
        pieces: List[str] = []
        while i < length and text[i] != quote:
            if text[i] == "\\" and i + 1 < length:
                if text[i + 1] == quote:
                    # Escaped quote: keep the quote, drop the backslash.
                    pieces.append(quote)
                else:
                    # Other sequences (e.g. Windows paths) stay untouched.
                    pieces.append(text[i : i + 2])
                i += 2
                continue
            pieces.append(text[i])
            i += 1
        if i < length:
            i += 1  # consume the closing quote
        return "".join(pieces)

    def read_key() -> str:
        nonlocal i
        start = i
        while i < length and not text[i].isspace() and text[i] not in "=:":
            i += 1
        return text[start:i]

    def read_bare_value() -> str:
        # Values end at whitespace only; stopping at ':' or '=' would cut
        # qualified names and drive-letter paths short.
        nonlocal i
        start = i
        while i < length and not text[i].isspace():
            i += 1
        return text[start:i]

    while i < length:
        skip_ws()
        if i >= length:
            break

        key = read_key()
        if not key:
            # Stray separator with no key in front of it: skip it.
            i += 1
            continue

        skip_ws()
        if i >= length or text[i] not in "=:":
            raise ValueError(f"Expected '=' or ':' after key {key!r}")
        i += 1
        skip_ws()

        if i < length and text[i] in "\"'":
            value = read_quoted()
        else:
            value = read_bare_value()

        canonical = _QUERY_KEY_ALIASES.get(key.lower(), key.lower())
        if canonical in ("hops", "max_nodes"):
            try:
                result[canonical] = int(value)
            except ValueError as exc:
                raise ValueError(f"Invalid integer for {key}: {value!r}") from exc
        else:
            result[canonical] = value

    if "symbol" in result and "task" in result:
        raise ValueError("Specify only one of symbol or task")
    return result
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
def merge_query_params(
    *,
    query: Optional[str] = None,
    path_filter: Optional[str] = None,
    symbol: Optional[str] = None,
    task: Optional[str] = None,
    hops: Optional[int] = None,
    max_nodes: Optional[int] = None,
    default_path_filter: Optional[str] = None,
) -> dict:
    """Combine a parsed query string with explicitly passed fields.

    Values parsed from ``query`` form the base; every structured argument
    that is not ``None`` overrides the parsed value.  ``default_path_filter``
    is used only when neither the structured ``path_filter`` nor the query
    supplies a path filter.

    Raises:
        ValueError: If the merged result has both a non-empty ``symbol`` and
            a non-empty ``task``, or if the query string itself is invalid.
    """
    merged: dict = {}
    if query and query.strip():
        merged = dict(parse_codegraph_query(query))

    if path_filter is not None:
        merged["path_filter"] = path_filter
    elif default_path_filter and "path_filter" not in merged:
        merged["path_filter"] = default_path_filter

    explicit = (
        ("symbol", symbol),
        ("task", task),
        ("hops", hops),
        ("max_nodes", max_nodes),
    )
    for field_name, field_value in explicit:
        if field_value is not None:
            merged[field_name] = field_value

    if merged.get("symbol") and merged.get("task"):
        raise ValueError("Specify only one of symbol or task")
    return merged
|
|
566
|
+
|
|
567
|
+
|
|
568
|
+
def make_search_callback(
    project_path: Path,
    path_filter: Optional[str] = None,
    *,
    top_k: int = 10,
) -> Callable[[str], Tuple[List[CodeSymbolNode], List[CodeCallEdge]]]:
    """Build a search function over the project's CodeGraph index.

    The returned callable takes a query string and returns up to ``top_k``
    matching symbols together with the call edges that connect those
    symbols to each other.  The database is located and opened on every
    call, so the index only has to exist once a search is actually run.
    """

    def search(query: str) -> Tuple[List[CodeSymbolNode], List[CodeCallEdge]]:
        with CodeGraphStore(resolve_codegraph_db(project_path)) as store:
            matches = store.search_symbols(query, path_filter, limit=top_k)
            match_ids = {match.id for match in matches}
            linking_edges: List[CodeCallEdge] = []
            for edge in store.fetch_call_edges_for_nodes(match_ids):
                # Keep only edges with both endpoints among the matches.
                if edge.source_id in match_ids and edge.target_id in match_ids:
                    linking_edges.append(edge)
            return matches, linking_edges

    return search
|