code-review-graph-codeblackwell 2.3.6.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_review_graph/__init__.py +20 -0
- code_review_graph/__main__.py +4 -0
- code_review_graph/analysis.py +410 -0
- code_review_graph/changes.py +409 -0
- code_review_graph/cli.py +1255 -0
- code_review_graph/communities.py +874 -0
- code_review_graph/constants.py +23 -0
- code_review_graph/context_savings.py +317 -0
- code_review_graph/custom_languages.py +322 -0
- code_review_graph/daemon.py +1009 -0
- code_review_graph/daemon_cli.py +320 -0
- code_review_graph/docs/LLM-OPTIMIZED-REFERENCE.md +71 -0
- code_review_graph/embeddings.py +1006 -0
- code_review_graph/enrich.py +303 -0
- code_review_graph/eval/__init__.py +33 -0
- code_review_graph/eval/benchmarks/__init__.py +1 -0
- code_review_graph/eval/benchmarks/agent_baseline.py +193 -0
- code_review_graph/eval/benchmarks/build_performance.py +60 -0
- code_review_graph/eval/benchmarks/flow_completeness.py +36 -0
- code_review_graph/eval/benchmarks/impact_accuracy.py +220 -0
- code_review_graph/eval/benchmarks/multi_hop_retrieval.py +125 -0
- code_review_graph/eval/benchmarks/search_quality.py +59 -0
- code_review_graph/eval/benchmarks/token_efficiency.py +143 -0
- code_review_graph/eval/configs/code-review-graph.yaml +50 -0
- code_review_graph/eval/configs/express.yaml +45 -0
- code_review_graph/eval/configs/fastapi.yaml +48 -0
- code_review_graph/eval/configs/flask.yaml +50 -0
- code_review_graph/eval/configs/gin.yaml +51 -0
- code_review_graph/eval/configs/httpx.yaml +48 -0
- code_review_graph/eval/reporter.py +301 -0
- code_review_graph/eval/runner.py +211 -0
- code_review_graph/eval/scorer.py +85 -0
- code_review_graph/eval/token_benchmark.py +182 -0
- code_review_graph/exports.py +409 -0
- code_review_graph/flows.py +698 -0
- code_review_graph/graph.py +1427 -0
- code_review_graph/graph_diff.py +122 -0
- code_review_graph/hints.py +384 -0
- code_review_graph/incremental.py +1245 -0
- code_review_graph/jedi_resolver.py +303 -0
- code_review_graph/main.py +1079 -0
- code_review_graph/memory.py +142 -0
- code_review_graph/migrations.py +284 -0
- code_review_graph/parser.py +6957 -0
- code_review_graph/postprocessing.py +134 -0
- code_review_graph/prompts.py +159 -0
- code_review_graph/refactor.py +852 -0
- code_review_graph/registry.py +319 -0
- code_review_graph/rescript_resolver.py +206 -0
- code_review_graph/search.py +447 -0
- code_review_graph/skills.py +1481 -0
- code_review_graph/spring_resolver.py +200 -0
- code_review_graph/temporal_resolver.py +199 -0
- code_review_graph/token_benchmark.py +125 -0
- code_review_graph/tools/__init__.py +156 -0
- code_review_graph/tools/_common.py +176 -0
- code_review_graph/tools/analysis_tools.py +184 -0
- code_review_graph/tools/build.py +541 -0
- code_review_graph/tools/community_tools.py +246 -0
- code_review_graph/tools/context.py +152 -0
- code_review_graph/tools/docs.py +274 -0
- code_review_graph/tools/flows_tools.py +176 -0
- code_review_graph/tools/query.py +692 -0
- code_review_graph/tools/refactor_tools.py +168 -0
- code_review_graph/tools/registry_tools.py +125 -0
- code_review_graph/tools/review.py +477 -0
- code_review_graph/tsconfig_resolver.py +257 -0
- code_review_graph/visualization.py +2184 -0
- code_review_graph/wiki.py +305 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/METADATA +718 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/RECORD +74 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/WHEEL +4 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/entry_points.txt +3 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,692 @@
|
|
|
1
|
+
"""Tools 2, 3, 5, 6, 9: query / search / stats helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from ..context_savings import attach_context_savings, estimate_file_tokens
|
|
10
|
+
from ..embeddings import EmbeddingStore
|
|
11
|
+
from ..graph import _sanitize_name, edge_to_dict, node_to_dict
|
|
12
|
+
from ..hints import generate_hints, get_session
|
|
13
|
+
from ..incremental import get_changed_files, get_db_path, get_staged_and_unstaged
|
|
14
|
+
from ..search import hybrid_search
|
|
15
|
+
from ._common import _BUILTIN_CALL_NAMES, _get_store, _resolve_graph_file_paths
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
# ---------------------------------------------------------------------------
|
|
20
|
+
# Tool 2: get_impact_radius
|
|
21
|
+
# ---------------------------------------------------------------------------
|
|
22
|
+
|
|
23
|
+
# Patterns accepted by ``query_graph`` mapped to the human-readable
# description echoed back in each response.  Insertion order is the order
# reported in the "Unknown pattern" error message.
_QUERY_PATTERNS = dict(
    callers_of="Find all functions that call a given function",
    callees_of="Find all functions called by a given function",
    imports_of="Find all imports of a given file or module",
    importers_of="Find all files that import a given file or module",
    children_of="Find all nodes contained in a file or class",
    tests_for="Find all tests for a given function or class",
    inheritors_of="Find all classes that inherit from a given class",
    file_summary="Get a summary of all nodes in a file",
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def get_impact_radius(
    changed_files: list[str] | None = None,
    max_depth: int = 2,
    max_results: int = 500,
    repo_root: str | None = None,
    base: str = "HEAD~1",
    detail_level: str = "standard",
) -> dict[str, Any]:
    """Analyze the blast radius of changed files.

    Args:
        changed_files: Explicit list of changed file paths (relative to repo root).
            If omitted, auto-detects from git diff against ``base`` and, when
            that yields nothing, from staged/unstaged changes.
        max_depth: How many hops to traverse in the graph (default: 2).
        max_results: Maximum impacted nodes to return (default: 500).
        repo_root: Repository root path. Auto-detected if omitted.
        base: Git ref for auto-detecting changes (default: HEAD~1).
        detail_level: "minimal" returns a summary with a coarse risk rating;
            any other value returns the full ("standard") output.

    Returns:
        Standard mode: changed nodes, impacted nodes, impacted files,
        connecting edges, plus ``truncated`` flag and ``total_impacted`` count.
        Minimal mode: summary, ``risk`` ("low"/"medium"/"high"),
        ``impacted_file_count``, up to five ``key_entities`` and ``truncated``.
    """
    store, root = _get_store(repo_root)
    try:
        if changed_files is None:
            changed_files = get_changed_files(root, base)
            if not changed_files:
                changed_files = get_staged_and_unstaged(root)

        if not changed_files:
            # Same keys as the standard response so callers can index the
            # payload without special-casing the "nothing changed" path.
            return {
                "status": "ok",
                "summary": "No changed files detected.",
                "changed_files": [],
                "changed_nodes": [],
                "impacted_nodes": [],
                "impacted_files": [],
                "edges": [],
                "truncated": False,
                "total_impacted": 0,
            }

        # Token estimate uses the user-facing paths; the graph lookup uses the
        # file paths as they are stored in the graph.
        original_tokens = estimate_file_tokens(root, changed_files)
        abs_files = _resolve_graph_file_paths(store, root, changed_files)
        result = store.get_impact_radius(
            abs_files, max_depth=max_depth, max_nodes=max_results
        )

        changed_dicts = [node_to_dict(n) for n in result["changed_nodes"]]
        impacted_dicts = [node_to_dict(n) for n in result["impacted_nodes"]]
        edge_dicts = [edge_to_dict(e) for e in result["edges"]]
        truncated = result["truncated"]
        total_impacted = result["total_impacted"]
        impacted_files = result["impacted_files"]

        summary_parts = [
            f"Blast radius for {len(changed_files)} changed file(s):",
            f" - {len(changed_dicts)} nodes directly changed",
            f" - {len(impacted_dicts)} nodes impacted (within {max_depth} hops)",
            f" - {len(impacted_files)} additional files affected",
        ]
        if truncated:
            summary_parts.append(
                f" - Results truncated: showing {len(impacted_dicts)}"
                f" of {total_impacted} impacted nodes"
            )
        summary = "\n".join(summary_parts)

        if detail_level == "minimal":
            # Rate risk on the full impact size, not on the (possibly
            # truncated) list that was returned; otherwise a small
            # ``max_results`` would under-report risk.
            # NOTE(review): assumes ``total_impacted`` is the pre-truncation
            # count, as the truncation message above implies.
            impacted_count = max(len(impacted_dicts), total_impacted)
            if impacted_count > 20:
                risk = "high"
            elif impacted_count > 5:
                risk = "medium"
            else:
                risk = "low"
            minimal_response = {
                "status": "ok",
                "summary": summary,
                "risk": risk,
                "impacted_file_count": len(impacted_files),
                "key_entities": [n["name"] for n in impacted_dicts[:5]],
                "truncated": truncated,
            }
            attach_context_savings(minimal_response, original_tokens=original_tokens)
            return minimal_response

        response = {
            "status": "ok",
            "summary": summary,
            "changed_files": changed_files,
            "changed_nodes": changed_dicts,
            "impacted_nodes": impacted_dicts,
            "impacted_files": impacted_files,
            "edges": edge_dicts,
            "truncated": truncated,
            "total_impacted": total_impacted,
        }
        attach_context_savings(response, original_tokens=original_tokens)
        return response
    finally:
        store.close()
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
# ---------------------------------------------------------------------------
|
|
141
|
+
# Tool 3: query_graph
|
|
142
|
+
# ---------------------------------------------------------------------------
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def query_graph(
    pattern: str,
    target: str,
    repo_root: str | None = None,
    detail_level: str = "standard",
) -> dict[str, Any]:
    """Run a predefined graph query.

    Args:
        pattern: Query pattern. One of: callers_of, callees_of, imports_of,
            importers_of, children_of, tests_for, inheritors_of, file_summary.
        target: The node name, qualified name, or file path to query about.
        repo_root: Repository root path. Auto-detected if omitted.
        detail_level: "minimal" returns at most five trimmed results plus a
            ``result_count``; any other value returns the full output.

    Returns:
        A dict whose ``status`` is one of:

        * ``"ok"`` - matching ``results`` (and ``edges`` in standard mode).
        * ``"error"`` - ``pattern`` is not a known query pattern.
        * ``"ambiguous"`` - several nodes match ``target``; ``candidates``
          lists them.
        * ``"not_found"`` - no node matches ``target``.
    """
    store, root = _get_store(repo_root)
    try:
        if pattern not in _QUERY_PATTERNS:
            return {
                "status": "error",
                "error": (
                    f"Unknown pattern '{pattern}'. "
                    f"Available: {list(_QUERY_PATTERNS.keys())}"
                ),
            }

        results: list[dict] = []
        edges_out: list[dict] = []

        # For callers_of, skip common builtins early (bare names only)
        # "Who calls .map()?" returns hundreds of useless hits.
        # Qualified names (e.g. "utils.py::map") bypass this filter.
        if (
            pattern == "callers_of"
            and target in _BUILTIN_CALL_NAMES
            and "::" not in target
        ):
            return {
                "status": "ok", "pattern": pattern, "target": target,
                "description": _QUERY_PATTERNS[pattern],
                "summary": (
                    f"'{target}' is a common builtin "
                    "— callers_of skipped to avoid noise."
                ),
                "results": [], "edges": [],
            }

        # Resolve target - try as-is, then as absolute path, then search.
        # file_summary targets are paths, so skip broad node search.
        node = None
        if pattern != "file_summary":
            node = store.get_node(target)
            if not node:
                abs_target = str(root / target)
                node = store.get_node(abs_target)
            if not node:
                # Search by name
                candidates = store.search_nodes(target, limit=5)
                if len(candidates) == 1:
                    node = candidates[0]
                    target = node.qualified_name
                elif len(candidates) > 1:
                    return {
                        "status": "ambiguous",
                        "summary": (
                            f"Multiple matches for '{target}'. "
                            "Please use a qualified name."
                        ),
                        "candidates": [node_to_dict(c) for c in candidates],
                    }

        if not node and pattern != "file_summary":
            return {
                "status": "not_found",
                "summary": f"No node found matching '{target}'.",
            }

        qn = node.qualified_name if node else target

        if pattern == "callers_of":
            # Each caller node is reported once, even when it calls the
            # target from several call sites.
            seen_sources: set[str] = set()
            for e in store.get_edges_by_target(qn):
                if e.kind == "CALLS":
                    if e.source_qualified not in seen_sources:
                        seen_sources.add(e.source_qualified)
                        caller = store.get_node(e.source_qualified)
                        if caller:
                            results.append(node_to_dict(caller))
                    edges_out.append(edge_to_dict(e))
            # Fallback: CALLS edges store unqualified target names
            # (e.g. "generateTestCode") while qn is fully qualified
            # (e.g. "file.ts::generateTestCode"). Search by plain name too.
            if node:
                for e in store.search_edges_by_target_name(node.name):
                    if e.source_qualified not in seen_sources:
                        seen_sources.add(e.source_qualified)
                        caller = store.get_node(e.source_qualified)
                        if caller:
                            results.append(node_to_dict(caller))
                    edges_out.append(edge_to_dict(e))

        elif pattern == "callees_of":
            seen_targets: set[str] = set()
            for e in store.get_edges_by_source(qn):
                if e.kind == "CALLS":
                    if e.target_qualified not in seen_targets:
                        seen_targets.add(e.target_qualified)
                        callee = store.get_node(e.target_qualified)
                        if callee:
                            results.append(node_to_dict(callee))
                        elif "::" not in e.target_qualified:
                            # Callee has no node in the graph and is a bare
                            # name: report a placeholder entry for it.
                            results.append({
                                "kind": "Function",
                                "name": e.target_qualified,
                                "qualified_name": e.target_qualified,
                            })
                    edges_out.append(edge_to_dict(e))

        elif pattern == "imports_of":
            for e in store.get_edges_by_source(qn):
                if e.kind == "IMPORTS_FROM":
                    results.append({"import_target": e.target_qualified})
                    edges_out.append(edge_to_dict(e))

        elif pattern == "importers_of":
            # Find edges where target matches this file.
            # Use resolve() to canonicalize the path, matching how
            # _resolve_module_to_file stores edge targets.
            abs_target = (
                str((root / target).resolve()) if node is None
                else node.file_path
            )
            for e in store.get_edges_by_target(abs_target):
                if e.kind == "IMPORTS_FROM":
                    results.append({
                        "importer": e.source_qualified,
                        "file": e.file_path,
                    })
                    edges_out.append(edge_to_dict(e))

        elif pattern == "children_of":
            for e in store.get_edges_by_source(qn):
                if e.kind == "CONTAINS":
                    child = store.get_node(e.target_qualified)
                    if child:
                        results.append(node_to_dict(child))

        elif pattern == "tests_for":
            for e in store.get_edges_by_target(qn):
                if e.kind == "TESTED_BY":
                    test = store.get_node(e.source_qualified)
                    if test:
                        results.append(node_to_dict(test))
            # Also search by naming convention
            name = node.name if node else target
            test_nodes = store.search_nodes(f"test_{name}", limit=10)
            test_nodes += store.search_nodes(f"Test{name}", limit=10)
            seen = {r.get("qualified_name") for r in results}
            for t in test_nodes:
                if t.qualified_name not in seen and t.is_test:
                    # Record the name so a node returned by both searches
                    # above is only reported once.
                    seen.add(t.qualified_name)
                    results.append(node_to_dict(t))

        elif pattern == "inheritors_of":
            for e in store.get_edges_by_target(qn):
                if e.kind in ("INHERITS", "IMPLEMENTS"):
                    child = store.get_node(e.source_qualified)
                    if child:
                        results.append(node_to_dict(child))
                    edges_out.append(edge_to_dict(e))
            # Fallback: INHERITS/IMPLEMENTS edges store unqualified base names
            # (e.g. "Animal") while qn is fully qualified
            # (e.g. "sample.dart::Animal"). Search by plain name too. See: #87
            if not results and node:
                for kind in ("INHERITS", "IMPLEMENTS"):
                    for e in store.search_edges_by_target_name(node.name, kind=kind):
                        child = store.get_node(e.source_qualified)
                        if child:
                            results.append(node_to_dict(child))
                        edges_out.append(edge_to_dict(e))

        elif pattern == "file_summary":
            graph_paths = _resolve_graph_file_paths(store, root, [target])
            for graph_path in graph_paths:
                for n in store.get_nodes_by_file(graph_path):
                    results.append(node_to_dict(n))

        summary = (
            f"Found {len(results)} result(s) "
            f"for {pattern}('{target}')"
        )

        if detail_level == "minimal":
            minimal_results = []
            for r in results[:5]:
                trimmed = {
                    k: r[k]
                    for k in ("name", "kind", "file_path")
                    if k in r
                }
                # imports_of / importers_of records carry none of the keys
                # above; keep them whole instead of emitting empty dicts.
                minimal_results.append(trimmed or r)
            return {
                "status": "ok",
                "pattern": pattern,
                "target": target,
                "description": _QUERY_PATTERNS[pattern],
                "summary": summary,
                "result_count": len(results),
                "results": minimal_results,
            }

        return {
            "status": "ok",
            "pattern": pattern,
            "target": target,
            "description": _QUERY_PATTERNS[pattern],
            "summary": summary,
            "results": results,
            "edges": edges_out,
        }
    finally:
        store.close()
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
# ---------------------------------------------------------------------------
|
|
372
|
+
# Tool 5: semantic_search_nodes
|
|
373
|
+
# ---------------------------------------------------------------------------
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def semantic_search_nodes(
    query: str,
    kind: str | None = None,
    limit: int = 20,
    repo_root: str | None = None,
    context_files: list[str] | None = None,
    model: str | None = None,
    provider: str | None = None,
    detail_level: str = "standard",
) -> dict[str, Any]:
    """Search the graph for nodes matching ``query``.

    The lookup itself is delegated to ``hybrid_search`` (described by the
    original documentation as FTS5 BM25 plus vector embeddings merged via
    Reciprocal Rank Fusion, with a fallback to keyword matching).

    Args:
        query: Search string to match against node names and qualified names.
        kind: Optional filter by node kind (File, Class, Function, Type, Test).
        limit: Maximum results to return (default: 20).
        repo_root: Repository root path. Auto-detected if omitted.
        context_files: Optional list of file paths. Nodes in these files
            receive a relevance boost.
        model: Forwarded unchanged to ``hybrid_search``.
        provider: Forwarded unchanged to ``hybrid_search``.
        detail_level: "minimal" returns at most five trimmed results and no
            hints; any other value returns the full output with ``_hints``.

    Returns:
        Ranked list of matching nodes together with the query, a summary and
        a ``search_mode`` label ("hybrid" when anything matched, "keyword"
        when the result list is empty).
    """
    store, _root = _get_store(repo_root)
    try:
        matches = hybrid_search(
            store,
            query,
            kind=kind,
            limit=limit,
            context_files=context_files,
            model=model,
            provider=provider,
        )

        search_mode = "hybrid" if matches else "keyword"

        summary = f"Found {len(matches)} node(s) matching '{query}'"
        if kind:
            summary += f" (kind={kind})"

        # Fields shared by both detail levels, in the order they are emitted.
        payload: dict[str, object] = {
            "status": "ok",
            "query": query,
            "search_mode": search_mode,
            "summary": summary,
        }

        if detail_level == "minimal":
            wanted = ("name", "kind", "file_path", "score")
            trimmed = []
            for match in matches[:5]:
                trimmed.append(
                    {field: match[field] for field in wanted if field in match}
                )
            payload["results"] = trimmed
            return payload

        payload["results"] = matches
        # Hints are computed from the response before ``_hints`` is attached.
        payload["_hints"] = generate_hints(
            "semantic_search_nodes", payload, get_session()
        )
        return payload
    finally:
        store.close()
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
# ---------------------------------------------------------------------------
|
|
452
|
+
# Tool 6: list_graph_stats
|
|
453
|
+
# ---------------------------------------------------------------------------
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
def list_graph_stats(repo_root: str | None = None) -> dict[str, Any]:
    """Report aggregate statistics about the knowledge graph.

    Args:
        repo_root: Repository root path. Auto-detected if omitted.

    Returns:
        A dict with a multi-line text ``summary`` plus the raw figures:
        total nodes and edges, per-kind breakdowns, languages, file count,
        last update time and the number of embedded nodes.
    """
    store, root = _get_store(repo_root)
    try:
        stats = store.get_stats()

        language_text = ", ".join(stats.languages) if stats.languages else "none"
        lines = [
            f"Graph statistics for {root.name}:",
            f" Files: {stats.files_count}",
            f" Total nodes: {stats.total_nodes}",
            f" Total edges: {stats.total_edges}",
            f" Languages: {language_text}",
            f" Last updated: {stats.last_updated or 'never'}",
            "",
            "Nodes by kind:",
        ]
        lines.extend(
            f" {label}: {total}"
            for label, total in sorted(stats.nodes_by_kind.items())
        )
        lines += ["", "Edges by kind:"]
        lines.extend(
            f" {label}: {total}"
            for label, total in sorted(stats.edges_by_kind.items())
        )

        # Embedding figures come from a separate store opened on the same
        # database path; it is closed again whether or not counting succeeds.
        emb_store = EmbeddingStore(get_db_path(root))
        try:
            emb_count = emb_store.count()
            lines += ["", f"Embeddings: {emb_count} nodes embedded"]
            if not emb_store.available:
                lines.append(
                    " (install sentence-transformers for semantic search)"
                )
        finally:
            emb_store.close()

        return {
            "status": "ok",
            "summary": "\n".join(lines),
            "total_nodes": stats.total_nodes,
            "total_edges": stats.total_edges,
            "nodes_by_kind": stats.nodes_by_kind,
            "edges_by_kind": stats.edges_by_kind,
            "languages": stats.languages,
            "files_count": stats.files_count,
            "last_updated": stats.last_updated,
            "embeddings_count": emb_count,
        }
    finally:
        store.close()
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
# ---------------------------------------------------------------------------
|
|
516
|
+
# Tool 9: find_large_functions
|
|
517
|
+
# ---------------------------------------------------------------------------
|
|
518
|
+
|
|
519
|
+
|
|
520
|
+
def find_large_functions(
    min_lines: int = 50,
    kind: str | None = None,
    file_path_pattern: str | None = None,
    limit: int = 50,
    repo_root: str | None = None,
) -> dict[str, Any]:
    """List graph nodes whose line span meets a minimum size.

    Intended for spotting decomposition targets, code-quality audits and
    size-limit enforcement during review.

    Args:
        min_lines: Minimum line count to flag (default: 50).
        kind: Filter by node kind: Function, Class, File, or Test.
        file_path_pattern: Filter by file path substring (e.g. "components/").
        limit: Maximum results (default: 50).
        repo_root: Repository root path. Auto-detected if omitted.

    Returns:
        A dict with the matching nodes (each extended with ``line_count`` and
        ``relative_path``), the number found, the threshold used, and a text
        summary that previews the first ten entries.
    """
    store, root = _get_store(repo_root)
    try:
        oversized = store.get_nodes_by_size(
            min_lines=min_lines,
            kind=kind,
            file_path_pattern=file_path_pattern,
            limit=limit,
        )

        results = []
        for node in oversized:
            record = node_to_dict(node)

            # A missing (falsy) start or end line is reported as zero lines.
            if node.line_start and node.line_end:
                record["line_count"] = node.line_end - node.line_start + 1
            else:
                record["line_count"] = 0

            # Prefer a repo-relative path for readability; fall back to the
            # stored path when it does not live under the repo root.
            try:
                shown_path = str(Path(node.file_path).relative_to(root))
            except ValueError:
                shown_path = node.file_path
            record["relative_path"] = shown_path

            results.append(record)

        header = f"Found {len(results)} node(s) with >= {min_lines} lines"
        if kind:
            header += f" (kind={kind})"
        if file_path_pattern:
            header += f" matching '{file_path_pattern}'"
        header += ":"

        preview_size = 10
        summary_lines = [header]
        for item in results[:preview_size]:
            summary_lines.append(
                f" {item['line_count']:>4} lines | {item['kind']:>8} | "
                f"{item['name']} ({item['relative_path']}:{item['line_start']})"
            )
        if len(results) > preview_size:
            summary_lines.append(f" ... and {len(results) - 10} more")

        return {
            "status": "ok",
            "summary": "\n".join(summary_lines),
            "total_found": len(results),
            "min_lines": min_lines,
            "results": results,
        }
    finally:
        store.close()
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
# -------------------------------------------------------------------
|
|
592
|
+
# traverse_graph: free-form BFS / DFS traversal
|
|
593
|
+
# -------------------------------------------------------------------
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
def traverse_graph_func(
    query: str,
    mode: str = "bfs",
    depth: int = 3,
    token_budget: int = 2000,
    repo_root: str | None = None,
) -> dict[str, Any]:
    """BFS/DFS traversal from best-matching node.

    The starting node is the top ``hybrid_search`` hit for ``query``. Edges
    are followed in both directions (outgoing and incoming).

    Args:
        query: Search string to find the starting node.
        mode: "bfs" (breadth-first). Any other value, e.g. "dfs", traverses
            depth-first.
        depth: Max traversal depth; clamped to the range 1-6. Default: 3.
        token_budget: Approximate token limit for results, estimated as one
            token per four characters of each entry's string form.
        repo_root: Repository root path.

    Returns:
        On success: the start node, the mode, the effective ``max_depth``,
        the visited entries in traversal order, a ``truncated`` flag set when
        the token budget stopped the walk, and follow-up tool suggestions.
        When nothing matches ``query``: a dict with ``error`` and an empty
        ``nodes`` list.
    """
    # Local import keeps the module's import block untouched.
    from collections import deque

    store, _root = _get_store(repo_root)
    try:
        results = hybrid_search(store, query, limit=1)
        if not results:
            return {
                "error": f"No node matching '{query}'",
                "nodes": [],
            }

        start_qn = results[0]["qualified_name"]
        depth = max(1, min(depth, 6))
        breadth_first = mode == "bfs"

        visited: set[str] = set()
        # deque gives O(1) pops at both ends: popleft() for BFS, pop() for
        # DFS (list.pop(0) is O(n) per call).
        frontier: deque[tuple[str, int]] = deque([(start_qn, 0)])
        traversal: list[dict] = []
        approx_tokens = 0

        while frontier:
            if breadth_first:
                current_qn, cur_depth = frontier.popleft()
            else:
                current_qn, cur_depth = frontier.pop()

            if current_qn in visited:
                continue
            visited.add(current_qn)

            node = store.get_node(current_qn)
            if not node:
                # Edge endpoint with no node in the graph: nothing to report
                # and nothing to expand.
                continue

            entry = {
                "name": _sanitize_name(node.name),
                "qualified_name": node.qualified_name,
                "kind": node.kind,
                "file": node.file_path,
                "depth": cur_depth,
            }
            approx_tokens += len(str(entry)) // 4
            if approx_tokens > token_budget:
                break

            traversal.append(entry)

            # Neighbours would sit beyond the depth limit and be discarded
            # when popped, so skip the edge lookups entirely.
            if cur_depth >= depth:
                continue

            next_depth = cur_depth + 1
            for e in store.get_edges_by_source(current_qn):
                if e.target_qualified not in visited:
                    frontier.append((e.target_qualified, next_depth))
            for e in store.get_edges_by_target(current_qn):
                if e.source_qualified not in visited:
                    frontier.append((e.source_qualified, next_depth))

        return {
            "start_node": start_qn,
            "mode": mode,
            "max_depth": depth,
            "nodes_visited": len(traversal),
            "traversal": traversal,
            "truncated": approx_tokens > token_budget,
            "next_tool_suggestions": [
                "query_graph callers_of"
                " -- focused relationship query",
                "get_impact_radius"
                " -- blast radius analysis",
            ],
        }
    finally:
        store.close()
|