code-review-graph-codeblackwell 2.3.6.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. code_review_graph/__init__.py +20 -0
  2. code_review_graph/__main__.py +4 -0
  3. code_review_graph/analysis.py +410 -0
  4. code_review_graph/changes.py +409 -0
  5. code_review_graph/cli.py +1255 -0
  6. code_review_graph/communities.py +874 -0
  7. code_review_graph/constants.py +23 -0
  8. code_review_graph/context_savings.py +317 -0
  9. code_review_graph/custom_languages.py +322 -0
  10. code_review_graph/daemon.py +1009 -0
  11. code_review_graph/daemon_cli.py +320 -0
  12. code_review_graph/docs/LLM-OPTIMIZED-REFERENCE.md +71 -0
  13. code_review_graph/embeddings.py +1006 -0
  14. code_review_graph/enrich.py +303 -0
  15. code_review_graph/eval/__init__.py +33 -0
  16. code_review_graph/eval/benchmarks/__init__.py +1 -0
  17. code_review_graph/eval/benchmarks/agent_baseline.py +193 -0
  18. code_review_graph/eval/benchmarks/build_performance.py +60 -0
  19. code_review_graph/eval/benchmarks/flow_completeness.py +36 -0
  20. code_review_graph/eval/benchmarks/impact_accuracy.py +220 -0
  21. code_review_graph/eval/benchmarks/multi_hop_retrieval.py +125 -0
  22. code_review_graph/eval/benchmarks/search_quality.py +59 -0
  23. code_review_graph/eval/benchmarks/token_efficiency.py +143 -0
  24. code_review_graph/eval/configs/code-review-graph.yaml +50 -0
  25. code_review_graph/eval/configs/express.yaml +45 -0
  26. code_review_graph/eval/configs/fastapi.yaml +48 -0
  27. code_review_graph/eval/configs/flask.yaml +50 -0
  28. code_review_graph/eval/configs/gin.yaml +51 -0
  29. code_review_graph/eval/configs/httpx.yaml +48 -0
  30. code_review_graph/eval/reporter.py +301 -0
  31. code_review_graph/eval/runner.py +211 -0
  32. code_review_graph/eval/scorer.py +85 -0
  33. code_review_graph/eval/token_benchmark.py +182 -0
  34. code_review_graph/exports.py +409 -0
  35. code_review_graph/flows.py +698 -0
  36. code_review_graph/graph.py +1427 -0
  37. code_review_graph/graph_diff.py +122 -0
  38. code_review_graph/hints.py +384 -0
  39. code_review_graph/incremental.py +1245 -0
  40. code_review_graph/jedi_resolver.py +303 -0
  41. code_review_graph/main.py +1079 -0
  42. code_review_graph/memory.py +142 -0
  43. code_review_graph/migrations.py +284 -0
  44. code_review_graph/parser.py +6957 -0
  45. code_review_graph/postprocessing.py +134 -0
  46. code_review_graph/prompts.py +159 -0
  47. code_review_graph/refactor.py +852 -0
  48. code_review_graph/registry.py +319 -0
  49. code_review_graph/rescript_resolver.py +206 -0
  50. code_review_graph/search.py +447 -0
  51. code_review_graph/skills.py +1481 -0
  52. code_review_graph/spring_resolver.py +200 -0
  53. code_review_graph/temporal_resolver.py +199 -0
  54. code_review_graph/token_benchmark.py +125 -0
  55. code_review_graph/tools/__init__.py +156 -0
  56. code_review_graph/tools/_common.py +176 -0
  57. code_review_graph/tools/analysis_tools.py +184 -0
  58. code_review_graph/tools/build.py +541 -0
  59. code_review_graph/tools/community_tools.py +246 -0
  60. code_review_graph/tools/context.py +152 -0
  61. code_review_graph/tools/docs.py +274 -0
  62. code_review_graph/tools/flows_tools.py +176 -0
  63. code_review_graph/tools/query.py +692 -0
  64. code_review_graph/tools/refactor_tools.py +168 -0
  65. code_review_graph/tools/registry_tools.py +125 -0
  66. code_review_graph/tools/review.py +477 -0
  67. code_review_graph/tsconfig_resolver.py +257 -0
  68. code_review_graph/visualization.py +2184 -0
  69. code_review_graph/wiki.py +305 -0
  70. code_review_graph_codeblackwell-2.3.6.post1.dist-info/METADATA +718 -0
  71. code_review_graph_codeblackwell-2.3.6.post1.dist-info/RECORD +74 -0
  72. code_review_graph_codeblackwell-2.3.6.post1.dist-info/WHEEL +4 -0
  73. code_review_graph_codeblackwell-2.3.6.post1.dist-info/entry_points.txt +3 -0
  74. code_review_graph_codeblackwell-2.3.6.post1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,692 @@
1
+ """Tools 2, 3, 5, 6, 9: query / search / stats helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ from ..context_savings import attach_context_savings, estimate_file_tokens
10
+ from ..embeddings import EmbeddingStore
11
+ from ..graph import _sanitize_name, edge_to_dict, node_to_dict
12
+ from ..hints import generate_hints, get_session
13
+ from ..incremental import get_changed_files, get_db_path, get_staged_and_unstaged
14
+ from ..search import hybrid_search
15
+ from ._common import _BUILTIN_CALL_NAMES, _get_store, _resolve_graph_file_paths
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ # ---------------------------------------------------------------------------
20
+ # Tool 2: get_impact_radius
21
+ # ---------------------------------------------------------------------------
22
+
23
+ _QUERY_PATTERNS = {
24
+ "callers_of": "Find all functions that call a given function",
25
+ "callees_of": "Find all functions called by a given function",
26
+ "imports_of": "Find all imports of a given file or module",
27
+ "importers_of": "Find all files that import a given file or module",
28
+ "children_of": "Find all nodes contained in a file or class",
29
+ "tests_for": "Find all tests for a given function or class",
30
+ "inheritors_of": "Find all classes that inherit from a given class",
31
+ "file_summary": "Get a summary of all nodes in a file",
32
+ }
33
+
34
+
35
def get_impact_radius(
    changed_files: list[str] | None = None,
    max_depth: int = 2,
    max_results: int = 500,
    repo_root: str | None = None,
    base: str = "HEAD~1",
    detail_level: str = "standard",
) -> dict[str, Any]:
    """Analyze the blast radius of changed files.

    Args:
        changed_files: Explicit list of changed file paths (relative to repo root).
            If omitted, auto-detects from git diff, then falls back to
            staged/unstaged changes when the diff is empty.
        max_depth: How many hops to traverse in the graph (default: 2).
        max_results: Maximum impacted nodes to return (default: 500).
        repo_root: Repository root path. Auto-detected if omitted.
        base: Git ref for auto-detecting changes (default: HEAD~1).
        detail_level: "standard" (full output) or "minimal" (summary only).

    Returns:
        Changed nodes, impacted nodes, impacted files, connecting edges,
        plus ``truncated`` flag and ``total_impacted`` count. In "minimal"
        mode only the summary, a coarse ``risk`` tier, the impacted file
        count, up to five entity names, and the truncation info are returned.
    """
    store, root = _get_store(repo_root)
    try:
        if changed_files is None:
            changed_files = get_changed_files(root, base)
            if not changed_files:
                changed_files = get_staged_and_unstaged(root)

        if not changed_files:
            # Same key set as the standard response so callers can index
            # "changed_files" / "edges" without special-casing this path.
            return {
                "status": "ok",
                "summary": "No changed files detected.",
                "changed_files": [],
                "changed_nodes": [],
                "impacted_nodes": [],
                "impacted_files": [],
                "edges": [],
                "truncated": False,
                "total_impacted": 0,
            }

        # Resolve user-facing paths to the file paths stored in the graph.
        original_tokens = estimate_file_tokens(root, changed_files)
        abs_files = _resolve_graph_file_paths(store, root, changed_files)
        result = store.get_impact_radius(
            abs_files, max_depth=max_depth, max_nodes=max_results
        )

        changed_dicts = [node_to_dict(n) for n in result["changed_nodes"]]
        impacted_dicts = [node_to_dict(n) for n in result["impacted_nodes"]]
        edge_dicts = [edge_to_dict(e) for e in result["edges"]]
        truncated = result["truncated"]
        total_impacted = result["total_impacted"]
        impacted_files = result["impacted_files"]

        summary_parts = [
            f"Blast radius for {len(changed_files)} changed file(s):",
            f" - {len(changed_dicts)} nodes directly changed",
            f" - {len(impacted_dicts)} nodes impacted (within {max_depth} hops)",
            f" - {len(impacted_files)} additional files affected",
        ]
        if truncated:
            summary_parts.append(
                f" - Results truncated: showing {len(impacted_dicts)}"
                f" of {total_impacted} impacted nodes"
            )
        summary = "\n".join(summary_parts)

        if detail_level == "minimal":
            # Rate risk on the full impact size: the returned list is capped
            # at ``max_results``, so its length alone would under-report the
            # risk whenever the result was truncated below a tier threshold.
            impacted_count = max(len(impacted_dicts), total_impacted or 0)
            if impacted_count > 20:
                risk = "high"
            elif impacted_count > 5:
                risk = "medium"
            else:
                risk = "low"
            minimal_response = {
                "status": "ok",
                "summary": summary,
                "risk": risk,
                "impacted_file_count": len(impacted_files),
                "key_entities": [n["name"] for n in impacted_dicts[:5]],
                "truncated": truncated,
                "total_impacted": total_impacted,
            }
            attach_context_savings(minimal_response, original_tokens=original_tokens)
            return minimal_response

        response = {
            "status": "ok",
            "summary": summary,
            "changed_files": changed_files,
            "changed_nodes": changed_dicts,
            "impacted_nodes": impacted_dicts,
            "impacted_files": impacted_files,
            "edges": edge_dicts,
            "truncated": truncated,
            "total_impacted": total_impacted,
        }
        attach_context_savings(response, original_tokens=original_tokens)
        return response
    finally:
        store.close()
138
+
139
+
140
+ # ---------------------------------------------------------------------------
141
+ # Tool 3: query_graph
142
+ # ---------------------------------------------------------------------------
143
+
144
+
145
def query_graph(
    pattern: str,
    target: str,
    repo_root: str | None = None,
    detail_level: str = "standard",
) -> dict[str, Any]:
    """Run a predefined graph query.

    Args:
        pattern: Query pattern. One of: callers_of, callees_of, imports_of,
            importers_of, children_of, tests_for, inheritors_of, file_summary.
        target: The node name, qualified name, or file path to query about.
        repo_root: Repository root path. Auto-detected if omitted.
        detail_level: "standard" (full output) or "minimal" (summary only;
            at most five trimmed results plus ``result_count``).

    Returns:
        Matching nodes and edges for the query. ``status`` is "ok", "error"
        (unknown pattern), "ambiguous" (several name matches; candidates are
        listed) or "not_found".
    """
    store, root = _get_store(repo_root)
    try:
        if pattern not in _QUERY_PATTERNS:
            return {
                "status": "error",
                "error": (
                    f"Unknown pattern '{pattern}'. "
                    f"Available: {list(_QUERY_PATTERNS.keys())}"
                ),
            }

        results: list[dict] = []
        edges_out: list[dict] = []

        # For callers_of, skip common builtins early (bare names only)
        # "Who calls .map()?" returns hundreds of useless hits.
        # Qualified names (e.g. "utils.py::map") bypass this filter.
        if (
            pattern == "callers_of"
            and target in _BUILTIN_CALL_NAMES
            and "::" not in target
        ):
            return {
                "status": "ok", "pattern": pattern, "target": target,
                "description": _QUERY_PATTERNS[pattern],
                "summary": (
                    f"'{target}' is a common builtin "
                    "— callers_of skipped to avoid noise."
                ),
                "results": [], "edges": [],
            }

        # Resolve target - try as-is, then as absolute path, then search.
        # file_summary targets are paths, so skip broad node search.
        node = None
        if pattern != "file_summary":
            node = store.get_node(target)
            if not node:
                abs_target = str(root / target)
                node = store.get_node(abs_target)
            if not node:
                # Search by name
                candidates = store.search_nodes(target, limit=5)
                if len(candidates) == 1:
                    node = candidates[0]
                    target = node.qualified_name
                elif len(candidates) > 1:
                    return {
                        "status": "ambiguous",
                        "summary": (
                            f"Multiple matches for '{target}'. "
                            "Please use a qualified name."
                        ),
                        "candidates": [node_to_dict(c) for c in candidates],
                    }

        if not node and pattern != "file_summary":
            return {
                "status": "not_found",
                "summary": f"No node found matching '{target}'.",
            }

        qn = node.qualified_name if node else target

        if pattern == "callers_of":
            # Each caller node is reported once; every matching edge is kept.
            seen_sources: set[str] = set()
            for e in store.get_edges_by_target(qn):
                if e.kind == "CALLS":
                    if e.source_qualified not in seen_sources:
                        seen_sources.add(e.source_qualified)
                        caller = store.get_node(e.source_qualified)
                        if caller:
                            results.append(node_to_dict(caller))
                    edges_out.append(edge_to_dict(e))
            # Fallback: CALLS edges store unqualified target names
            # (e.g. "generateTestCode") while qn is fully qualified
            # (e.g. "file.ts::generateTestCode"). Search by plain name too.
            if node:
                for e in store.search_edges_by_target_name(node.name):
                    if e.source_qualified not in seen_sources:
                        seen_sources.add(e.source_qualified)
                        caller = store.get_node(e.source_qualified)
                        if caller:
                            results.append(node_to_dict(caller))
                    edges_out.append(edge_to_dict(e))

        elif pattern == "callees_of":
            seen_targets: set[str] = set()
            for e in store.get_edges_by_source(qn):
                if e.kind == "CALLS":
                    if e.target_qualified not in seen_targets:
                        seen_targets.add(e.target_qualified)
                        callee = store.get_node(e.target_qualified)
                        if callee:
                            results.append(node_to_dict(callee))
                        elif "::" not in e.target_qualified:
                            # Unresolved bare name: report a placeholder so
                            # the callee is still visible to the caller.
                            results.append({
                                "kind": "Function",
                                "name": e.target_qualified,
                                "qualified_name": e.target_qualified,
                            })
                    edges_out.append(edge_to_dict(e))

        elif pattern == "imports_of":
            for e in store.get_edges_by_source(qn):
                if e.kind == "IMPORTS_FROM":
                    results.append({"import_target": e.target_qualified})
                    edges_out.append(edge_to_dict(e))

        elif pattern == "importers_of":
            # Find edges where target matches this file.
            # Use resolve() to canonicalize the path, matching how
            # _resolve_module_to_file stores edge targets.
            abs_target = (
                str((root / target).resolve()) if node is None
                else node.file_path
            )
            for e in store.get_edges_by_target(abs_target):
                if e.kind == "IMPORTS_FROM":
                    results.append({
                        "importer": e.source_qualified,
                        "file": e.file_path,
                    })
                    edges_out.append(edge_to_dict(e))

        elif pattern == "children_of":
            for e in store.get_edges_by_source(qn):
                if e.kind == "CONTAINS":
                    child = store.get_node(e.target_qualified)
                    if child:
                        results.append(node_to_dict(child))

        elif pattern == "tests_for":
            for e in store.get_edges_by_target(qn):
                if e.kind == "TESTED_BY":
                    test = store.get_node(e.source_qualified)
                    if test:
                        results.append(node_to_dict(test))
            # Also search by naming convention
            name = node.name if node else target
            test_nodes = store.search_nodes(f"test_{name}", limit=10)
            test_nodes += store.search_nodes(f"Test{name}", limit=10)
            seen = {r.get("qualified_name") for r in results}
            for t in test_nodes:
                if t.qualified_name not in seen and t.is_test:
                    # Track additions as well: the two searches above can
                    # return the same node, which must be listed only once.
                    seen.add(t.qualified_name)
                    results.append(node_to_dict(t))

        elif pattern == "inheritors_of":
            for e in store.get_edges_by_target(qn):
                if e.kind in ("INHERITS", "IMPLEMENTS"):
                    child = store.get_node(e.source_qualified)
                    if child:
                        results.append(node_to_dict(child))
                    edges_out.append(edge_to_dict(e))
            # Fallback: INHERITS/IMPLEMENTS edges store unqualified base names
            # (e.g. "Animal") while qn is fully qualified
            # (e.g. "sample.dart::Animal"). Search by plain name too. See: #87
            if not results and node:
                for kind in ("INHERITS", "IMPLEMENTS"):
                    for e in store.search_edges_by_target_name(node.name, kind=kind):
                        child = store.get_node(e.source_qualified)
                        if child:
                            results.append(node_to_dict(child))
                        edges_out.append(edge_to_dict(e))

        elif pattern == "file_summary":
            graph_paths = _resolve_graph_file_paths(store, root, [target])
            for graph_path in graph_paths:
                for n in store.get_nodes_by_file(graph_path):
                    results.append(node_to_dict(n))

        summary = (
            f"Found {len(results)} result(s) "
            f"for {pattern}('{target}')"
        )

        if detail_level == "minimal":
            minimal_results = []
            for r in results[:5]:
                compact = {
                    k: r[k]
                    for k in ("name", "kind", "file_path")
                    if k in r
                }
                # imports_of / importers_of records carry none of the node
                # keys; keep them whole rather than returning empty dicts.
                minimal_results.append(compact or dict(r))
            return {
                "status": "ok",
                "pattern": pattern,
                "target": target,
                "description": _QUERY_PATTERNS[pattern],
                "summary": summary,
                "result_count": len(results),
                "results": minimal_results,
            }

        return {
            "status": "ok",
            "pattern": pattern,
            "target": target,
            "description": _QUERY_PATTERNS[pattern],
            "summary": summary,
            "results": results,
            "edges": edges_out,
        }
    finally:
        store.close()
369
+
370
+
371
+ # ---------------------------------------------------------------------------
372
+ # Tool 5: semantic_search_nodes
373
+ # ---------------------------------------------------------------------------
374
+
375
+
376
def semantic_search_nodes(
    query: str,
    kind: str | None = None,
    limit: int = 20,
    repo_root: str | None = None,
    context_files: list[str] | None = None,
    model: str | None = None,
    provider: str | None = None,
    detail_level: str = "standard",
) -> dict[str, Any]:
    """Search the graph for nodes matching a name, keyword, or meaning.

    The lookup is delegated to ``hybrid_search``. The response's
    ``search_mode`` field reads "hybrid" when that call produced hits and
    "keyword" when it produced none.

    Args:
        query: Search string to match against node names and qualified names.
        kind: Optional filter by node kind (File, Class, Function, Type, Test).
        limit: Maximum results to return (default: 20).
        repo_root: Repository root path. Auto-detected if omitted.
        context_files: Optional list of file paths, forwarded to
            ``hybrid_search`` (nodes in these files receive a relevance boost).
        model: Optional model identifier, forwarded unchanged to
            ``hybrid_search``.
        provider: Optional provider identifier, forwarded unchanged to
            ``hybrid_search``.
        detail_level: "standard" (full output) or "minimal" (summary only,
            at most five trimmed results and no hints).

    Returns:
        Ranked list of matching nodes with a summary line; the standard
        form additionally carries ``_hints``.
    """
    store, _root = _get_store(repo_root)
    try:
        hits = hybrid_search(
            store,
            query,
            kind=kind,
            limit=limit,
            context_files=context_files,
            model=model,
            provider=provider,
        )

        search_mode = "hybrid" if hits else "keyword"
        kind_note = f" (kind={kind})" if kind else ""
        summary = f"Found {len(hits)} node(s) matching '{query}'" + kind_note

        if detail_level != "minimal":
            payload: dict[str, object] = {
                "status": "ok",
                "query": query,
                "search_mode": search_mode,
                "summary": summary,
                "results": hits,
            }
            # Hints are computed from the payload before "_hints" is attached.
            hints = generate_hints("semantic_search_nodes", payload, get_session())
            payload["_hints"] = hints
            return payload

        # Minimal mode: first five hits, reduced to a handful of fields.
        wanted = ("name", "kind", "file_path", "score")
        trimmed = []
        for hit in hits[:5]:
            trimmed.append({field: hit[field] for field in wanted if field in hit})
        return {
            "status": "ok",
            "query": query,
            "search_mode": search_mode,
            "summary": summary,
            "results": trimmed,
        }
    finally:
        store.close()
449
+
450
+
451
+ # ---------------------------------------------------------------------------
452
+ # Tool 6: list_graph_stats
453
+ # ---------------------------------------------------------------------------
454
+
455
+
456
def list_graph_stats(repo_root: str | None = None) -> dict[str, Any]:
    """Report aggregate statistics for the knowledge graph.

    Args:
        repo_root: Repository root path. Auto-detected if omitted.

    Returns:
        A dict with a multi-line ``summary`` plus total node/edge counts,
        per-kind breakdowns, languages, file count, last update time, and
        the number of embedded nodes.
    """
    store, root = _get_store(repo_root)
    try:
        stats = store.get_stats()

        language_list = ", ".join(stats.languages) if stats.languages else "none"
        lines = [
            f"Graph statistics for {root.name}:",
            f" Files: {stats.files_count}",
            f" Total nodes: {stats.total_nodes}",
            f" Total edges: {stats.total_edges}",
            f" Languages: {language_list}",
            f" Last updated: {stats.last_updated or 'never'}",
            "",
            "Nodes by kind:",
        ]
        lines.extend(
            f" {node_kind}: {total}"
            for node_kind, total in sorted(stats.nodes_by_kind.items())
        )
        lines += ["", "Edges by kind:"]
        lines.extend(
            f" {edge_kind}: {total}"
            for edge_kind, total in sorted(stats.edges_by_kind.items())
        )

        # Add embedding info; the embedding store is always closed afterwards.
        embeddings = EmbeddingStore(get_db_path(root))
        try:
            emb_count = embeddings.count()
            lines += ["", f"Embeddings: {emb_count} nodes embedded"]
            if not embeddings.available:
                lines.append(
                    " (install sentence-transformers for semantic search)"
                )
        finally:
            embeddings.close()

        return {
            "status": "ok",
            "summary": "\n".join(lines),
            "total_nodes": stats.total_nodes,
            "total_edges": stats.total_edges,
            "nodes_by_kind": stats.nodes_by_kind,
            "edges_by_kind": stats.edges_by_kind,
            "languages": stats.languages,
            "files_count": stats.files_count,
            "last_updated": stats.last_updated,
            "embeddings_count": emb_count,
        }
    finally:
        store.close()
513
+
514
+
515
+ # ---------------------------------------------------------------------------
516
+ # Tool 9: find_large_functions
517
+ # ---------------------------------------------------------------------------
518
+
519
+
520
def find_large_functions(
    min_lines: int = 50,
    kind: str | None = None,
    file_path_pattern: str | None = None,
    limit: int = 50,
    repo_root: str | None = None,
) -> dict[str, Any]:
    """List graph nodes whose line span meets a minimum size.

    Handy for spotting decomposition candidates, running code-quality
    audits, and enforcing size limits in review.

    Args:
        min_lines: Minimum line count to flag (default: 50).
        kind: Filter by node kind: Function, Class, File, or Test.
        file_path_pattern: Filter by file path substring (e.g. "components/").
        limit: Maximum results (default: 50).
        repo_root: Repository root path. Auto-detected if omitted.

    Returns:
        A dict with a ``summary`` (header plus up to ten entries), the
        ``total_found`` count, the ``min_lines`` threshold used, and the
        full ``results`` list in the order the store returned it. Each
        result carries ``line_count`` and ``relative_path``.
    """
    store, root = _get_store(repo_root)
    try:
        oversized = store.get_nodes_by_size(
            min_lines=min_lines,
            kind=kind,
            file_path_pattern=file_path_pattern,
            limit=limit,
        )

        records = []
        for node in oversized:
            record = node_to_dict(node)
            if node.line_start and node.line_end:
                record["line_count"] = node.line_end - node.line_start + 1
            else:
                # Missing (or zero) line bounds: report a zero-length span.
                record["line_count"] = 0
            # Prefer a repo-relative path for readability; keep the stored
            # path when the file is not located under the repo root.
            try:
                shown_path = str(Path(node.file_path).relative_to(root))
            except ValueError:
                shown_path = node.file_path
            record["relative_path"] = shown_path
            records.append(record)

        kind_note = f" (kind={kind})" if kind else ""
        pattern_note = f" matching '{file_path_pattern}'" if file_path_pattern else ""
        header = (
            f"Found {len(records)} node(s) with >= {min_lines} lines"
            + kind_note
            + pattern_note
            + ":"
        )
        lines = [header]
        lines.extend(
            f" {rec['line_count']:>4} lines | {rec['kind']:>8} | "
            f"{rec['name']} ({rec['relative_path']}:{rec['line_start']})"
            for rec in records[:10]
        )
        overflow = len(records) - 10
        if overflow > 0:
            lines.append(f" ... and {overflow} more")

        return {
            "status": "ok",
            "summary": "\n".join(lines),
            "total_found": len(records),
            "min_lines": min_lines,
            "results": records,
        }
    finally:
        store.close()
589
+
590
+
591
+ # -------------------------------------------------------------------
592
+ # traverse_graph: free-form BFS / DFS traversal
593
+ # -------------------------------------------------------------------
594
+
595
+
596
def traverse_graph_func(
    query: str,
    mode: str = "bfs",
    depth: int = 3,
    token_budget: int = 2000,
    repo_root: str | None = None,
) -> dict[str, Any]:
    """BFS/DFS traversal from best-matching node.

    The start node is the top ``hybrid_search`` hit for ``query``. Edges are
    followed in both directions (outgoing and incoming).

    Args:
        query: Search string to find the starting node.
        mode: "bfs" (breadth-first) or "dfs" (depth-first). Any value other
            than "bfs" is handled as depth-first.
        depth: Max traversal depth; clamped to the range 1-6. Default: 3.
        token_budget: Approximate token limit for results. Each entry is
            costed at ``len(str(entry)) // 4``; traversal stops at the first
            entry that would push the running total over the budget.
        repo_root: Repository root path.

    Returns:
        On success: start node, mode, effective max depth, visited count,
        the traversal entries, a ``truncated`` flag, and follow-up tool
        suggestions. When no node matches ``query``: a dict with ``error``
        and an empty ``nodes`` list.
    """
    # Local import keeps this block self-contained.
    from collections import deque

    store, root = _get_store(repo_root)
    try:
        results = hybrid_search(store, query, limit=1)
        if not results:
            return {
                "error": f"No node matching '{query}'",
                "nodes": [],
            }

        start_qn = results[0]["qualified_name"]
        depth = max(1, min(depth, 6))
        breadth_first = mode == "bfs"

        # BFS / DFS traversal. A deque gives O(1) removal from either end,
        # so it serves as the FIFO queue (BFS) and as the LIFO stack (DFS).
        visited: dict[str, int] = {}  # qn -> depth
        frontier: deque[tuple[str, int]] = deque([(start_qn, 0)])
        traversal: list[dict] = []
        approx_tokens = 0

        while frontier:
            if breadth_first:
                current_qn, cur_depth = frontier.popleft()
            else:
                current_qn, cur_depth = frontier.pop()

            if current_qn in visited:
                continue

            visited[current_qn] = cur_depth
            node = store.get_node(current_qn)
            if not node:
                # Edge endpoint with no stored node: skip, do not expand.
                continue

            entry = {
                "name": _sanitize_name(node.name),
                "qualified_name": node.qualified_name,
                "kind": node.kind,
                "file": node.file_path,
                "depth": cur_depth,
            }
            approx_tokens += len(str(entry)) // 4
            if approx_tokens > token_budget:
                break

            traversal.append(entry)

            # Neighbours of a node at the depth limit would exceed it, so
            # skip both edge lookups instead of enqueuing entries that
            # would only be discarded.
            if cur_depth >= depth:
                continue

            # Get neighbours
            out_edges = store.get_edges_by_source(current_qn)
            in_edges = store.get_edges_by_target(current_qn)
            for e in out_edges:
                tgt = e.target_qualified
                if tgt not in visited:
                    frontier.append((tgt, cur_depth + 1))
            for e in in_edges:
                src = e.source_qualified
                if src not in visited:
                    frontier.append((src, cur_depth + 1))

        return {
            "start_node": start_qn,
            "mode": mode,
            "max_depth": depth,
            "nodes_visited": len(traversal),
            "traversal": traversal,
            "truncated": approx_tokens > token_budget,
            "next_tool_suggestions": [
                "query_graph callers_of"
                " -- focused relationship query",
                "get_impact_radius"
                " -- blast radius analysis",
            ],
        }
    finally:
        store.close()