codegraph-cli 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. codegraph_cli/__init__.py +4 -0
  2. codegraph_cli/agents.py +191 -0
  3. codegraph_cli/bug_detector.py +386 -0
  4. codegraph_cli/chat_agent.py +352 -0
  5. codegraph_cli/chat_session.py +220 -0
  6. codegraph_cli/cli.py +330 -0
  7. codegraph_cli/cli_chat.py +367 -0
  8. codegraph_cli/cli_diagnose.py +133 -0
  9. codegraph_cli/cli_refactor.py +230 -0
  10. codegraph_cli/cli_setup.py +470 -0
  11. codegraph_cli/cli_test.py +177 -0
  12. codegraph_cli/cli_v2.py +267 -0
  13. codegraph_cli/codegen_agent.py +265 -0
  14. codegraph_cli/config.py +31 -0
  15. codegraph_cli/config_manager.py +341 -0
  16. codegraph_cli/context_manager.py +500 -0
  17. codegraph_cli/crew_agents.py +123 -0
  18. codegraph_cli/crew_chat.py +159 -0
  19. codegraph_cli/crew_tools.py +497 -0
  20. codegraph_cli/diff_engine.py +265 -0
  21. codegraph_cli/embeddings.py +241 -0
  22. codegraph_cli/graph_export.py +144 -0
  23. codegraph_cli/llm.py +642 -0
  24. codegraph_cli/models.py +47 -0
  25. codegraph_cli/models_v2.py +185 -0
  26. codegraph_cli/orchestrator.py +49 -0
  27. codegraph_cli/parser.py +800 -0
  28. codegraph_cli/performance_analyzer.py +223 -0
  29. codegraph_cli/project_context.py +230 -0
  30. codegraph_cli/rag.py +200 -0
  31. codegraph_cli/refactor_agent.py +452 -0
  32. codegraph_cli/security_scanner.py +366 -0
  33. codegraph_cli/storage.py +390 -0
  34. codegraph_cli/templates/graph_interactive.html +257 -0
  35. codegraph_cli/testgen_agent.py +316 -0
  36. codegraph_cli/validation_engine.py +285 -0
  37. codegraph_cli/vector_store.py +293 -0
  38. codegraph_cli-2.0.0.dist-info/METADATA +318 -0
  39. codegraph_cli-2.0.0.dist-info/RECORD +43 -0
  40. codegraph_cli-2.0.0.dist-info/WHEEL +5 -0
  41. codegraph_cli-2.0.0.dist-info/entry_points.txt +2 -0
  42. codegraph_cli-2.0.0.dist-info/licenses/LICENSE +21 -0
  43. codegraph_cli-2.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,800 @@
1
+ """Semantic code parser using Tree-sitter for multi-language AST extraction.
2
+
3
+ Replaces the legacy ast-based parser with Tree-sitter for:
4
+ - Error-tolerant parsing (handles broken / incomplete syntax gracefully)
5
+ - Multi-language support (Python now; JS/TS/Go extensible)
6
+ - Semantic chunking by function / class definition (not line-count windows)
7
+
8
+ Falls back to Python's built-in ``ast`` module when tree-sitter is unavailable.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import ast
14
+ import logging
15
+ from abc import ABC, abstractmethod
16
+ from pathlib import Path
17
+ from typing import Any, Dict, List, Optional, Set, Tuple
18
+
19
+ from .models import Edge, Node
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ # ---------------------------------------------------------------------------
24
+ # Language <-> file-extension mapping (extensible)
25
+ # ---------------------------------------------------------------------------
26
# Maps a file extension (leading dot included) to the language name that the
# parsers use as their lookup key. Several extensions may share one language.
LANGUAGE_MAP: Dict[str, str] = {
    ".py": "python",
    ".js": "javascript",
    ".ts": "typescript",
    ".tsx": "tsx",
    ".jsx": "javascript",
    ".go": "go",
    ".rs": "rust",
    ".java": "java",
    ".rb": "ruby",
    ".cpp": "cpp",
    ".c": "c",
    ".cs": "c_sharp",
}

# Path components that exclude a file from project scans. Matching is by exact
# component name: a file is skipped when any part of its path equals an entry.
SKIP_DIRS: Set[str] = {
    ".venv", "venv", "__pycache__", "node_modules", ".git",
    "site-packages", ".tox", ".pytest_cache", "build", "dist",
    ".mypy_cache", ".ruff_cache", "htmlcov", ".eggs",
    "egg-info", ".codegraph", "lancedb",
}
47
+
48
+
49
+ # ===================================================================
50
+ # Abstract Parser Interface
51
+ # ===================================================================
52
+
53
class Parser(ABC):
    """Common contract implemented by every parser backend.

    A backend turns source code into graph ``Node`` and ``Edge`` objects,
    either one file at a time or for a whole project tree.
    """

    @abstractmethod
    def parse_file(
        self,
        file_path: Path,
        source: Optional[str] = None,
    ) -> Tuple[List[Node], List[Edge]]:
        """Return the nodes and edges extracted from one file.

        Args:
            file_path: Location of the file to parse.
            source: Optional source text for the file.
        """

    @abstractmethod
    def parse_project(self) -> Tuple[List[Node], List[Edge]]:
        """Return the nodes and edges for the whole project under *project_root*."""

    @abstractmethod
    def supports_language(self, language: str) -> bool:
        """Tell whether this backend is able to parse *language*."""
74
+
75
+
76
+ # ===================================================================
77
+ # Tree-sitter Parser (Primary)
78
+ # ===================================================================
79
+
80
class TreeSitterParser(Parser):
    """Error-tolerant, multi-language parser built on Tree-sitter.

    Grammars are loaded from the per-language packages listed in
    ``_GRAMMAR_MODULES``. Tree-sitter produces a *concrete syntax tree*
    (CST) that preserves every token, allowing reliable extraction even
    when the source has minor syntax errors.

    Definitions, calls and imports are currently extracted for Python only;
    files of any other loaded language yield just their module node.
    """

    # Map language name -> module that provides the tree-sitter Language
    _GRAMMAR_MODULES: Dict[str, str] = {
        "python": "tree_sitter_python",
        "javascript": "tree_sitter_javascript",
        "typescript": "tree_sitter_typescript",
    }

    # CST node types that open a nested definition. Calls inside them belong
    # to the nested definition, not to the enclosing function.
    _NESTED_DEFINITION_TYPES = frozenset({
        "function_definition",
        "class_definition",
        "decorated_definition",
    })

    def __init__(
        self,
        project_root: Path,
        languages: Optional[List[str]] = None,
    ) -> None:
        """Create a parser rooted at *project_root*.

        Args:
            project_root: Directory that relative paths and module names are
                computed against.
            languages: Language names to load grammars for; defaults to
                ``["python"]``.
        """
        self.project_root = project_root
        self._parsers: Dict[str, Any] = {}
        self._requested_languages = languages or ["python"]
        self._init_parsers()

    # ------------------------------------------------------------------
    # Initialisation
    # ------------------------------------------------------------------

    def _init_parsers(self) -> None:
        """Load one tree-sitter parser per requested language.

        Failures are logged and leave the language unsupported; nothing is
        raised, so callers can fall back to another backend.
        """
        try:
            from tree_sitter import Language, Parser as TSParser  # type: ignore[import-untyped]
        except ImportError:
            logger.warning(
                "tree-sitter is not installed -- "
                "Tree-sitter parsing unavailable. "
                "Install with: pip install tree-sitter tree-sitter-python"
            )
            return

        import importlib

        for lang in self._requested_languages:
            mod_name = self._GRAMMAR_MODULES.get(lang)
            if mod_name is None:
                logger.warning("No grammar module mapped for language '%s'", lang)
                continue
            try:
                mod = importlib.import_module(mod_name)
                # tree-sitter >=0.22 per-language packages expose a
                # language() function that returns the Language capsule.
                # Multi-grammar packages (e.g. tree_sitter_typescript) name
                # it language_<lang>() instead.
                factory = getattr(mod, "language", None) or getattr(mod, f"language_{lang}")
                self._parsers[lang] = TSParser(Language(factory()))
                logger.debug("Loaded tree-sitter parser for %s", lang)
            except ImportError:
                logger.warning(
                    "Grammar package '%s' not installed for language '%s'. "
                    "Install with: pip install %s",
                    mod_name, lang, mod_name.replace('_', '-'),
                )
            except Exception as exc:
                logger.warning("Could not load tree-sitter grammar for %s: %s", lang, exc)

    def supports_language(self, language: str) -> bool:
        """Return True if a grammar for *language* was loaded successfully."""
        return language in self._parsers

    # ------------------------------------------------------------------
    # Project-level parsing
    # ------------------------------------------------------------------

    def parse_project(self) -> Tuple[List[Node], List[Edge]]:
        """Parse every supported file under ``project_root``.

        Files that fail to parse are logged and skipped. Call edges are
        resolved against the full node set once all files are collected.
        """
        all_nodes: List[Node] = []
        all_edges: List[Edge] = []

        for ext, lang in LANGUAGE_MAP.items():
            if lang not in self._parsers:
                continue
            for file_path in sorted(self.project_root.rglob(f"*{ext}")):
                # Only components below the project root count; otherwise a
                # project living under e.g. ".../build/" would skip every file.
                rel_parts = file_path.relative_to(self.project_root).parts
                if any(part in SKIP_DIRS for part in rel_parts):
                    continue
                try:
                    nodes, edges = self.parse_file(file_path)
                    all_nodes.extend(nodes)
                    all_edges.extend(edges)
                except Exception as exc:
                    logger.warning("Failed to parse %s: %s", file_path, exc)

        all_edges = _resolve_call_edges(all_nodes, all_edges)
        return all_nodes, all_edges

    # ------------------------------------------------------------------
    # File-level parsing
    # ------------------------------------------------------------------

    def parse_file(
        self,
        file_path: Path,
        source: Optional[str] = None,
    ) -> Tuple[List[Node], List[Edge]]:
        """Parse one file into a module node plus its definitions and edges.

        Args:
            file_path: File to parse; must lie under ``project_root``.
            source: Source text; read from *file_path* when omitted.

        Returns:
            ``(nodes, edges)``; both empty when the file's language has no
            loaded grammar.
        """
        lang = LANGUAGE_MAP.get(file_path.suffix)
        if not lang or lang not in self._parsers:
            return [], []

        # Read only after the language check so unsupported files cost nothing.
        if source is None:
            source = file_path.read_text(encoding="utf-8", errors="ignore")

        tree = self._parsers[lang].parse(source.encode("utf-8"))

        rel = file_path.relative_to(self.project_root)
        rel_path = str(rel)
        lines = source.splitlines()

        # -- Module node --------------------------------------------------
        # Join path components rather than replacing "/", so the dotted name
        # is the same on every OS.
        module_name = ".".join(rel.parts).removesuffix(".py")
        module_id = f"module:{module_name}"
        module_node = Node(
            node_id=module_id,
            node_type="module",
            name=module_name.split(".")[-1],
            qualname=module_name,
            file_path=rel_path,
            start_line=1,
            end_line=max(len(lines), 1),
            code=source,
            docstring=self._extract_module_docstring(tree.root_node),
        )

        nodes: List[Node] = [module_node]
        edges: List[Edge] = []

        # -- Language-specific extraction ---------------------------------
        if lang == "python":
            self._walk_python(
                tree.root_node,
                scope_stack=[module_name],
                scope_id_stack=[module_id],
                rel_path=rel_path,
                lines=lines,
                nodes=nodes,
                edges=edges,
            )
            self._extract_python_imports(tree.root_node, module_id, edges)
        # Future: elif lang in ("javascript", "typescript"): ...

        return nodes, edges

    # ------------------------------------------------------------------
    # Python: recursive definition walker
    # ------------------------------------------------------------------

    def _walk_python(
        self,
        ts_node: Any,
        scope_stack: List[str],
        scope_id_stack: List[str],
        rel_path: str,
        lines: List[str],
        nodes: List[Node],
        edges: List[Edge],
    ) -> None:
        """Recursively extract class / function definitions from *ts_node*.

        Only direct children of *ts_node* are inspected; results are appended
        to *nodes* and *edges* in place.
        """
        for child in ts_node.children:
            outer_node = child
            actual_def = child

            # Unwrap @decorated_definition -> inner function/class
            if child.type == "decorated_definition":
                inner = child.child_by_field_name("definition")
                if inner is None:
                    continue
                actual_def = inner

            if actual_def.type == "function_definition":
                self._process_python_function(
                    outer_node, actual_def, scope_stack, scope_id_stack,
                    rel_path, lines, nodes, edges,
                )
            elif actual_def.type == "class_definition":
                self._process_python_class(
                    outer_node, actual_def, scope_stack, scope_id_stack,
                    rel_path, lines, nodes, edges,
                )

    def _add_python_definition(
        self,
        node_type: str,
        outer_node: Any,
        def_node: Any,
        scope_stack: List[str],
        scope_id_stack: List[str],
        rel_path: str,
        lines: List[str],
        nodes: List[Node],
        edges: List[Edge],
    ) -> Optional[Tuple[str, str]]:
        """Append the node and ``contains`` edge for one definition.

        *outer_node* supplies the line span (it includes decorators when
        present); *def_node* supplies the name and docstring.

        Returns:
            ``(name, node_id)``, or ``None`` when the definition has no name
            node (possible in broken source).
        """
        name_node = def_node.child_by_field_name("name")
        if name_node is None:
            return None
        name: str = name_node.text.decode("utf-8")
        qualname = ".".join(scope_stack + [name])
        node_id = f"{node_type}:{qualname}"

        # Tree-sitter rows are 0-based; graph line numbers are 1-based.
        start_line = outer_node.start_point[0] + 1
        end_line = outer_node.end_point[0] + 1

        nodes.append(Node(
            node_id=node_id,
            node_type=node_type,
            name=name,
            qualname=qualname,
            file_path=rel_path,
            start_line=start_line,
            end_line=end_line,
            code="\n".join(lines[start_line - 1: end_line]),
            docstring=self._extract_docstring(def_node),
        ))
        edges.append(Edge(src=scope_id_stack[-1], dst=node_id, edge_type="contains"))
        return name, node_id

    def _process_python_function(
        self,
        outer_node: Any,
        func_node: Any,
        scope_stack: List[str],
        scope_id_stack: List[str],
        rel_path: str,
        lines: List[str],
        nodes: List[Node],
        edges: List[Edge],
    ) -> None:
        """Record a function, its symbolic call edges and nested definitions."""
        added = self._add_python_definition(
            "function", outer_node, func_node, scope_stack, scope_id_stack,
            rel_path, lines, nodes, edges,
        )
        if added is None:
            return
        name, node_id = added

        # Call edges from function body; destinations are still symbolic
        # names here and are resolved later at project level.
        for call_name in self._collect_calls(func_node):
            edges.append(Edge(src=node_id, dst=call_name, edge_type="calls"))

        # Recurse into body for nested definitions
        body = func_node.child_by_field_name("body")
        if body is not None:
            self._walk_python(
                body,
                scope_stack + [name],
                scope_id_stack + [node_id],
                rel_path, lines, nodes, edges,
            )

    def _process_python_class(
        self,
        outer_node: Any,
        class_node: Any,
        scope_stack: List[str],
        scope_id_stack: List[str],
        rel_path: str,
        lines: List[str],
        nodes: List[Node],
        edges: List[Edge],
    ) -> None:
        """Record a class and walk its body for methods / nested classes."""
        added = self._add_python_definition(
            "class", outer_node, class_node, scope_stack, scope_id_stack,
            rel_path, lines, nodes, edges,
        )
        if added is None:
            return
        name, node_id = added

        body = class_node.child_by_field_name("body")
        if body is not None:
            self._walk_python(
                body,
                scope_stack + [name],
                scope_id_stack + [node_id],
                rel_path, lines, nodes, edges,
            )

    # ------------------------------------------------------------------
    # Python: imports
    # ------------------------------------------------------------------

    @staticmethod
    def _extract_python_imports(
        root: Any,
        module_id: str,
        edges: List[Edge],
    ) -> None:
        """Append a ``depends_on`` edge for each top-level import of *root*.

        Only direct children of *root* are examined. For relative imports the
        dotted part after the leading dots is used as-is; a bare
        ``from . import x`` produces no edge.
        """
        def _depends_on(mod: str) -> None:
            edges.append(Edge(
                src=module_id, dst=f"module:{mod}", edge_type="depends_on",
            ))

        for child in root.children:
            if child.type == "import_statement":
                for sub in child.children:
                    if sub.type == "dotted_name":
                        _depends_on(sub.text.decode("utf-8"))
                    elif sub.type == "aliased_import":
                        name_n = sub.child_by_field_name("name")
                        if name_n is not None:
                            _depends_on(name_n.text.decode("utf-8"))

            elif child.type == "import_from_statement":
                mod_node = child.child_by_field_name("module_name")
                if mod_node is None:
                    continue
                if mod_node.type == "relative_import":
                    # Keep the last dotted_name child, if any.
                    dotted: Optional[str] = None
                    for sub in mod_node.children:
                        if sub.type == "dotted_name":
                            dotted = sub.text.decode("utf-8")
                    mod = dotted or ""
                else:
                    mod = mod_node.text.decode("utf-8")
                if mod:
                    _depends_on(mod)

    # ------------------------------------------------------------------
    # Call extraction
    # ------------------------------------------------------------------

    @staticmethod
    def _collect_calls(func_node: Any) -> List[str]:
        """Return every function/method name called inside *func_node*.

        Names are listed in source (pre-order) order. Nested function and
        class definitions are not descended into.
        """
        body = func_node.child_by_field_name("body")
        if body is None:
            return []

        calls: List[str] = []
        nested = TreeSitterParser._NESTED_DEFINITION_TYPES
        # Explicit stack instead of recursion: very deep expressions must not
        # hit the interpreter recursion limit and drop the whole file.
        pending: List[Any] = [body]
        while pending:
            node = pending.pop()
            if node.type == "call":
                func = node.child_by_field_name("function")
                if func is not None:
                    name = _resolve_ts_call_name(func)
                    if name:
                        calls.append(name)
            # Push children reversed so they pop in source order.
            pending.extend(
                ch for ch in reversed(node.children) if ch.type not in nested
            )
        return calls

    # ------------------------------------------------------------------
    # Docstring helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _unquote(raw: str) -> str:
        """Strip the quotes (and an ``r``/``u`` prefix) from a string literal.

        Returns the stripped inner text, or ``raw.strip()`` when *raw* is not
        enclosed in matching quotes.
        """
        body = raw
        # At most one of r/R/u/U can prefix a plain (docstring-capable) str.
        if body[:1] in ("r", "R", "u", "U"):
            body = body[1:]
        for quote in ('"""', "'''", '"', "'"):
            if body.startswith(quote) and body.endswith(quote):
                return body[len(quote):-len(quote)].strip()
        return raw.strip()

    @staticmethod
    def _leading_docstring(statements: Any) -> str:
        """Return the docstring opening *statements*, or ``""``.

        Leading comments are skipped. The first other statement must be an
        expression statement containing a string; anything else means there
        is no docstring.
        """
        for stmt in statements:
            if stmt.type == "comment":
                continue
            if stmt.type == "expression_statement":
                for expr in stmt.children:
                    if expr.type == "string":
                        return TreeSitterParser._unquote(expr.text.decode("utf-8"))
            break
        return ""

    @staticmethod
    def _extract_docstring(def_node: Any) -> str:
        """Extract the docstring from a function / class definition node."""
        body = def_node.child_by_field_name("body")
        if body is None:
            return ""
        return TreeSitterParser._leading_docstring(body.children)

    @staticmethod
    def _extract_module_docstring(root: Any) -> str:
        """Extract the module-level docstring."""
        return TreeSitterParser._leading_docstring(root.children)
482
+
483
+
484
+ # ===================================================================
485
+ # AST Fallback Parser (when tree-sitter is not installed)
486
+ # ===================================================================
487
+
488
class ASTFallbackParser(Parser):
    """Pure-Python fallback using the built-in ``ast`` module.

    Only supports Python. Used automatically when tree-sitter is missing.
    Unlike the Tree-sitter backend, a file with a syntax error yields no
    nodes at all.
    """

    def __init__(self, project_root: Path) -> None:
        """Create a parser rooted at *project_root*."""
        self.project_root = project_root

    def supports_language(self, language: str) -> bool:
        """Return True only for ``"python"``."""
        return language == "python"

    def parse_project(self) -> Tuple[List[Node], List[Edge]]:
        """Parse every ``*.py`` file under ``project_root``.

        Files that fail are logged and skipped. Call edges are resolved
        against the full node set at the end.
        """
        nodes: List[Node] = []
        edges: List[Edge] = []
        for fp in sorted(self.project_root.rglob("*.py")):
            # Only components below the project root count; otherwise a
            # project living under e.g. ".../build/" would skip every file.
            rel_parts = fp.relative_to(self.project_root).parts
            if any(part in SKIP_DIRS for part in rel_parts):
                continue
            try:
                n, e = self.parse_file(fp)
                nodes.extend(n)
                edges.extend(e)
            except Exception as exc:
                logger.warning("AST parse failed for %s: %s", fp, exc)
        edges = _resolve_call_edges(nodes, edges)
        return nodes, edges

    def parse_file(
        self,
        file_path: Path,
        source: Optional[str] = None,
    ) -> Tuple[List[Node], List[Edge]]:
        """Parse one Python file into nodes and edges.

        Args:
            file_path: File to parse; must lie under ``project_root``.
            source: Source text; read from *file_path* when omitted.

        Returns:
            ``(nodes, edges)``; both empty when the source has a syntax error.
        """
        if source is None:
            source = file_path.read_text(encoding="utf-8", errors="ignore")

        try:
            tree = ast.parse(source)
        except SyntaxError as exc:
            logger.warning("SyntaxError in %s: %s", file_path, exc)
            return [], []

        rel = file_path.relative_to(self.project_root)
        rel_path = str(rel)
        lines = source.splitlines()
        # Join path components rather than replacing "/", so the dotted name
        # is the same on every OS.
        module_name = ".".join(rel.parts).removesuffix(".py")
        module_id = f"module:{module_name}"

        module_node = Node(
            node_id=module_id,
            node_type="module",
            name=module_name.split(".")[-1],
            qualname=module_name,
            file_path=rel_path,
            start_line=1,
            end_line=max(len(lines), 1),
            code=source,
            docstring=ast.get_docstring(tree) or "",
        )

        visitor = _ASTVisitor(module_id, module_name, rel_path, lines)
        visitor.visit(tree)

        nodes = [module_node] + visitor.nodes
        edges = list(visitor.edges)

        # Top-level imports become module dependency edges. ``from . import x``
        # has no module name and produces no edge.
        for stmt in tree.body:
            if isinstance(stmt, ast.Import):
                for alias in stmt.names:
                    edges.append(Edge(
                        src=module_id, dst=f"module:{alias.name}", edge_type="depends_on",
                    ))
            elif isinstance(stmt, ast.ImportFrom) and stmt.module:
                edges.append(Edge(
                    src=module_id, dst=f"module:{stmt.module}", edge_type="depends_on",
                ))

        return nodes, edges
564
+
565
+
566
+ # ===================================================================
567
+ # Backward-Compatible Alias
568
+ # ===================================================================
569
+
570
class PythonGraphParser(Parser):
    """Backward-compatible facade kept under the legacy parser's name.

    At construction it tries **TreeSitterParser** for Python and, if that
    backend cannot handle Python, uses the built-in AST parser instead.
    Every call is forwarded to the backend that was chosen.
    """

    def __init__(self, project_root: Path) -> None:
        self.project_root = project_root
        candidate = TreeSitterParser(project_root, languages=["python"])
        if not candidate.supports_language("python"):
            # No usable Python grammar: use the stdlib-only backend.
            self._delegate: Parser = ASTFallbackParser(project_root)
            logger.info("Using AST fallback parser (Python only)")
            return
        self._delegate = candidate
        logger.info("Using Tree-sitter parser (error-tolerant, semantic chunking)")

    def parse_file(
        self,
        file_path: Path,
        source: Optional[str] = None,
    ) -> Tuple[List[Node], List[Edge]]:
        """Forward to the selected backend's ``parse_file``."""
        backend = self._delegate
        return backend.parse_file(file_path, source)

    def parse_project(self) -> Tuple[List[Node], List[Edge]]:
        """Forward to the selected backend's ``parse_project``."""
        backend = self._delegate
        return backend.parse_project()

    def supports_language(self, language: str) -> bool:
        """Forward to the selected backend's ``supports_language``."""
        backend = self._delegate
        return backend.supports_language(language)
599
+
600
+
601
+ # ===================================================================
602
+ # Shared Helpers
603
+ # ===================================================================
604
+
605
def _resolve_ts_call_name(func_node: Any) -> Optional[str]:
    """Turn the callee node of a Tree-sitter call into a dotted name.

    ``identifier`` nodes give their text. ``attribute`` chains give
    ``a.b.c``; when the innermost object is not an identifier, only the
    attribute names are kept. A callee that is itself a call is resolved
    through its own callee. Any other node type gives ``None``.
    """
    target = func_node
    # Peel off call wrappers such as the outer call in ``f()()``.
    while target.type == "call":
        target = target.child_by_field_name("function")
        if target is None:
            return None

    if target.type == "identifier":
        return target.text.decode("utf-8")
    if target.type != "attribute":
        return None

    # Walk the chain from the outermost attribute inwards, prepending each
    # name so the segments end up in source order.
    segments: List[str] = []
    cursor = target
    while cursor is not None and cursor.type == "attribute":
        attr_node = cursor.child_by_field_name("attribute")
        if attr_node is not None:
            segments.insert(0, attr_node.text.decode("utf-8"))
        cursor = cursor.child_by_field_name("object")
    if cursor is not None and cursor.type == "identifier":
        segments.insert(0, cursor.text.decode("utf-8"))

    if not segments:
        return None
    return ".".join(segments)
625
+
626
+
627
def _resolve_call_edges(nodes: List[Node], edges: List[Edge]) -> List[Edge]:
    """Rewrite symbolic ``calls`` destinations to concrete node IDs.

    Shared post-processing step for every parser backend. Edges of other
    types, and call edges that already point at a known node, pass through
    unchanged. Call edges that cannot be resolved keep their symbolic name.
    Edge order is preserved.
    """
    ids_by_name: Dict[str, List[str]] = {}
    id_by_qualname: Dict[str, str] = {}
    known_ids = set()
    for item in nodes:
        ids_by_name.setdefault(item.name, []).append(item.node_id)
        id_by_qualname[item.qualname] = item.node_id
        known_ids.add(item.node_id)

    def _path(identifier: str) -> List[str]:
        # "function:pkg.mod.fn" -> ["pkg", "mod", "fn"]; [] without a prefix.
        if ":" not in identifier:
            return []
        return identifier.split(":")[1].split(".")

    def _lookup_dotted(edge: Edge) -> Optional[str]:
        segments = edge.dst.split(".")
        leaf = segments[-1]

        # self.method -> look for a sibling under the caller's own parent.
        if segments[0] == "self" and edge.src.startswith("function:"):
            owner, dot, _ = edge.src.removeprefix("function:").rpartition(".")
            if dot:
                hit = id_by_qualname.get(f"{owner}.{leaf}")
                if hit is not None:
                    return hit

        candidates = ids_by_name.get(leaf)
        if candidates is None:
            return None
        # Prefer a candidate that shares the caller's enclosing scope;
        # otherwise take the first node with that name.
        caller_path = _path(edge.src)
        for cand in candidates:
            cand_path = _path(cand)
            if caller_path and cand_path and caller_path[:-1] == cand_path[:-1]:
                return cand
        return candidates[0]

    out: List[Edge] = []
    for edge in edges:
        if edge.edge_type != "calls" or edge.dst in known_ids:
            out.append(edge)
            continue

        if "." in edge.dst:
            target = _lookup_dotted(edge)
        elif edge.dst in ids_by_name:
            target = ids_by_name[edge.dst][0]
        else:
            target = id_by_qualname.get(edge.dst)

        if target:
            out.append(Edge(src=edge.src, dst=target, edge_type="calls"))
        else:
            out.append(edge)

    return out
687
+
688
+
689
+ # ===================================================================
690
+ # Legacy AST visitor (used by ASTFallbackParser)
691
+ # ===================================================================
692
+
693
class _ASTVisitor(ast.NodeVisitor):
    """Walks a Python AST and collects Node / Edge objects.

    Results accumulate in ``nodes`` and ``edges``. ``scope_stack`` and
    ``scope_id_stack`` track the enclosing definitions so qualified names
    and ``contains`` edges reflect nesting.
    """

    def __init__(
        self,
        module_id: str,
        module_name: str,
        rel_path: str,
        lines: List[str],
    ) -> None:
        """Start a walk for one module.

        Args:
            module_id: Node ID of the module; root of the ``contains`` edges.
            module_name: Dotted module name; root of every qualified name.
            rel_path: Path stored on every emitted node.
            lines: Source split into lines, used to slice code snippets.
        """
        self.module_id = module_id
        self.module_name = module_name
        self.rel_path = rel_path
        self.lines = lines
        self.scope_stack: List[str] = [module_name]
        self.scope_id_stack: List[str] = [module_id]
        self.nodes: List[Node] = []
        self.edges: List[Edge] = []

    def visit_ClassDef(self, node: ast.ClassDef) -> None:
        self._visit_definition(node, "class", collect_calls=False)

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        self._visit_function(node)

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
        self._visit_function(node)

    def _visit_function(self, node: ast.AST) -> None:
        assert isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
        self._visit_definition(node, "function", collect_calls=True)

    def _visit_definition(self, node: Any, node_type: str, *, collect_calls: bool) -> None:
        """Emit the node and edges for one class/function, then descend.

        Emits the definition node, a ``contains`` edge from the enclosing
        scope and, for functions, one symbolic ``calls`` edge per call.
        """
        qualname = self._mk_qualname(node.name)
        node_id = f"{node_type}:{qualname}"
        self.nodes.append(Node(
            node_id=node_id, node_type=node_type, name=node.name,
            qualname=qualname, file_path=self.rel_path,
            start_line=self._first_line(node),
            end_line=getattr(node, "end_lineno", None) or node.lineno,
            code=self._snippet(node),
            docstring=ast.get_docstring(node) or "",
        ))
        self.edges.append(Edge(
            src=self.scope_id_stack[-1], dst=node_id, edge_type="contains",
        ))
        if collect_calls:
            for call_name in _ast_collect_calls(node):
                self.edges.append(Edge(src=node_id, dst=call_name, edge_type="calls"))

        # Children are visited with this definition as the innermost scope.
        self.scope_stack.append(node.name)
        self.scope_id_stack.append(node_id)
        self.generic_visit(node)
        self.scope_stack.pop()
        self.scope_id_stack.pop()

    @staticmethod
    def _first_line(node: ast.AST) -> int:
        """Return the 1-based first line of *node*, decorators included.

        ``lineno`` of a decorated definition is its ``def``/``class`` line,
        so the decorators' own line numbers are taken into account to match
        the span reported by the Tree-sitter backend.
        """
        first = getattr(node, "lineno", 1)
        for deco in getattr(node, "decorator_list", ()):
            first = min(first, getattr(deco, "lineno", first))
        return first

    def _snippet(self, node: ast.AST) -> str:
        """Return the source lines covered by *node*, decorators included."""
        start = max(self._first_line(node) - 1, 0)
        # A missing or None end_lineno falls back to the node's own line so
        # the slice never runs to the end of the file.
        end = getattr(node, "end_lineno", None) or getattr(node, "lineno", start + 1)
        return "\n".join(self.lines[start:end])

    def _mk_qualname(self, name: str) -> str:
        """Qualify *name* with the current scope stack."""
        return ".".join(self.scope_stack + [name])
769
+
770
+
771
def _ast_collect_calls(node: ast.AST) -> List[str]:
    """Return the names of the calls made directly inside *node*.

    For a function or class definition only the body is scanned. Nested
    function and class definitions are not descended into, because their
    calls belong to the nested definition; this matches the Tree-sitter
    backend. Names are listed in traversal order, and calls whose callee
    cannot be named are omitted.
    """
    names: List[str] = []

    class _CallVisitor(ast.NodeVisitor):
        def visit_Call(self, call_node: ast.Call) -> None:
            callee = _ast_name_from_expr(call_node.func)
            if callee:
                names.append(callee)
            # Keep descending: arguments and chained callees may hold calls.
            self.generic_visit(call_node)

        def _skip_nested(self, _definition: ast.AST) -> None:
            # Nested definitions are reported under their own node.
            return None

        visit_FunctionDef = _skip_nested
        visit_AsyncFunctionDef = _skip_nested
        visit_ClassDef = _skip_nested

    if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
        roots: List[ast.AST] = list(node.body)
    else:
        roots = [node]

    visitor = _CallVisitor()
    for root in roots:
        visitor.visit(root)
    return names


def _ast_name_from_expr(expr: ast.AST) -> Optional[str]:
    """Turn a callee expression into a dotted name, or ``None``.

    ``Name`` gives its identifier. ``Attribute`` chains give ``a.b.c``;
    when the innermost value is not a ``Name``, only the attribute names are
    kept. A ``Call`` is resolved through its own callee.
    """
    if isinstance(expr, ast.Name):
        return expr.id
    if isinstance(expr, ast.Attribute):
        parts: List[str] = []
        current: ast.AST = expr
        # Collected outermost-first, hence the reversal below.
        while isinstance(current, ast.Attribute):
            parts.append(current.attr)
            current = current.value
        if isinstance(current, ast.Name):
            parts.append(current.id)
        return ".".join(reversed(parts)) if parts else None
    if isinstance(expr, ast.Call):
        return _ast_name_from_expr(expr.func)
    return None
800
+