codebase-retrieval-context-engine 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. codebase_retrieval_context_engine-2.0.0.dist-info/METADATA +505 -0
  2. codebase_retrieval_context_engine-2.0.0.dist-info/RECORD +46 -0
  3. codebase_retrieval_context_engine-2.0.0.dist-info/WHEEL +4 -0
  4. codebase_retrieval_context_engine-2.0.0.dist-info/entry_points.txt +3 -0
  5. codebase_retrieval_context_engine-2.0.0.dist-info/licenses/LICENSE +201 -0
  6. corbell/__init__.py +6 -0
  7. corbell/cli/__init__.py +1 -0
  8. corbell/cli/commands/__init__.py +1 -0
  9. corbell/cli/commands/index.py +86 -0
  10. corbell/cli/commands/query.py +71 -0
  11. corbell/cli/main.py +57 -0
  12. corbell/core/__init__.py +1 -0
  13. corbell/core/constants.py +52 -0
  14. corbell/core/embeddings/__init__.py +6 -0
  15. corbell/core/embeddings/base.py +68 -0
  16. corbell/core/embeddings/extractor.py +201 -0
  17. corbell/core/embeddings/factory.py +48 -0
  18. corbell/core/embeddings/model.py +401 -0
  19. corbell/core/embeddings/search_cache.py +95 -0
  20. corbell/core/embeddings/sqlite_store.py +271 -0
  21. corbell/core/gitignore.py +76 -0
  22. corbell/core/graph/__init__.py +1 -0
  23. corbell/core/graph/builder.py +696 -0
  24. corbell/core/graph/method_graph.py +1077 -0
  25. corbell/core/graph/providers/__init__.py +6 -0
  26. corbell/core/graph/providers/aws_patterns.py +62 -0
  27. corbell/core/graph/providers/azure_patterns.py +64 -0
  28. corbell/core/graph/providers/gcp_patterns.py +59 -0
  29. corbell/core/graph/schema.py +175 -0
  30. corbell/core/graph/sqlite_store.py +500 -0
  31. corbell/core/indexing/__init__.py +1 -0
  32. corbell/core/indexing/builder.py +608 -0
  33. corbell/core/indexing/lock.py +150 -0
  34. corbell/core/indexing/tracker.py +245 -0
  35. corbell/core/llm_client.py +677 -0
  36. corbell/core/mcp/__init__.py +1 -0
  37. corbell/core/mcp/server.py +214 -0
  38. corbell/core/query/__init__.py +1 -0
  39. corbell/core/query/diagnostics.py +38 -0
  40. corbell/core/query/engine.py +321 -0
  41. corbell/core/query/enhancer.py +102 -0
  42. corbell/core/query/formatter.py +98 -0
  43. corbell/core/query/graph_expander.py +284 -0
  44. corbell/core/query/merger.py +171 -0
  45. corbell/core/query/reranker.py +131 -0
  46. corbell/core/workspace.py +408 -0
@@ -0,0 +1,1077 @@
1
+ """Method-call AST graph builder.
2
+
3
+ Builds method-level call graphs from source code using tree-sitter for accurate
4
+ multi-language parsing. Falls back to Python ``ast`` for Python files when
5
+ tree-sitter is unavailable, and to lightweight regex for other languages.
6
+
7
+ Supported languages (via tree-sitter):
8
+ Python, JavaScript, TypeScript, TSX, JSX, Go, Java, C#, Rust, Ruby, PHP
9
+
10
+ Install tree-sitter grammars:
11
+ pip install "corbell[treesitter]"
12
+ # or individually:
13
+ pip install tree-sitter tree-sitter-python tree-sitter-javascript \\
14
+ tree-sitter-typescript tree-sitter-go tree-sitter-java
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import ast
20
+ import re
21
+ from collections import defaultdict
22
+ from pathlib import Path
23
+ from typing import Any, Dict, List, Optional, Set, Tuple
24
+
25
+ from corbell.core.graph.schema import DependencyEdge, GraphStore, MethodNode
26
+ from corbell.core.gitignore import load_gitignore
27
+
28
+ # ---------------------------------------------------------------------------
29
+ # Tree-sitter setup (optional dependency)
30
+ # ---------------------------------------------------------------------------
31
+
32
+ try:
33
+ import tree_sitter # noqa: F401
34
+ from tree_sitter import Language, Parser as TSParser
35
+ _TS_AVAILABLE = True
36
+ except ImportError:
37
+ _TS_AVAILABLE = False
38
+
39
# Mapping: our language name -> name of the tree-sitter grammar module to import.
# The grammar entry point inside each module (language(), language_tsx(), ...)
# is selected in _get_ts_parser, not here.
_TS_MODULES: Dict[str, str] = {
    "python": "tree_sitter_python",
    "javascript": "tree_sitter_javascript",
    "typescript": "tree_sitter_typescript",
    "tsx": "tree_sitter_typescript",  # same package, different grammar fn
    "go": "tree_sitter_go",
    "java": "tree_sitter_java",
    "csharp": "tree_sitter_c_sharp",
    "rust": "tree_sitter_rust",
    "ruby": "tree_sitter_ruby",
    "php": "tree_sitter_php",
}
52
+
53
# Which AST node types to treat as function/method definitions per language.
# Keys are the language names used in _EXT_LANG; a node whose type is not in
# the set for its language is never recorded as a method.
_TS_TARGET_NODES: Dict[str, Set[str]] = {
    "python": {
        "function_definition",
        "async_function_definition",
    },
    "javascript": {
        "function_declaration",
        "function_expression",
        "generator_function_declaration",
        "arrow_function",
        "method_definition",
    },
    "typescript": {
        "function_declaration",
        "function_expression",
        "generator_function_declaration",
        "arrow_function",
        "method_definition",
        "ambient_declaration",  # declare function ...
    },
    "tsx": {
        "function_declaration",
        "function_expression",
        "generator_function_declaration",
        "arrow_function",
        "method_definition",
    },
    "go": {
        "function_declaration",
        "method_declaration",
    },
    "java": {
        "method_declaration",
        "constructor_declaration",
    },
    "csharp": {
        "method_declaration",
        "constructor_declaration",
        "local_function_statement",
    },
    "rust": {
        "function_item",
    },
    "ruby": {
        "method",
        "singleton_method",
    },
    "php": {
        "function_definition",
        "method_declaration",
    },
}
106
+
107
# Child field names that hold the identifier for each language's function node.
# NOTE(review): unlike the other per-language tables there is no "tsx" entry,
# and nothing in the visible part of this module reads this table — confirm
# whether it is still used before relying on it.
_TS_NAME_FIELDS: Dict[str, List[str]] = {
    "python": ["name"],
    "javascript": ["name"],
    "typescript": ["name"],
    "go": ["name"],
    "java": ["name"],
    "csharp": ["name"],
    "rust": ["name"],
    "ruby": ["name"],
    "php": ["name"],
}
119
+
120
# Path segment names that exclude a file from scanning: build_for_service
# skips any file whose repo-relative path contains one of these as a segment.
_SKIP_DIRS = {
    # VCS, interpreter and dependency directories
    ".git", "__pycache__", "node_modules", "venv", "env", ".venv",
    # Test trees and test tooling output
    "tests", "__tests__", ".pytest_cache", "coverage",
    # Build output and framework caches
    "dist", "build", "out", ".next", ".nuxt", ".svelte-kit", ".cache",
    ".turbo", ".vercel",
    # Storybook
    "storybook-static", ".storybook",
}
126
# File suffix -> language name. build_for_service looks up the exact
# ``Path.suffix`` here (case-sensitive) and skips files with no entry.
_EXT_LANG = {
    ".py": "python",
    ".js": "javascript",
    ".ts": "typescript",
    ".tsx": "tsx",  # tsx uses a separate tree-sitter grammar (language_tsx)
    ".jsx": "javascript",
    ".go": "go",
    ".java": "java",
    ".cs": "csharp",
    ".rs": "rust",
    ".rb": "ruby",
    ".php": "php",
}
139
+
140
+ # ---------------------------------------------------------------------------
141
+ # Call site node types per language (for extracting function calls)
142
+ # ---------------------------------------------------------------------------
143
+
144
# Language name -> tree-sitter node types that count as a call site.
# The callee name is pulled out of these nodes by the per-language logic in
# MethodGraphBuilder._analyze_with_tree_sitter.
_TS_CALL_SITE_NODES: Dict[str, Set[str]] = {
    "python": {"call"},
    "javascript": {"call_expression", "new_expression"},
    "typescript": {"call_expression", "new_expression"},
    "tsx": {"call_expression", "new_expression"},
    "go": {"call_expression"},
    "java": {"method_invocation", "object_creation_expression"},
    "csharp": {"invocation_expression", "object_creation_expression"},
    "rust": {"call_expression", "macro_invocation"},
    "ruby": {"call"},
    "php": {"function_call_expression", "member_call_expression", "scoped_call_expression", "object_creation_expression"},
}
156
+
157
+ # ---------------------------------------------------------------------------
158
+ # Builtin blocklist — filter high-noise language builtins from call graph
159
+ # ---------------------------------------------------------------------------
160
+
161
# Language name -> callee names that are dropped from the call graph.
# Matching is by bare callee name only (see the ``callee not in builtins``
# check in the tree-sitter analyzer), so a project method that shares one of
# these names is filtered out as well.
_BUILTIN_BLOCKLIST: Dict[str, Set[str]] = {
    "python": {
        "print", "len", "range", "enumerate", "zip", "map", "filter",
        "sorted", "reversed", "list", "dict", "set", "tuple", "str",
        "int", "float", "bool", "bytes", "type", "isinstance", "issubclass",
        "hasattr", "getattr", "setattr", "delattr", "super", "object",
        "open", "repr", "hash", "id", "hex", "oct", "bin", "abs", "round",
        "min", "max", "sum", "all", "any", "next", "iter", "vars",
        "format", "input", "exec", "eval", "compile", "globals", "locals",
        "staticmethod", "classmethod", "property", "append", "extend",
        "items", "keys", "values", "get", "update", "pop", "copy", "join",
        "split", "strip", "replace", "startswith", "endswith", "decode",
        "encode", "lower", "upper", "format_map",
    },
    "javascript": {
        "console", "log", "error", "warn", "info", "debug", "assert",
        "setTimeout", "setInterval", "clearTimeout", "clearInterval",
        "setImmediate", "clearImmediate", "queueMicrotask",
        "Promise", "resolve", "reject", "then", "catch", "finally", "all",
        "fetch", "JSON", "parse", "stringify", "Math", "Date", "Array",
        "Object", "String", "Number", "Boolean", "Symbol", "BigInt",
        "parseInt", "parseFloat", "isNaN", "isFinite", "encodeURIComponent",
        "decodeURIComponent", "encodeURI", "decodeURI", "require",
        "map", "filter", "reduce", "forEach", "find", "findIndex",
        "push", "pop", "shift", "unshift", "splice", "slice", "join",
        "toString", "valueOf", "hasOwnProperty", "includes", "indexOf",
        "addEventListener", "removeEventListener", "emit", "on", "off",
        "next", "return", "throw", "keys", "values", "entries", "assign",
        "useState", "useEffect", "useContext", "useRef", "useMemo",
        "useCallback", "useReducer", "useLayoutEffect", "createContext",
        "createElement", "render", "it", "describe", "expect", "test",
        "beforeEach", "afterEach", "beforeAll", "afterAll", "jest",
    },
    "go": {
        "make", "len", "cap", "append", "copy", "delete", "close",
        "panic", "recover", "print", "println", "new", "real", "imag",
        "Errorf", "Sprintf", "Printf", "Println", "Fprintf", "Scanf",
        "Error", "String", "Format", "Marshal", "Unmarshal",
        "Fatal", "Fatalf", "Log", "Logf",
    },
    "java": {
        "println", "print", "printf", "format", "toString", "hashCode",
        "equals", "compareTo", "length", "size", "isEmpty", "contains",
        "add", "get", "put", "remove", "clear", "iterator", "next",
        "append", "insert", "delete", "substring", "charAt", "indexOf",
        "parseInt", "parseLong", "parseDouble", "parseFloat",
        # "get" is already listed above; the duplicate literal was removed.
        "valueOf", "of", "ofNullable", "orElse", "isPresent",
        "stream", "collect", "toList", "toMap", "filter", "map",
        "forEach", "anyMatch", "allMatch", "findFirst",
    },
    "csharp": {
        "WriteLine", "Write", "ToString", "Equals", "GetHashCode", "GetType",
        "ReferenceEquals", "Parse", "TryParse", "Format", "Join", "Concat",
        "IsNullOrEmpty", "IsNullOrWhiteSpace", "Select", "Where", "ToList",
        "ToArray", "FirstOrDefault", "Any", "All", "Count", "Max", "Min",
        "Sum", "Add", "Remove", "Clear", "Contains", "IndexOf", "Substring",
    },
    "rust": {
        "println", "print", "format", "panic", "unwrap", "expect",
        "clone", "to_string", "into", "from", "as_ref", "as_mut",
        "len", "is_empty", "push", "pop", "insert", "remove", "clear",
        "iter", "iter_mut", "into_iter", "map", "filter", "collect",
        "any", "all", "find", "Ok", "Err", "Some", "None",
    },
    "ruby": {
        "puts", "print", "p", "printf", "sprintf", "raise", "fail",
        "require", "require_relative", "include", "extend", "prepend",
        "to_s", "to_i", "to_f", "to_a", "to_h", "to_sym", "class",
        "is_a?", "kind_of?", "instance_of?", "respond_to?", "nil?",
        "empty?", "length", "size", "push", "pop", "shift", "unshift",
        "map", "select", "reject", "reduce", "inject", "each", "find",
    },
    "php": {
        "echo", "print", "print_r", "var_dump", "var_export", "printf",
        "sprintf", "die", "exit", "isset", "empty", "unset", "count",
        "sizeof", "array_push", "array_pop", "array_shift", "array_unshift",
        "array_map", "array_filter", "array_reduce", "array_keys", "array_values",
        "in_array", "explode", "implode", "str_replace", "substr", "strlen",
        "strpos", "strtolower", "strtoupper", "trim", "json_encode", "json_decode",
        "Exception", "RuntimeException", "InvalidArgumentException",
    },
}
# TypeScript and TSX reuse the JavaScript set. These are aliases of the same
# set object, so mutating one mutates all three.
_BUILTIN_BLOCKLIST["typescript"] = _BUILTIN_BLOCKLIST["javascript"]
_BUILTIN_BLOCKLIST["tsx"] = _BUILTIN_BLOCKLIST["javascript"]
246
+
247
+
248
+ # ---------------------------------------------------------------------------
249
+ # Parser cache
250
+ # ---------------------------------------------------------------------------
251
+
252
# lang -> TSParser | None. Filled by _get_ts_parser; a None value records that
# no parser could be built for that language so the attempt is not repeated.
_parser_cache: Dict[str, Any] = {}
253
+
254
+
255
def _get_ts_parser(lang: str) -> Optional[Any]:
    """Return a cached tree-sitter Parser for *lang*, or None if unavailable.

    The outcome of the first lookup for a language, including a failure, is
    stored in ``_parser_cache`` and returned on every later call. When the
    ``tree_sitter`` package itself could not be imported, None is returned
    immediately and nothing is cached.

    Args:
        lang: Language name as used for the keys of ``_TS_MODULES``.

    Returns:
        A parser bound to the language's grammar, or None when the language
        has no configured grammar module or building the parser failed.
    """
    if not _TS_AVAILABLE:
        return None

    try:
        return _parser_cache[lang]
    except KeyError:
        pass  # first request for this language: build below

    built = None
    grammar_module = _TS_MODULES.get(lang)
    if grammar_module:
        # Some grammar packages expose a dedicated entry point instead of (or
        # in addition to) the generic ``language()``:
        #   tree_sitter_typescript -> language_typescript() / language_tsx()
        #   tree_sitter_php        -> language_php()
        dedicated_entry = {
            "tsx": "language_tsx",
            "typescript": "language_typescript",
            "php": "language_php",
        }.get(lang)
        try:
            mod = __import__(grammar_module)
            if dedicated_entry is not None and hasattr(mod, dedicated_entry):
                make_language = getattr(mod, dedicated_entry)
            elif hasattr(mod, "language"):
                make_language = mod.language
            else:
                raise AttributeError(f"No language() callable in {grammar_module}")
            built = TSParser(Language(make_language()))
        except Exception:
            # Best effort: a missing or incompatible grammar disables
            # tree-sitter for this language instead of failing the caller.
            built = None

    _parser_cache[lang] = built
    return built
287
+
288
+
289
+ # ---------------------------------------------------------------------------
290
+ # Main builder
291
+ # ---------------------------------------------------------------------------
292
+
293
+
294
+ class MethodGraphBuilder:
295
+ """Extract method nodes and call edges, store in GraphStore."""
296
+
297
def __init__(self, graph_store: GraphStore) -> None:
    """Bind the builder to the store that receives its output.

    Args:
        graph_store: Store whose ``upsert_node`` and ``upsert_edge`` methods
            are called by ``build_for_service``.
    """
    self.store = graph_store
299
+
300
def build_for_service(self, service_id: str, repo_path: Path) -> Dict[str, Any]:
    """Scan *repo_path* and populate method nodes + call edges.

    Uses tree-sitter for all supported languages when the grammar packages
    are installed. Falls back to Python ``ast`` for Python files, and to
    lightweight regex for JS/TS/Go/Java when tree-sitter is unavailable.

    A file is analysed only if its suffix is listed in ``_EXT_LANG``, no
    segment of its repo-relative path is in ``_SKIP_DIRS``, and it is not
    matched by the repository's gitignore rules.

    Args:
        service_id: Identifier for the owning service.
        repo_path: Root directory of the repository to scan.

    Returns:
        Summary dict with ``methods``, ``calls``, ``files_scanned``, ``ts_available``.
    """
    root = Path(repo_path)
    all_methods: Dict[str, Dict] = {}
    all_calls: List[Dict] = []
    files_scanned = 0

    gitignore_spec = load_gitignore(root)

    for fp in root.rglob("*"):
        # Cheapest filter first: the suffix lookup needs no filesystem
        # access, so the many irrelevant entries are dropped without a stat.
        lang = _EXT_LANG.get(fp.suffix)
        if not lang:
            continue
        # Skip when ANY segment of the relative path is in _SKIP_DIRS.
        # Segments are compared whole, so a name that merely contains a
        # skip-dir name (e.g. 'rebuild') does not match.
        rel = fp.relative_to(root)
        if any(part in _SKIP_DIRS for part in rel.parts):
            continue
        if gitignore_spec.match_file(str(rel).replace("\\", "/")):
            continue
        # Only now touch the filesystem (rejects directories named like files).
        if not fp.is_file():
            continue

        files_scanned += 1
        result = self._analyze_file(fp, service_id, lang)
        for m in result["methods"]:
            # Keyed by ID: a later method with the same ID replaces an earlier one.
            all_methods[m["id"]] = m
        all_calls.extend(result["calls"])

    # Persist method nodes
    for method_id, info in all_methods.items():
        node = MethodNode(
            id=method_id,
            repo=str(repo_path),
            file_path=info["file_path"],
            class_name=info.get("class_name"),
            method_name=info["name"],
            signature=info.get("signature", info["name"]),
            docstring=info.get("docstring"),
            line_start=info.get("line_number", 0),
            line_end=info.get("line_end", info.get("line_number", 0)),
            service_id=service_id,
            typed_signature=info.get("typed_signature"),
        )
        self.store.upsert_node(node)

    # Build and persist call graph edges
    call_graph = self._build_call_graph(all_methods, all_calls)
    for caller_id, callee_id, meta in call_graph:
        self.store.upsert_edge(
            DependencyEdge(
                source_id=caller_id,
                target_id=callee_id,
                kind="method_call",
                metadata=meta,
            )
        )

    return {
        "methods": len(all_methods),
        "calls": len(call_graph),
        "files_scanned": files_scanned,
        "ts_available": _TS_AVAILABLE,
    }
374
+
375
+
376
+ # ------------------------------------------------------------------ #
377
+ # Dispatch #
378
+ # ------------------------------------------------------------------ #
379
+
380
def _analyze_file(self, fp: Path, service_id: str, lang: str) -> Dict:
    """Read *fp* and route it to the best analyzer available for *lang*.

    Preference order: tree-sitter, then the stdlib ``ast`` analyzer for
    Python, then the regex fallback for everything else.

    Args:
        fp: File to analyse.
        service_id: Identifier of the owning service, passed through.
        lang: Language name from ``_EXT_LANG``.

    Returns:
        Dict with ``methods`` and ``calls`` lists; both are empty when the
        file cannot be read.
    """
    try:
        source_text = fp.read_text(encoding="utf-8", errors="ignore")
    except Exception:
        # An unreadable file contributes nothing rather than stopping the scan.
        return {"methods": [], "calls": []}

    ts_parser = _get_ts_parser(lang)
    if ts_parser is not None:
        return self._analyze_with_tree_sitter(
            fp, source_text, service_id, lang, ts_parser
        )

    # No tree-sitter parser for this language: pick a fallback.
    return (
        self._analyze_python_ast(fp, source_text, service_id)
        if lang == "python"
        else self._analyze_regex_fallback(fp, source_text, service_id, lang)
    )
397
+
398
def _make_method_id(self, service_id: str, fp: Path, full_name: str) -> str:
    """Return the node ID ``<service_id>::<file name>::<full_name>``.

    Only the file's base name (``fp.name``) goes into the ID; its directory
    does not.
    """
    # NOTE(review): files with the same base name in different directories
    # that define the same qualified name get the same ID — confirm that this
    # is intended.
    return f"{service_id}::{fp.name}::{full_name}"
400
+
401
+ # ------------------------------------------------------------------ #
402
+ # Tree-sitter analyzer (all languages) #
403
+ # ------------------------------------------------------------------ #
404
+
405
def _analyze_with_tree_sitter(
    self,
    fp: Path,
    content: str,
    service_id: str,
    lang: str,
    parser: Any,
) -> Dict:
    """Parse *content* with tree-sitter and extract method nodes + call sites.

    The syntax tree is walked depth-first in document order with an explicit
    stack, so deeply nested sources cannot exhaust Python's recursion limit.

    Args:
        fp: Path of the file being analysed (used for IDs and ``file_path``).
        content: Source text of the file.
        service_id: Identifier of the owning service.
        lang: Language name; selects the node-type tables and the
            per-language extraction rules.
        parser: Tree-sitter parser for *lang*.

    Returns:
        Dict with ``methods`` (one dict per named function/method) and
        ``calls`` (one dict per call site found inside a recorded method).
        Both lists are empty if parsing raises.
    """
    methods: List[Dict] = []
    calls: List[Dict] = []

    try:
        tree = parser.parse(bytes(content, "utf-8"))
    except Exception:
        return {"methods": [], "calls": []}

    target_node_types = _TS_TARGET_NODES.get(lang, set())
    call_site_types = _TS_CALL_SITE_NODES.get(lang, set())
    builtins = _BUILTIN_BLOCKLIST.get(lang, set())

    # Nodes that open a class-like scope for the definitions nested in them.
    class_scope_types = {
        "class_declaration", "class_definition",
        "struct_type", "type_declaration",
        "interface_declaration",
    }
    # Function nodes that may carry no name of their own.
    anonymous_fn_types = {
        "arrow_function", "function_expression", "generator_function",
    }

    def _text(node) -> str:
        """Decode a node's source text, dropping undecodable bytes."""
        return node.text.decode("utf-8", errors="ignore")

    def _field_text(node, field_name: str) -> Optional[str]:
        """Return the text of *node*'s child in *field_name*, or None."""
        child = node.child_by_field_name(field_name)
        return _text(child) if child else None

    def _node_name(node) -> Optional[str]:
        """Extract the identifier name from a function/method node."""
        # Prefer the grammar's "name" field (PHP uses node type "name" there).
        name_field = node.child_by_field_name("name")
        if name_field is not None:
            return _text(name_field)
        # Otherwise fall back to the first identifier child.
        for child in node.children:
            if child.type == "identifier":
                return _text(child)
        return None

    def _receiver_or_class(node) -> Optional[str]:
        """For Go method_declaration, extract the receiver type name."""
        recv = node.child_by_field_name("receiver")
        if not recv:
            return None
        for sub in recv.children:
            if sub.type in ("type_identifier", "pointer_type", "qualified_type"):
                return _text(sub).lstrip("*")
            # The receiver is a parameter list; its type sits one level down,
            # in the "type" field of the parameter declaration.
            if sub.type == "parameter_declaration":
                recv_type = sub.child_by_field_name("type")
                if recv_type:
                    return _text(recv_type).lstrip("*")
        return None

    def _extract_callee_name(node) -> Optional[str]:
        """Extract the called function/method name from a call site node."""
        if lang == "python":
            func = node.child_by_field_name("function")
            if func is None:
                return None
            if func.type == "identifier":
                return _text(func)
            if func.type == "attribute":
                return _field_text(func, "attribute")
            return None

        if lang in ("javascript", "typescript", "tsx"):
            if node.type == "new_expression":
                # new MyClass(...) — get the constructor name
                ctor = node.child_by_field_name("constructor")
                if ctor and ctor.type == "identifier":
                    return _text(ctor)
                return None
            func = node.child_by_field_name("function")
            if func is None:
                return None
            if func.type == "identifier":
                return _text(func)
            if func.type in ("member_expression", "subscript_expression"):
                return _field_text(func, "property")
            return None

        if lang == "go":
            func = node.child_by_field_name("function")
            if func is None:
                return None
            if func.type == "identifier":
                return _text(func)
            if func.type == "selector_expression":
                return _field_text(func, "field")
            return None

        if lang == "java":
            if node.type == "object_creation_expression":
                return _field_text(node, "type")
            return _field_text(node, "name")

        if lang == "csharp":
            if node.type == "object_creation_expression":
                return _field_text(node, "type")
            func = node.child_by_field_name("function")
            if func is None:
                return None
            if func.type == "identifier":
                return _text(func)
            if func.type == "member_access_expression":
                return _field_text(func, "name")
            return None

        if lang == "rust":
            func = node.child_by_field_name("function")
            if func:
                if func.type in ("identifier", "scoped_identifier"):
                    return _text(func)
                if func.type == "field_expression":
                    return _field_text(func, "field")
                return None
            if node.type == "macro_invocation":
                return _field_text(node, "macro")
            return None

        if lang == "ruby":
            return _field_text(node, "method")

        if lang == "php":
            if node.type == "object_creation_expression":
                return _field_text(node, "class")
            return _field_text(node, "name")

        return None

    def _extract_typed_signature(node) -> str:
        """Build a typed signature string like ``validate(token: str) -> bool``."""
        name = _node_name(node) or "?"
        params_node = node.child_by_field_name("parameters")
        param_strs: List[str] = []

        if params_node:
            for param in params_node.named_children:
                if lang in ("javascript", "typescript", "tsx"):
                    pattern = (
                        param.child_by_field_name("pattern")
                        or param.child_by_field_name("name")
                    )
                    type_ann = param.child_by_field_name("type")
                    pname = _text(pattern) if pattern else ""
                    if type_ann:
                        # The annotation text may start with ':'.
                        raw_t = _text(type_ann).strip().lstrip(":").strip()
                        param_strs.append(f"{pname}: {raw_t}" if pname else raw_t)
                    elif pname:
                        param_strs.append(pname)

                elif lang == "python":
                    if param.type in (
                        "typed_parameter", "typed_default_parameter"
                    ):
                        pname = ""
                        ptype = ""
                        for child in param.children:
                            if child.type == "identifier" and not pname:
                                pname = _text(child)
                            elif child.type == "type":
                                ptype = _text(child)
                        param_strs.append(f"{pname}: {ptype}" if ptype else pname)
                    elif param.type in ("identifier", "list_splat_pattern", "dictionary_splat_pattern"):
                        param_strs.append(_text(param))
                    elif param.type == "default_parameter":
                        n = param.child_by_field_name("name")
                        if n:
                            param_strs.append(_text(n))

                elif lang == "go":
                    # One declaration can name several parameters: "a, b int".
                    pnames: List[str] = []
                    ptype = ""
                    for child in param.children:
                        if child.type == "identifier":
                            pnames.append(_text(child))
                        elif child.type in (
                            "type_identifier", "pointer_type", "qualified_type",
                            "slice_type", "array_type", "map_type", "interface_type",
                        ):
                            ptype = _text(child)
                    if pnames:
                        param_strs.append(
                            f"{' '.join(pnames)} {ptype}".strip() if ptype else " ".join(pnames)
                        )

                elif lang == "java":
                    pname_node = param.child_by_field_name("name")
                    ptype_node = param.child_by_field_name("type")
                    if pname_node and ptype_node:
                        param_strs.append(
                            f"{_text(ptype_node)} {_text(pname_node)}"
                        )

                elif lang == "csharp":
                    pname_node = param.child_by_field_name("name")
                    ptype_node = param.child_by_field_name("type")
                    if pname_node and ptype_node:
                        param_strs.append(
                            f"{_text(ptype_node)} {_text(pname_node)}"
                        )
                    elif param.type == "parameter":
                        param_strs.append(_text(param))

                elif lang == "rust":
                    pat = param.child_by_field_name("pattern")
                    typ = param.child_by_field_name("type")
                    if pat and typ:
                        param_strs.append(f"{_text(pat)}: {_text(typ)}")
                    else:
                        # Parameters without a pattern/type pair are kept verbatim.
                        param_strs.append(_text(param))

                elif lang == "ruby":
                    if param.type in ("identifier", "keyword_parameter", "optional_parameter"):
                        param_strs.append(_text(param))

                elif lang == "php":
                    pname_node = param.child_by_field_name("name")
                    ptype_node = param.child_by_field_name("type")
                    pstr = ""
                    if ptype_node:
                        pstr += _text(ptype_node) + " "
                    if pname_node:
                        pstr += _text(pname_node)
                    if pstr:
                        param_strs.append(pstr.strip())

        params_str = ", ".join(param_strs)

        # Return type
        ret_node = node.child_by_field_name("return_type")
        if ret_node:
            ret_raw = _text(ret_node).strip()
            # Strip any leading ':' / '->' punctuation left in the node text.
            ret_clean = ret_raw.lstrip(":->").strip().lstrip(">:").strip()
            if ret_clean:
                return f"{name}({params_str}) -> {ret_clean}"
        return f"{name}({params_str})"

    # Explicit DFS stack of (node, enclosing_class, parent, enclosing_method_id).
    # Children are pushed in reverse so they are popped in document order.
    pending: List[Tuple[Any, Optional[str], Any, Optional[str]]] = [
        (tree.root_node, None, None, None)
    ]
    while pending:
        node, enclosing_class, parent, enclosing_method_id = pending.pop()
        child_class = enclosing_class
        child_method_id = enclosing_method_id  # inherited default

        if node.type in class_scope_types:
            # Track class/struct/interface context for everything nested
            # inside; an unnamed scope keeps the outer class name.
            name_child = node.child_by_field_name("name")
            cls_name = _text(name_child) if name_child else None
            child_class = cls_name or enclosing_class

        elif node.type in target_node_types:
            raw_name = _node_name(node)

            # For Go method_declaration, use receiver type as class
            eff_class = enclosing_class
            if lang == "go" and node.type == "method_declaration":
                eff_class = _receiver_or_class(node) or eff_class

            # Arrow functions / function expressions without their own
            # name take the name of the variable they are assigned to.
            if raw_name is None and node.type in anonymous_fn_types:
                if parent and parent.type == "variable_declarator":
                    name_child = parent.child_by_field_name("name")
                    if name_child:
                        raw_name = _text(name_child)

            if raw_name:
                # Skip test and mock methods, together with their subtree.
                lower_name = raw_name.lower()
                if lower_name.startswith("test_") or "mock" in lower_name:
                    continue

                full = f"{eff_class}.{raw_name}" if eff_class else raw_name
                mid = self._make_method_id(service_id, fp, full)
                line_start = node.start_point[0] + 1  # tree-sitter rows are 0-based
                line_end = node.end_point[0] + 1

                # Python docstring extraction: a string expression that is
                # the first statement of the body.
                docstring: Optional[str] = None
                if lang == "python" and node.children:
                    body = node.child_by_field_name("body")
                    if body and body.children:
                        first = body.children[0]
                        if first.type == "expression_statement":
                            ds_node = first.children[0] if first.children else None
                            if ds_node and ds_node.type == "string":
                                docstring = _text(ds_node).strip("\"'")

                typed_sig = _extract_typed_signature(node)

                methods.append({
                    "id": mid,
                    "name": raw_name,
                    "full_name": full,
                    "class_name": eff_class,
                    "file_path": str(fp),
                    "line_number": line_start,
                    "line_end": line_end,
                    "signature": raw_name,         # plain name (backward compat)
                    "typed_signature": typed_sig,  # full typed form
                    "docstring": docstring,
                    "service_id": service_id,
                })
                child_method_id = mid  # children see us as enclosing method

        elif call_site_types and node.type in call_site_types and enclosing_method_id:
            # Call site inside a recorded method: keep it unless the callee
            # is on the builtin blocklist.
            callee = _extract_callee_name(node)
            if callee and callee not in builtins:
                calls.append({
                    "caller_id": enclosing_method_id,
                    "callee_name": callee,
                    "line_number": node.start_point[0] + 1,
                })

        for child in reversed(node.children):
            pending.append((child, child_class, node, child_method_id))

    return {"methods": methods, "calls": calls}
757
+
758
+ # ------------------------------------------------------------------ #
759
+ # Python ast fallback #
760
+ # ------------------------------------------------------------------ #
761
+
762
+ def _analyze_python_ast(self, fp: Path, content: str, service_id: str) -> Dict:
763
+ """Use Python's stdlib ast for accurate extraction when tree-sitter is absent."""
764
+ methods: List[Dict] = []
765
+ calls: List[Dict] = []
766
+
767
+ try:
768
+ tree = ast.parse(content, filename=str(fp))
769
+ except SyntaxError:
770
+ return {"methods": [], "calls": []}
771
+
772
+ class _Visitor(ast.NodeVisitor):
773
+ def __init__(self_inner):
774
+ self_inner.current_class: Optional[str] = None
775
+ self_inner.current_method_id: Optional[str] = None
776
+
777
+ def visit_ClassDef(self_inner, node):
778
+ old = self_inner.current_class
779
+ self_inner.current_class = node.name
780
+ self_inner.generic_visit(node)
781
+ self_inner.current_class = old
782
+
783
+ def _visit_func(self_inner, node):
784
+ mname = node.name
785
+ # Skip test and mock methods
786
+ lower_name = mname.lower()
787
+ if lower_name.startswith("test_") or "mock" in lower_name:
788
+ return
789
+
790
+ full = (
791
+ f"{self_inner.current_class}.{mname}"
792
+ if self_inner.current_class else mname
793
+ )
794
+ mid = self._make_method_id(service_id, fp, full)
795
+
796
+ sig_parts = [a.arg for a in node.args.args]
797
+ sig = f"def {mname}({', '.join(sig_parts)})"
798
+ docstring = ast.get_docstring(node)
799
+
800
+ line_end = max(
801
+ (getattr(n, "end_lineno", node.lineno) for n in ast.walk(node)),
802
+ default=node.lineno,
803
+ )
804
+ methods.append({
805
+ "id": mid,
806
+ "name": mname,
807
+ "full_name": full,
808
+ "class_name": self_inner.current_class,
809
+ "file_path": str(fp),
810
+ "line_number": node.lineno,
811
+ "line_end": line_end,
812
+ "is_async": isinstance(node, ast.AsyncFunctionDef),
813
+ "signature": sig,
814
+ "docstring": docstring,
815
+ "service_id": service_id,
816
+ })
817
+
818
+ old_mid = self_inner.current_method_id
819
+ self_inner.current_method_id = mid
820
+ self_inner.generic_visit(node)
821
+ self_inner.current_method_id = old_mid
822
+
823
+ visit_FunctionDef = _visit_func
824
+ visit_AsyncFunctionDef = _visit_func
825
+
826
+ def visit_Call(self_inner, node):
827
+ if not self_inner.current_method_id:
828
+ self_inner.generic_visit(node)
829
+ return
830
+ callee: Optional[str] = None
831
+ if isinstance(node.func, ast.Name):
832
+ callee = node.func.id
833
+ elif isinstance(node.func, ast.Attribute):
834
+ callee = node.func.attr
835
+ if callee:
836
+ calls.append({
837
+ "caller_id": self_inner.current_method_id,
838
+ "callee_name": callee,
839
+ "line_number": node.lineno,
840
+ })
841
+ self_inner.generic_visit(node)
842
+
843
+ _Visitor().visit(tree)
844
+ return {"methods": methods, "calls": calls}
845
+
846
+ # ------------------------------------------------------------------ #
847
+ # Regex fallback (JS/TS/Go/Java/C#/Rust/Ruby/PHP when tree-sitter absent) #
848
+ # ------------------------------------------------------------------ #
849
+
850
+ def _analyze_regex_fallback(
851
+ self, fp: Path, content: str, service_id: str, lang: str
852
+ ) -> Dict:
853
+ """Minimal regex extraction used only when tree-sitter grammars are missing."""
854
+ if lang in ("javascript", "typescript", "tsx"):
855
+ return self._regex_js(fp, content, service_id)
856
+ if lang == "go":
857
+ return self._regex_go(fp, content, service_id)
858
+ if lang == "java":
859
+ return self._regex_java(fp, content, service_id)
860
+ if lang == "csharp":
861
+ return self._regex_csharp(fp, content, service_id)
862
+ if lang == "rust":
863
+ return self._regex_rust(fp, content, service_id)
864
+ if lang == "ruby":
865
+ return self._regex_ruby(fp, content, service_id)
866
+ if lang == "php":
867
+ return self._regex_php(fp, content, service_id)
868
+ return {"methods": [], "calls": []}
869
+
870
+ # --- JS/TS regex (used only as last-resort fallback) ---
871
+
872
+ def _regex_js(self, fp: Path, content: str, service_id: str) -> Dict:
873
+ methods: List[Dict] = []
874
+ lines = content.splitlines()
875
+ current_class: Optional[str] = None
876
+ KEYWORDS = {
877
+ "if", "else", "for", "while", "switch", "catch", "try", "return",
878
+ "new", "typeof", "instanceof", "import", "export", "from", "class",
879
+ "extends", "implements", "interface", "type", "enum", "declare",
880
+ "public", "private", "protected", "static", "async", "await",
881
+ }
882
+ PATTERNS: List[Tuple[re.Pattern, str]] = [
883
+ (re.compile(r"^\s*export\s+default\s+(?:async\s+)?function\s*([\w$]*)\s*[<(]"), "default_fn"),
884
+ (re.compile(r"^\s*export\s+(?:async\s+)?function\s+([\w$]+)\s*[<(]"), "exported_fn"),
885
+ (re.compile(r"^\s*(?:export\s+)?async\s+function\s+([\w$]+)\s*[<(]"), "async_fn"),
886
+ (re.compile(r"^\s*(?:export\s+)?function\s+([\w$]+)\s*[<(]"), "fn"),
887
+ (re.compile(r"^\s*export\s+(?:const|let|var)\s+([\w$]+)\s*=\s*(?:async\s+)?(?:\([^)]*\)|[\w$]+)\s*(?::[^=]+)?=>"), "exported_arrow"),
888
+ (re.compile(r"^\s*(?:const|let|var)\s+([\w$]+)\s*=\s*(?:async\s+)?(?:\([^)]*\)|[\w$]+)\s*(?::[^=>]+)?=>"), "arrow"),
889
+ (re.compile(r"^\s*(?:(?:public|private|protected|static|abstract|override|async|readonly)\s+)*"
890
+ r"([\w$]+)\s*[<(][^)]*\)\s*(?::[^{]+)?\s*\{"), "class_method"),
891
+ ]
892
+ class_pat = re.compile(r"^\s*(?:export\s+)?(?:abstract\s+)?class\s+([\w$]+)")
893
+ for lnum, line in enumerate(lines, 1):
894
+ cm = class_pat.match(line)
895
+ if cm:
896
+ current_class = cm.group(1)
897
+ for pat, kind in PATTERNS:
898
+ m = pat.match(line)
899
+ if not m:
900
+ continue
901
+ raw = m.group(1) if m.lastindex and m.group(1) else None
902
+ if raw is None:
903
+ raw = fp.stem if kind == "default_fn" else None
904
+ if not raw or raw in KEYWORDS:
905
+ continue
906
+ # Skip test and mock methods
907
+ lower_name = raw.lower()
908
+ if lower_name.startswith("test_") or "mock" in lower_name:
909
+ continue
910
+
911
+ full = f"{current_class}.{raw}" if (current_class and kind == "class_method") else raw
912
+ mid = self._make_method_id(service_id, fp, full)
913
+ methods.append({
914
+ "id": mid, "name": raw, "full_name": full,
915
+ "class_name": current_class if kind == "class_method" else None,
916
+ "file_path": str(fp), "line_number": lnum, "line_end": lnum,
917
+ "signature": raw, "docstring": None, "service_id": service_id,
918
+ })
919
+ break
920
+ return {"methods": methods, "calls": []}
921
+
922
+ def _regex_go(self, fp: Path, content: str, service_id: str) -> Dict:
923
+ methods: List[Dict] = []
924
+ pat = re.compile(r"^func\s+(?:\([^)]+\)\s+)?(\w+)\s*\(")
925
+ for lnum, line in enumerate(content.splitlines(), 1):
926
+ m = pat.match(line)
927
+ if m:
928
+ mname = m.group(1)
929
+ # Skip test and mock methods
930
+ lower_name = mname.lower()
931
+ if lower_name.startswith("test_") or "mock" in lower_name:
932
+ continue
933
+
934
+ mid = self._make_method_id(service_id, fp, mname)
935
+ methods.append({
936
+ "id": mid, "name": mname, "full_name": mname,
937
+ "class_name": None, "file_path": str(fp),
938
+ "line_number": lnum, "line_end": lnum,
939
+ "signature": mname, "docstring": None, "service_id": service_id,
940
+ })
941
+ return {"methods": methods, "calls": []}
942
+
943
+ def _regex_java(self, fp: Path, content: str, service_id: str) -> Dict:
944
+ methods: List[Dict] = []
945
+ pat = re.compile(
946
+ r"(?:public|private|protected|static|\s)+[\w<>\[\]]+\s+(\w+)\s*\([^)]*\)\s*\{?"
947
+ )
948
+ skip = {"if", "for", "while", "switch", "catch", "class"}
949
+ for lnum, line in enumerate(content.splitlines(), 1):
950
+ m = pat.search(line)
951
+ if m and m.group(1) not in skip and "class " not in line:
952
+ mname = m.group(1)
953
+ # Skip test and mock methods
954
+ lower_name = mname.lower()
955
+ if lower_name.startswith("test_") or "mock" in lower_name:
956
+ continue
957
+
958
+ mid = self._make_method_id(service_id, fp, mname)
959
+ methods.append({
960
+ "id": mid, "name": mname, "full_name": mname,
961
+ "class_name": None, "file_path": str(fp),
962
+ "line_number": lnum, "line_end": lnum,
963
+ "signature": mname, "docstring": None, "service_id": service_id,
964
+ })
965
+ return {"methods": methods, "calls": []}
966
+
967
+ def _regex_csharp(self, fp: Path, content: str, service_id: str) -> Dict:
968
+ methods: List[Dict] = []
969
+ pat = re.compile(
970
+ r"(?:public|private|protected|internal|static|async|\s)+[\w<>\[\]]+\s+(\w+)\s*\([^)]*\)\s*\{?"
971
+ )
972
+ skip = {"if", "for", "while", "switch", "catch", "class"}
973
+ for lnum, line in enumerate(content.splitlines(), 1):
974
+ m = pat.search(line)
975
+ if m and m.group(1) not in skip and "class " not in line:
976
+ mname = m.group(1)
977
+ lower_name = mname.lower()
978
+ if lower_name.startswith("test") or "mock" in lower_name:
979
+ continue
980
+ mid = self._make_method_id(service_id, fp, mname)
981
+ methods.append({
982
+ "id": mid, "name": mname, "full_name": mname,
983
+ "class_name": None, "file_path": str(fp),
984
+ "line_number": lnum, "line_end": lnum,
985
+ "signature": mname, "docstring": None, "service_id": service_id,
986
+ })
987
+ return {"methods": methods, "calls": []}
988
+
989
+ def _regex_rust(self, fp: Path, content: str, service_id: str) -> Dict:
990
+ methods: List[Dict] = []
991
+ pat = re.compile(r"^\s*(?:pub\s+)?(?:async\s+)?fn\s+(\w+)\s*\(")
992
+ for lnum, line in enumerate(content.splitlines(), 1):
993
+ m = pat.match(line)
994
+ if m:
995
+ mname = m.group(1)
996
+ lower_name = mname.lower()
997
+ if lower_name.startswith("test") or "mock" in lower_name:
998
+ continue
999
+ mid = self._make_method_id(service_id, fp, mname)
1000
+ methods.append({
1001
+ "id": mid, "name": mname, "full_name": mname,
1002
+ "class_name": None, "file_path": str(fp),
1003
+ "line_number": lnum, "line_end": lnum,
1004
+ "signature": mname, "docstring": None, "service_id": service_id,
1005
+ })
1006
+ return {"methods": methods, "calls": []}
1007
+
1008
+ def _regex_ruby(self, fp: Path, content: str, service_id: str) -> Dict:
1009
+ methods: List[Dict] = []
1010
+ pat = re.compile(r"^\s*def\s+(?:self\.)?(\w+)")
1011
+ for lnum, line in enumerate(content.splitlines(), 1):
1012
+ m = pat.match(line)
1013
+ if m:
1014
+ mname = m.group(1)
1015
+ lower_name = mname.lower()
1016
+ if lower_name.startswith("test_") or "mock" in lower_name:
1017
+ continue
1018
+ mid = self._make_method_id(service_id, fp, mname)
1019
+ methods.append({
1020
+ "id": mid, "name": mname, "full_name": mname,
1021
+ "class_name": None, "file_path": str(fp),
1022
+ "line_number": lnum, "line_end": lnum,
1023
+ "signature": mname, "docstring": None, "service_id": service_id,
1024
+ })
1025
+ return {"methods": methods, "calls": []}
1026
+
1027
+ def _regex_php(self, fp: Path, content: str, service_id: str) -> Dict:
1028
+ methods: List[Dict] = []
1029
+ pat = re.compile(r"^\s*(?:(?:public|private|protected|static|final)\s+)*function\s+(\w+)\s*\(")
1030
+ for lnum, line in enumerate(content.splitlines(), 1):
1031
+ m = pat.match(line)
1032
+ if m:
1033
+ mname = m.group(1)
1034
+ lower_name = mname.lower()
1035
+ if lower_name.startswith("test") or "mock" in lower_name:
1036
+ continue
1037
+ mid = self._make_method_id(service_id, fp, mname)
1038
+ methods.append({
1039
+ "id": mid, "name": mname, "full_name": mname,
1040
+ "class_name": None, "file_path": str(fp),
1041
+ "line_number": lnum, "line_end": lnum,
1042
+ "signature": mname, "docstring": None, "service_id": service_id,
1043
+ })
1044
+ return {"methods": methods, "calls": []}
1045
+
1046
+ # ------------------------------------------------------------------ #
1047
+ # Call graph resolution #
1048
+ # ------------------------------------------------------------------ #
1049
+
1050
+ def _build_call_graph(
1051
+ self, all_methods: Dict[str, Dict], all_calls: List[Dict]
1052
+ ) -> List[Tuple[str, str, Dict]]:
1053
+ """Match call names to method IDs → (caller, callee, meta) triples."""
1054
+ name_to_ids: Dict[str, Set[str]] = defaultdict(set)
1055
+ for mid, info in all_methods.items():
1056
+ name_to_ids[info["name"]].add(mid)
1057
+ if info.get("full_name") and info["full_name"] != info["name"]:
1058
+ name_to_ids[info["full_name"]].add(mid)
1059
+
1060
+ seen: Set[Tuple[str, str]] = set()
1061
+ result = []
1062
+ skip = {"if", "for", "while", "return", "try", "except", "catch", "with", "else", "elif"}
1063
+ for call in all_calls:
1064
+ caller_id = call["caller_id"]
1065
+ callee_name = call.get("callee_name", "")
1066
+ if callee_name in skip:
1067
+ continue
1068
+ for callee_id in name_to_ids.get(callee_name, set()):
1069
+ if caller_id == callee_id:
1070
+ continue
1071
+ key = (caller_id, callee_id)
1072
+ if key not in seen:
1073
+ seen.add(key)
1074
+ result.append(
1075
+ (caller_id, callee_id, {"line": call.get("line_number")})
1076
+ )
1077
+ return result