codegraph-gen 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codegraph_gen/__init__.py +0 -0
- codegraph_gen/__main__.py +311 -0
- codegraph_gen/ai.py +77 -0
- codegraph_gen/analyzer.py +100 -0
- codegraph_gen/builder.py +747 -0
- codegraph_gen/cluster.py +116 -0
- codegraph_gen/config.py +76 -0
- codegraph_gen/detect.py +59 -0
- codegraph_gen/engine.py +367 -0
- codegraph_gen/parser/__init__.py +27 -0
- codegraph_gen/parser/base.py +38 -0
- codegraph_gen/parser/cpp.py +349 -0
- codegraph_gen/parser/go.py +268 -0
- codegraph_gen/parser/javascript.py +370 -0
- codegraph_gen/parser/kotlin.py +387 -0
- codegraph_gen/parser/python.py +415 -0
- codegraph_gen/parser/rust.py +497 -0
- codegraph_gen/parser/swift.py +327 -0
- codegraph_gen/py.typed +0 -0
- codegraph_gen/renderer.py +498 -0
- codegraph_gen/writer.py +97 -0
- codegraph_gen-0.2.0.dist-info/METADATA +169 -0
- codegraph_gen-0.2.0.dist-info/RECORD +25 -0
- codegraph_gen-0.2.0.dist-info/WHEEL +4 -0
- codegraph_gen-0.2.0.dist-info/entry_points.txt +4 -0
codegraph_gen/builder.py
ADDED
|
@@ -0,0 +1,747 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
import networkx as nx
|
|
4
|
+
from codegraph_gen.parser.base import ExtractionResult
|
|
5
|
+
|
|
6
|
+
logger = logging.getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
# Names of builtin / standard-library callables, keyed by language, that are
# kept out of the call graph so they do not pollute it.

# Globals shared verbatim by the "javascript" and "typescript" entries.
_ECMASCRIPT_GLOBALS: frozenset[str] = frozenset(
    (
        "console", "require", "module", "exports", "process", "window", "document",
        "eval", "parseInt", "parseFloat", "isNaN", "isFinite", "decodeURI", "encodeURI",
        "Object", "Array", "String", "Number", "Boolean", "Date", "RegExp", "Error",
        "Map", "Set", "Promise", "JSON", "Math",
        "setTimeout", "clearTimeout", "setInterval", "clearInterval",
    )
)

# Names shared by the "c" entry and (as a subset) the "cpp" entry.
_C_LIBRARY_NAMES: frozenset[str] = frozenset(
    (
        "printf", "scanf", "malloc", "free", "calloc", "realloc",
        "memcpy", "memset", "strcpy", "strlen", "strcmp", "strcat",
        "exit", "fopen", "fclose", "fprintf", "sprintf", "sizeof",
    )
)

BUILTIN_FUNCTIONS: dict[str, set[str]] = {
    "python": {
        "print", "len", "range", "str", "int", "dict", "list", "set", "tuple", "open",
        "sum", "min", "max", "abs", "enumerate", "zip", "any", "all", "map", "filter",
        "super", "repr", "type", "isinstance", "issubclass", "dir", "id", "hash", "input",
    },
    "go": {
        "print", "println", "panic", "recover", "make", "new", "len", "cap",
        "append", "copy", "delete", "complex", "real", "imag", "close",
    },
    # Each language gets its own mutable copy so the entries stay independent.
    "javascript": set(_ECMASCRIPT_GLOBALS),
    "typescript": set(_ECMASCRIPT_GLOBALS),
    "rust": {
        "println!", "print!", "format!", "panic!", "vec!", "assert!", "assert_eq!",
        "Option", "Result", "Some", "None", "Ok", "Err", "Default",
    },
    "swift": {
        "print", "min", "max", "abs", "count", "fatalError", "precondition", "assert",
    },
    "kotlin": {
        "print", "println",
        "listOf", "mapOf", "setOf", "mutableListOf", "mutableMapOf", "mutableSetOf",
        "arrayOf", "emptyList", "emptyMap", "emptySet",
        "run", "let", "also", "apply", "takeIf", "takeUnless",
        "repeat", "require", "check", "error",
    },
    "c": set(_C_LIBRARY_NAMES),
    "cpp": set(_C_LIBRARY_NAMES)
    | {
        "std", "cout", "cin", "endl",
        "vector", "string", "map", "set", "list",
        "shared_ptr", "unique_ptr", "make_shared", "make_unique", "move",
    },
}
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
# Method names that are common on builtin / standard-library types. They are
# listed so the global fallback can avoid resolving them incorrectly.
COMMON_BUILTIN_METHODS: set[str] = {
    # Collection and string manipulation
    "append", "decode", "encode", "insert", "remove", "contains", "push", "pop",
    "split", "join", "map", "filter", "reduce", "forEach", "sorted",
    "count", "length", "size", "isEmpty",
    # Lifecycle and messaging verbs
    "resume", "cancel", "suspend", "start", "stop", "send", "receive",
    # Common method / constructor names across languages
    "len", "new", "is_empty", "clone", "default", "parse", "format",
    "read", "write", "close", "flush",
    "to_string", "to_str", "as_str", "as_ref", "as_mut",
    "unwrap", "expect", "iter", "iter_mut", "into_iter", "next",
    "into", "from", "ok", "err",
    "clear", "get", "set", "add", "keys", "values", "items", "update", "copy",
    "find", "index", "last", "first",
}
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
class FileSymbolScope:
    """Per-file symbol table: what a file declares and what it imports."""

    def __init__(self, file_path: str, language: str) -> None:
        """Create an empty scope for ``file_path`` written in ``language``."""
        # Import alias or local name -> (target_file_id, original_name)
        self.imported_symbols: dict[str, tuple[str, str]] = {}
        # Target files pulled in wholesale (e.g. ``from X import *``)
        self.wildcard_imports: list[str] = []
        # Local symbol name -> fully qualified node ID
        # (e.g. {"MyClass": "foo.py::MyClass"})
        self.declared_symbols: dict[str, str] = {}

        self.language = language
        self.file_path = file_path
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def build_graph(extractions: list[ExtractionResult], workspace_dir: Path) -> nx.DiGraph:
    """
    Assemble a list of ExtractionResults into a single directed graph and
    resolve call, inherit, and import edges using a two-pass scope resolver.

    Pass 1 builds one ``FileSymbolScope`` per file node (declared symbols,
    explicit imports, wildcard imports). Pass 2 walks every extracted edge,
    resolves its raw target to a node ID and adds the edge only when the
    resolved target exists in the graph.

    Args:
        extractions: Parser results; every node is added under ``node.id`` with
            the attributes from ``node.model_dump()``.
        workspace_dir: Root directory that file node IDs are relative to.

    Returns:
        A ``networkx.DiGraph`` whose edges carry a ``relation`` attribute
        (plus ``raw_target`` for ``imports`` edges).
    """
    G = nx.DiGraph()

    # 1. Add all nodes to the graph
    for ext in extractions:
        for node in ext.nodes:
            G.add_node(node.id, **node.model_dump())

    node_ids = set(G.nodes)

    # Resolve the workspace root once. Import targets are resolved to absolute
    # paths below, and ``relative_to`` only works against an absolute root.
    try:
        workspace_root = Path(workspace_dir).resolve()
    except (OSError, RuntimeError):
        workspace_root = Path(workspace_dir)

    # Ordered views of the graph. "First match wins" lookups iterate these
    # (graph insertion order) instead of the unordered ``node_ids`` set so the
    # chosen node is the same on every run.
    file_node_ids = [nid for nid, data in G.nodes(data=True) if data.get("type") == "file"]
    nodes_by_label: dict[object, list[str]] = {}
    for nid, data in G.nodes(data=True):
        nodes_by_label.setdefault(data.get("label"), []).append(nid)

    c_family_suffixes = (".h", ".hpp", ".hxx", ".c", ".cpp", ".cc", ".cxx")
    module_suffixes = (".py", ".ts", ".js", ".go", ".rs", ".swift")
    type_node_kinds = ("class", "struct", "interface", "enum")

    # Helper: resolve local file path from Go/Python/C/C++ import targets
    def resolve_import_to_file_node(source_file: str, target: str) -> str | None:
        # A target is treated as a path when it starts with '.', contains a
        # path separator, or (for C/C++ sources) carries a C/C++ file extension.
        is_path_target = target.startswith(".") or "/" in target or "\\" in target
        if not is_path_target and file_languages.get(source_file) in ("c", "cpp"):
            is_path_target = target.endswith(c_family_suffixes)

        if is_path_target:
            source_dir = (workspace_root / Path(source_file)).parent
            rel_path = None
            try:
                resolved_path = (source_dir / target).resolve()
                rel_path = str(resolved_path.relative_to(workspace_root))
            except (ValueError, OSError, RuntimeError) as exc:
                # Target points outside the workspace or cannot be resolved;
                # fall back to the filename search below.
                logger.debug(
                    "Could not resolve import %r from %s inside workspace: %s",
                    target,
                    source_file,
                    exc,
                )

            if rel_path is not None:
                if rel_path in node_ids:
                    return rel_path
                # Try adding standard C/C++ extensions
                for suff in c_family_suffixes:
                    check_path = rel_path + suff
                    if check_path in node_ids:
                        return check_path
                # Extension-less relative imports (e.g. "./utils"): try the
                # module file first, then the package's __init__ file.
                for suff in module_suffixes:
                    check_path = rel_path + suff
                    if check_path in node_ids:
                        return check_path
                    check_init = str(Path(rel_path) / f"__init__{suff}")
                    if check_init in node_ids:
                        return check_init

            # Global fallback search for this filename in the workspace (for C/C++ includes)
            target_name = Path(target).name
            for nid in file_node_ids:
                if Path(nid).name == target_name:
                    return nid
            return None

        # Dotted module name (e.g. "pkg.mod"): match it against the tail of each
        # file path, but only on a path-component boundary so that "utils" does
        # not match "myutils.py".
        target_path_part = target.replace(".", "/")
        module_tails = (
            target_path_part,
            target_path_part + ".py",
            target_path_part + "/__init__.py",
            target_path_part + ".go",
            target_path_part + ".rs",
        )
        for nid in file_node_ids:
            normalized = nid.replace("\\", "/")
            for tail in module_tails:
                if normalized == tail or normalized.endswith("/" + tail):
                    return nid
        return None

    # Pass 1: Build Symbol Scopes
    scopes: dict[str, FileSymbolScope] = {}
    file_languages: dict[str, str] = {}

    suffix_to_language = {
        ".py": "python",
        ".js": "javascript",
        ".mjs": "javascript",
        ".cjs": "javascript",
        ".ts": "typescript",
        ".tsx": "typescript",
        ".kt": "kotlin",
        ".kts": "kotlin",
        ".go": "go",
        ".rs": "rust",
        ".swift": "swift",
        ".c": "c",
        ".h": "c",
        ".cpp": "cpp",
        ".cc": "cpp",
        ".cxx": "cpp",
        ".hpp": "cpp",
        ".hxx": "cpp",
    }

    for nid in file_node_ids:
        # Unknown extensions default to "python".
        lang = suffix_to_language.get(Path(nid).suffix.lower(), "python")
        file_languages[nid] = lang
        scopes[nid] = FileSymbolScope(nid, lang)

    # Populate declared symbols for each scope
    for nid, data in G.nodes(data=True):
        sf = data.get("source_file")
        ntype = data.get("type")
        label = data.get("label")
        if sf and ntype != "file" and label and sf in scopes:
            scopes[sf].declared_symbols[label] = nid

    # Populate imported symbols for each scope
    for ext in extractions:
        # Find file node
        file_node = next((n for n in ext.nodes if n.type == "file"), None)
        if not file_node:
            continue
        file_id = file_node.id
        if file_id not in scopes:
            continue
        file_scope = scopes[file_id]

        for edge in ext.edges:
            if edge.relation != "imports":
                continue
            target_file_id = resolve_import_to_file_node(file_id, edge.target)
            if not target_file_id:
                continue

            # In C/C++, importing/including a header imports all its symbols as wildcard imports
            if file_scope.language in ("c", "cpp"):
                file_scope.wildcard_imports.append(target_file_id)

            # Parse import_map
            if edge.import_map:
                for local_name, original_name in edge.import_map.items():
                    if original_name == "*":
                        file_scope.wildcard_imports.append(target_file_id)
                    else:
                        file_scope.imported_symbols[local_name] = (
                            target_file_id,
                            original_name,
                        )
            else:
                # Direct import of a module name (e.g. import module_b)
                stem = Path(target_file_id).stem
                file_scope.imported_symbols[stem] = (target_file_id, stem)

    # Leading identifiers that are never resolved through the global fallback.
    external_roots = frozenset(
        {
            "os", "sys", "json", "time", "math", "re", "pathlib", "logging",
            "subprocess", "shutil", "hashlib", "urllib", "socket", "threading",
            "multiprocessing", "typing", "collections", "itertools", "functools",
            "logger", "log", "console", "pytest", "unittest",
            "fmt", "sync", "context", "strings", "bytes", "errors", "net", "http",
            "process", "document", "window", "global", "fs", "path",
            "std", "core", "env", "Logger",
        }
    )
    logging_parts = frozenset({"logger", "log", "logging", "console"})

    def find_type_node(type_name: str, within_dir: Path | None = None) -> str | None:
        """Return the first class-like node labelled ``type_name``.

        When ``within_dir`` is given, only nodes whose source file lives
        directly in that directory are considered.
        """
        for nid in nodes_by_label.get(type_name, ()):
            ndata = G.nodes[nid]
            if ndata.get("type") not in type_node_kinds:
                continue
            if within_dir is None:
                return nid
            node_file = ndata.get("source_file", "")
            if node_file and Path(node_file).parent == within_dir:
                return nid
        return None

    # Resolve symbol helper using the scope chain
    def resolve_symbol(caller_id: str, callee_name: str) -> str | None:
        caller_data = G.nodes.get(caller_id)
        if not caller_data:
            return None
        source_file = caller_data["source_file"]

        lang = file_languages.get(source_file, "python")
        callee_clean = callee_name.replace("::", ".")
        parts = [p.strip() for p in callee_clean.split(".") if p.strip()]
        if not parts:
            return None

        main_symbol = parts[0]
        rest_of_callee = callee_clean.split(".", 1)[1] if len(parts) > 1 else ""

        # 1. Builtins / Stdlib Check
        if main_symbol in BUILTIN_FUNCTIONS.get(lang, set()):
            return None

        scope = scopes.get(source_file)
        if not scope:
            return None

        # Local Scope Type Binding resolution
        # ``local_bindings`` may be present but None, so normalise it.
        local_bindings = caller_data.get("local_bindings") or {}
        if len(parts) > 1 and main_symbol in local_bindings:
            receiver_type = local_bindings[main_symbol]
            resolved_class_id = None

            file_cand = f"{source_file}::{receiver_type}"
            if file_cand in node_ids:
                # Declared in the same file
                resolved_class_id = file_cand
            elif receiver_type in scope.imported_symbols:
                # Explicitly imported
                target_file_id, original_name = scope.imported_symbols[receiver_type]
                resolved_class_id = f"{target_file_id}::{original_name}"
            elif lang in ("go", "swift"):
                # Package siblings (for Go/Swift)
                resolved_class_id = find_type_node(receiver_type, Path(source_file).parent)

            # Global fallback for class/struct name if not found in current module/scope
            if not resolved_class_id:
                resolved_class_id = find_type_node(receiver_type)

            if not resolved_class_id:
                # Known type but not defined in the workspace -> external/standard library type.
                # Bypassing global fallback to prevent incorrect resolution of its methods.
                return None

            target_method_id = f"{resolved_class_id}.{rest_of_callee}"
            if target_method_id in node_ids:
                return target_method_id
            target_method_id = f"{resolved_class_id}.{parts[-1]}"
            if target_method_id in node_ids:
                return target_method_id

            # Cross-file / implementation-to-header fallback for C++ and Python binding boundaries
            method_name = parts[-1]
            for nid in nodes_by_label.get(method_name, ()):
                if G.nodes[nid].get("type") not in ("method", "function"):
                    continue
                parent_class_part = nid.rsplit(".", 1)[0] if "." in nid else ""
                parent_class_name = (
                    parent_class_part.rsplit("::", 1)[-1]
                    if "::" in parent_class_part
                    else parent_class_part
                )
                if parent_class_name == receiver_type or parent_class_name.endswith(
                    f".{receiver_type}"
                ):
                    return nid
            # The type is known but the method was not found: continue with
            # the regular scope chain below.

        # 2. Local lexical scope check
        # self / this / cls references
        if main_symbol in ("self", "this", "cls") and "." in caller_id and rest_of_callee:
            parent_class_id = caller_id.rsplit(".", 1)[0]
            for member in (rest_of_callee, parts[-1]):
                target_candidate = f"{parent_class_id}.{member}"
                if target_candidate in node_ids:
                    return target_candidate

        # Inside current class context
        if "." in caller_id:
            parent_class_id = caller_id.rsplit(".", 1)[0]
            target_candidate = f"{parent_class_id}.{main_symbol}"
            if target_candidate in node_ids:
                if rest_of_callee:
                    sub_target = f"{target_candidate}.{rest_of_callee}"
                    if sub_target in node_ids:
                        return sub_target
                return target_candidate

        # File-level scope check
        file_candidate = f"{source_file}::{main_symbol}"
        if file_candidate in node_ids:
            if rest_of_callee:
                sub_target = f"{file_candidate}.{rest_of_callee}"
                if sub_target in node_ids:
                    return sub_target
            return file_candidate

        # 3. Package scope check (for Go, Swift sibling files)
        if lang in ("go", "swift"):
            caller_dir = Path(source_file).parent
            symbol_tail = f"::{main_symbol}"
            for nid, ndata in G.nodes(data=True):
                if ndata.get("type") == "file":
                    continue
                node_file = ndata.get("source_file", "")
                if not node_file or Path(node_file).parent != caller_dir:
                    continue
                if nid.endswith(symbol_tail):
                    if rest_of_callee:
                        sub_target = f"{nid}.{rest_of_callee}"
                        if sub_target in node_ids:
                            return sub_target
                    return nid

        # 4. Explicit imports and aliases check
        if main_symbol in scope.imported_symbols:
            target_file_id, original_name = scope.imported_symbols[main_symbol]
            if original_name == "*" or original_name == Path(target_file_id).stem:
                # The name refers to the module itself (e.g. ``import module_b``).
                if rest_of_callee:
                    target_candidate = f"{target_file_id}::{rest_of_callee}"
                    if target_candidate in node_ids:
                        return target_candidate
                    member_tail = f".{parts[-1]}"
                    for nid, ndata in G.nodes(data=True):
                        if ndata.get("source_file") == target_file_id and nid.endswith(
                            member_tail
                        ):
                            return nid
                else:
                    target_candidate = f"{target_file_id}::{main_symbol}"
                    if target_candidate in node_ids:
                        return target_candidate
                # Nothing more specific found: point at the imported file node.
                return target_file_id

            target_candidate = f"{target_file_id}::{original_name}"
            if target_candidate not in node_ids:
                # Imported name has no node in the graph; the edge is dropped.
                return None
            if rest_of_callee:
                sub_target = f"{target_candidate}.{rest_of_callee}"
                if sub_target in node_ids:
                    return sub_target
            return target_candidate

        # 5. Wildcard imports check
        for target_file_id in scope.wildcard_imports:
            target_candidate = f"{target_file_id}::{main_symbol}"
            if target_candidate in node_ids:
                if rest_of_callee:
                    sub_target = f"{target_candidate}.{rest_of_callee}"
                    if sub_target in node_ids:
                        return sub_target
                return target_candidate

        # 6. Global fallback check
        if main_symbol in external_roots or any(p in logging_parts for p in parts):
            return None

        search_label = parts[-1] if len(parts) > 1 else main_symbol
        if len(parts) > 1 and search_label in COMMON_BUILTIN_METHODS:
            return None

        candidates = [
            nid
            for nid in nodes_by_label.get(search_label, ())
            if G.nodes[nid].get("type") != "file"
        ]

        if len(candidates) == 1:
            return candidates[0]
        if len(candidates) > 1:
            # Ambiguous: accept only a unique candidate from the caller's directory.
            caller_parent_dir = Path(source_file).parent
            near_candidates = [
                c
                for c in candidates
                if Path(G.nodes[c]["source_file"]).parent == caller_parent_dir
            ]
            if len(near_candidates) == 1:
                return near_candidates[0]

        return None

    # Pass 2: Process and resolve edges
    for ext in extractions:
        for edge in ext.edges:
            src = edge.source
            tgt = edge.target
            rel = edge.relation

            if src == tgt:
                continue
            if src not in node_ids:
                continue

            resolved_tgt = None

            if rel == "contains":
                if tgt in node_ids:
                    resolved_tgt = tgt
            elif rel == "imports":
                resolved_tgt = resolve_import_to_file_node(G.nodes[src]["source_file"], tgt)
            elif rel in ("inherits", "implements", "calls"):
                resolved_tgt = resolve_symbol(src, tgt)

            if resolved_tgt and resolved_tgt in node_ids:
                if rel == "imports":
                    # Keep the unresolved import string alongside the resolved edge.
                    G.add_edge(src, resolved_tgt, relation=rel, raw_target=tgt)
                else:
                    G.add_edge(src, resolved_tgt, relation=rel)

    return G
|