graphlens-python 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- graphlens_python/__init__.py +5 -0
- graphlens_python/_adapter.py +291 -0
- graphlens_python/_deps.py +191 -0
- graphlens_python/_module_resolver.py +87 -0
- graphlens_python/_project_detector.py +140 -0
- graphlens_python/_visitor.py +734 -0
- graphlens_python-0.1.1.dist-info/METADATA +8 -0
- graphlens_python-0.1.1.dist-info/RECORD +10 -0
- graphlens_python-0.1.1.dist-info/WHEEL +4 -0
- graphlens_python-0.1.1.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,734 @@
|
|
|
1
|
+
"""Python CST visitor using tree-sitter — builds graphlens nodes/relations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
8
|
+
|
|
9
|
+
import tree_sitter_python as tspython
|
|
10
|
+
from graphlens import (
|
|
11
|
+
GraphLens,
|
|
12
|
+
Node,
|
|
13
|
+
NodeKind,
|
|
14
|
+
Relation,
|
|
15
|
+
RelationKind,
|
|
16
|
+
)
|
|
17
|
+
from graphlens.utils import Span, make_node_id
|
|
18
|
+
from tree_sitter import Language, Parser
|
|
19
|
+
from tree_sitter import Node as TSNode
|
|
20
|
+
|
|
21
|
+
from graphlens_python._module_resolver import resolve_relative_import
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger("graphlens_python")
|
|
27
|
+
|
|
28
|
+
_PY_LANGUAGE = Language(tspython.language())
|
|
29
|
+
_parser = Parser(_PY_LANGUAGE)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def parse_python(source: bytes) -> object:
    """
    Parse Python source bytes with the shared tree-sitter parser.

    The module-level ``_parser`` instance is reused for every call; the
    value it produces (a tree-sitter Tree) is returned unchanged.
    """
    tree = _parser.parse(source)
    return tree
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
# Visitor context
|
|
39
|
+
# ---------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass
class ImportClassifier:
    """
    Decide where an imported top-level name comes from.

    The three name sets are computed ahead of time by the caller.  The
    resulting origin string is stored in ``Node.metadata["origin"]`` and
    is one of:

    - ``"stdlib"``      — Python standard library
    - ``"internal"``    — module declared within the same project
    - ``"third_party"`` — package listed in the project's dependency files
    - ``"unknown"``     — none of the above (may be a transitive dep or
                          missing)
    """

    stdlib: frozenset[str] = field(default_factory=frozenset)
    third_party: frozenset[str] = field(default_factory=frozenset)
    internal: frozenset[str] = field(default_factory=frozenset)

    def classify(self, top_level: str) -> str:
        """Return the origin label for the top-level package name."""
        # Order matters: a name present in several sets is reported as
        # stdlib first, then internal, then third_party.
        precedence = (
            ("stdlib", self.stdlib),
            ("internal", self.internal),
            ("third_party", self.third_party),
        )
        for origin, known_names in precedence:
            if top_level in known_names:
                return origin
        return "unknown"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@dataclass
class VisitorContext:
    """
    Context for one file's CST visit.

    Intended to be treated as immutable while a file is being visited.
    NOTE(review): the dataclass is not declared ``frozen``, so this is a
    convention rather than something enforced at runtime.
    """

    # Project identifier; passed to ``make_node_id`` for every node ID.
    project_name: str
    # Path of the file being visited; stored (as ``str``) on each Node.
    file_path: Path
    # Source root of the project.  Not read by the visitor in this module;
    # presumably used by callers to derive module names — TODO confirm.
    source_root: Path
    # Dotted name of the module being visited; seeds the scope stack and
    # is the anchor for resolving relative imports.
    module_qualified_name: str
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# ---------------------------------------------------------------------------
|
|
80
|
+
# Main visitor
|
|
81
|
+
# ---------------------------------------------------------------------------
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class PythonASTVisitor:
    """
    Walks a tree-sitter Python CST and populates a GraphLens.

    Node types handled:
      module, decorated_definition, class_definition,
      function_definition, import_statement, import_from_statement

    Any other node type is traversed transparently: its children are
    searched until one of the handled types is found.  Three parallel
    stacks track the enclosing scope while visiting: the qualified-name
    prefix, the container node ID and the container ``NodeKind``.
    """

    # Node types that introduce a nested definition.  Call extraction
    # stops at these so a call is attributed only to the innermost
    # enclosing function.
    _DEFINITION_TYPES = (
        "function_definition",
        "class_definition",
        "decorated_definition",
    )
    # ``*args`` / ``**kwargs`` parameter node types.
    _SPLAT_PARAMETER_TYPES = (
        "list_splat_pattern",
        "dictionary_splat_pattern",
    )

    def __init__(
        self,
        ctx: VisitorContext,
        graph: GraphLens,
        file_node_id: str,
        source: bytes,
        classifier: ImportClassifier | None = None,
    ) -> None:
        """
        Prepare a visitor for a single file.

        Args:
            ctx: Project/file/module information for the file.
            graph: Graph that receives the nodes and relations.
            file_node_id: ID of the FILE node; the outermost container.
            source: Raw source bytes of the file.
            classifier: Import origin classifier; an empty one (which
                classifies everything as ``"unknown"``) is used when
                omitted.

        """
        self._ctx = ctx
        self._graph = graph
        self._file_node_id = file_node_id
        self._source = source
        self._classifier = classifier or ImportClassifier()
        # Stack of qualified name prefixes (current scope)
        self._scope_stack: list[str] = [ctx.module_qualified_name]
        # Stack of node IDs for emitting CONTAINS/DECLARES relations
        self._container_stack: list[str] = [file_node_id]
        # Stack of NodeKind to know if we're inside a class
        self._kind_stack: list[NodeKind] = [NodeKind.FILE]

    # -------------------------------------------------------------------------
    # Dispatch
    # -------------------------------------------------------------------------

    def visit(self, node: TSNode) -> None:
        """
        Dispatch *node* to ``_visit_<type>``, descending where none exists.

        Unhandled nodes are expanded with an explicit stack instead of
        recursion, so a deeply nested expression (e.g. a very long chain
        of binary operators) cannot exhaust the interpreter's recursion
        limit.  Handlers still run in document (pre-)order.
        """
        pending = [node]
        while pending:
            current = pending.pop()
            handler = getattr(self, f"_visit_{current.type}", None)
            if handler:
                handler(current)
            else:
                # Reversed so the leftmost child is popped first.
                pending.extend(reversed(current.children))

    def _visit_children(self, node: TSNode) -> None:
        """Visit every direct child of *node* in order."""
        for child in node.children:
            self.visit(child)

    # -------------------------------------------------------------------------
    # Top-level visitors
    # -------------------------------------------------------------------------

    def _visit_module(self, node: TSNode) -> None:
        """Visit the statements of the module root."""
        self._visit_children(node)

    def _visit_decorated_definition(self, node: TSNode) -> None:
        """Handle a decorated class/function, passing decorator names on."""
        decorators = [
            _decorator_name(c) for c in node.children if c.type == "decorator"
        ]
        inner = self._child_of_type(
            node, "class_definition", "function_definition"
        )
        if inner is None:
            return
        if inner.type == "class_definition":
            self._handle_class(inner, decorators)
        else:
            self._handle_function(inner, decorators)

    def _visit_class_definition(self, node: TSNode) -> None:
        """Handle an undecorated class definition."""
        self._handle_class(node, decorators=[])

    def _visit_function_definition(self, node: TSNode) -> None:
        """Handle an undecorated function definition."""
        self._handle_function(node, decorators=[])

    def _visit_import_statement(self, node: TSNode) -> None:
        """Emit one import per name in ``import X`` / ``import X as Y``."""
        for child in node.children:
            if child.type == "dotted_name":
                name = _dotted_name(child)
                self._emit_import(
                    local_name=name,
                    ext_qname=name,
                    is_relative=False,
                )
            elif child.type == "aliased_import":
                parsed = self._split_aliased_import(child)
                if parsed is None:
                    # Malformed CST (syntax error in the source): skip.
                    continue
                name, alias = parsed
                self._emit_import(
                    local_name=alias if alias is not None else name,
                    ext_qname=name,
                    is_relative=False,
                    alias=alias,
                )

    def _visit_import_from_statement(self, node: TSNode) -> None:
        """Emit one import per name of a ``from X import ...`` statement."""
        source_module, level = self._resolve_import_source(node)
        is_relative = level > 0

        # Collect imported names (after `import` keyword)
        past_import_kw = False
        for child in node.children:
            if child.type == "import":
                past_import_kw = True
                continue
            if not past_import_kw:
                continue

            if child.type == "dotted_name":
                imported = _dotted_name(child)
                self._emit_import(
                    local_name=imported,
                    ext_qname=self._qualify(source_module, imported),
                    is_relative=is_relative,
                    level=level,
                )
            elif child.type == "aliased_import":
                parsed = self._split_aliased_import(child)
                if parsed is None:
                    # Malformed CST (syntax error in the source): skip.
                    continue
                imported, alias = parsed
                self._emit_import(
                    local_name=alias if alias is not None else imported,
                    ext_qname=self._qualify(source_module, imported),
                    is_relative=is_relative,
                    level=level,
                    alias=alias,
                )
            elif child.type == "wildcard_import":
                self._emit_import(
                    local_name="*",
                    ext_qname=self._qualify(source_module, "*"),
                    is_relative=is_relative,
                    level=level,
                    is_star=True,
                )

    def _resolve_import_source(self, node: TSNode) -> tuple[str, int]:
        """
        Return ``(source_module, level)`` for an import-from statement.

        Only children before the ``import`` keyword are inspected.
        ``level`` is the number of leading dots (0 for absolute imports);
        relative sources are resolved against the current module.
        """
        level = 0
        source_module = ""
        for child in node.children:
            if child.type == "import":
                # everything after this is what's being imported
                break
            if child.type == "relative_import":
                prefix = self._child_of_type(child, "import_prefix")
                if prefix is not None:
                    level = _node_text(prefix).count(".")
                mod_node = self._child_of_type(child, "dotted_name")
                mod_name = (
                    _dotted_name(mod_node) if mod_node is not None else None
                )
                source_module = resolve_relative_import(
                    self._ctx.module_qualified_name, level, mod_name
                )
            elif child.type == "dotted_name":
                source_module = _dotted_name(child)
        return source_module, level

    @staticmethod
    def _qualify(source_module: str, imported: str) -> str:
        """Prefix *imported* with *source_module* when one is known."""
        return f"{source_module}.{imported}" if source_module else imported

    @staticmethod
    def _split_aliased_import(node: TSNode) -> tuple[str, str | None] | None:
        """
        Split an ``aliased_import`` node into ``(name, alias)``.

        ``alias`` is ``None`` when no alias identifier is present.
        Returns ``None`` when the node has no ``dotted_name`` child, which
        can happen when tree-sitter recovers from a syntax error.
        """
        name_node = PythonASTVisitor._child_of_type(node, "dotted_name")
        if name_node is None:
            return None
        alias_node = PythonASTVisitor._child_of_type(node, "identifier")
        alias = _node_text(alias_node) if alias_node is not None else None
        return _dotted_name(name_node), alias

    # -------------------------------------------------------------------------
    # Class and function handlers
    # -------------------------------------------------------------------------

    def _handle_class(self, node: TSNode, decorators: list[str]) -> None:
        """
        Emit a CLASS node, its INHERITS_FROM relations, then visit its body.

        Classes without a name identifier (malformed CST) are ignored.
        """
        name_node = self._child_of_type(node, "identifier")
        if name_node is None:
            return
        name = _node_text(name_node)
        qname = f"{self._scope_stack[-1]}.{name}"

        # Extract base classes from argument_list.  Children that are not
        # plain names (punctuation, keyword arguments, calls, ...) yield an
        # empty string and are dropped.
        arg_list = self._child_of_type(node, "argument_list")
        bases: list[str] = []
        if arg_list is not None:
            bases = [
                base for base in map(_name_from_node, arg_list.children)
                if base
            ]

        class_node = self._make_node(
            NodeKind.CLASS,
            qname,
            name,
            node,
            metadata={
                "decorators": decorators,
                "bases": bases,
                "is_abstract": self._is_abstract(bases, arg_list),
            },
        )
        self._add_node_with_relation(class_node, RelationKind.DECLARES)

        # INHERITS_FROM
        for base_name in bases:
            sym = self._get_or_create_external_symbol(base_name)
            self._graph.add_relation(
                Relation(
                    source_id=class_node.id,
                    target_id=sym.id,
                    kind=RelationKind.INHERITS_FROM,
                )
            )

        self._push(qname, class_node.id, NodeKind.CLASS)
        body = self._child_of_type(node, "block")
        if body is not None:
            self._visit_children(body)
        self._pop()

    @staticmethod
    def _is_abstract(bases: list[str], arg_list: TSNode | None) -> bool:
        """
        Tell whether a class header marks the class as abstract.

        True when a base is ``ABC``/``ABCMeta`` (bare or module-qualified,
        e.g. ``abc.ABC``) or when ``metaclass=ABCMeta`` (bare or
        qualified) appears among the class arguments.
        """
        if any(
            base.rsplit(".", maxsplit=1)[-1] in ("ABC", "ABCMeta")
            for base in bases
        ):
            return True
        if arg_list is None:
            return False
        for arg in arg_list.children:
            if arg.type != "keyword_argument":
                continue
            key = arg.child_by_field_name("name")
            value = arg.child_by_field_name("value")
            if key is None or value is None:
                continue
            if _node_text(key) != "metaclass":
                continue
            metaclass = _name_from_node(value)
            if metaclass.rsplit(".", maxsplit=1)[-1] == "ABCMeta":
                return True
        return False

    def _handle_function(self, node: TSNode, decorators: list[str]) -> None:
        """
        Emit a FUNCTION/METHOD node with its parameters, calls, nested defs.

        The node is a METHOD when the directly enclosing container is a
        class, otherwise a FUNCTION.  Functions without a name identifier
        (malformed CST) are ignored.
        """
        is_async = any(c.type == "async" for c in node.children)
        kind = (
            NodeKind.METHOD if self._kind_stack[-1] == NodeKind.CLASS
            else NodeKind.FUNCTION
        )

        name_node = self._child_of_type(node, "identifier")
        if name_node is None:
            return
        name = _node_text(name_node)
        qname = f"{self._scope_stack[-1]}.{name}"

        # Return type annotation: the only direct ``type`` child of a
        # function definition (parameter types live under ``parameters``).
        type_node = self._child_of_type(node, "type")
        return_annotation: str | None = (
            _node_text(type_node) if type_node is not None else None
        )

        func_node = self._make_node(
            kind,
            qname,
            name,
            node,
            metadata={
                "decorators": decorators,
                "is_async": is_async,
                "is_classmethod": "classmethod" in decorators,
                "is_staticmethod": "staticmethod" in decorators,
                "is_property": "property" in decorators,
                "return_annotation": return_annotation,
            },
        )
        self._add_node_with_relation(func_node, RelationKind.DECLARES)

        self._push(qname, func_node.id, kind)

        # Parameters
        params_node = self._child_of_type(node, "parameters")
        if params_node is not None:
            self._extract_parameters(params_node, func_node.id, qname)

        # Body: extract calls + visit nested defs
        body = self._child_of_type(node, "block")
        if body is not None:
            self._extract_calls(body, func_node.id)
            # Visit nested class/function definitions (direct statements
            # of the body only).
            for child in body.children:
                if child.type in self._DEFINITION_TYPES:
                    self.visit(child)

        self._pop()

    # -------------------------------------------------------------------------
    # Parameter extraction
    # -------------------------------------------------------------------------

    def _extract_parameters(
        self, params_node: TSNode, function_id: str, function_qname: str
    ) -> None:
        """Emit a PARAMETER node plus DECLARES relation per parameter."""
        for child in params_node.children:
            param_name, annotation, has_default, is_variadic = (
                self._describe_parameter(child)
            )
            if not param_name:
                # Punctuation, bare ``*`` / ``/`` separators, etc.
                continue

            param_node = self._make_node(
                NodeKind.PARAMETER,
                f"{function_qname}.{param_name}",
                param_name,
                child,
                metadata={
                    "is_self": param_name == "self",
                    "is_cls": param_name == "cls",
                    "annotation": annotation,
                    "has_default": has_default,
                    "is_variadic": is_variadic,
                },
            )
            self._safe_add_node(param_node)
            self._graph.add_relation(
                Relation(
                    source_id=function_id,
                    target_id=param_node.id,
                    kind=RelationKind.DECLARES,
                )
            )

    def _describe_parameter(
        self, param: TSNode
    ) -> tuple[str | None, str | None, bool, bool]:
        """
        Return ``(name, annotation, has_default, is_variadic)`` for *param*.

        ``name`` is ``None`` for children of the parameter list that are
        not parameters.  Annotated variadics (``*args: T``, ``**kw: T``)
        are ``typed_parameter`` nodes wrapping a splat pattern rather than
        an identifier, so the name is looked up inside the splat.
        """
        kind = param.type
        if kind == "identifier":
            return _node_text(param), None, False, False
        if kind in self._SPLAT_PARAMETER_TYPES:
            return self._identifier_text(param), None, False, True
        if kind not in (
            "default_parameter",
            "typed_parameter",
            "typed_default_parameter",
        ):
            return None, None, False, False

        name = self._identifier_text(param)
        is_variadic = False
        if name is None and kind == "typed_parameter":
            splat = self._child_of_type(param, *self._SPLAT_PARAMETER_TYPES)
            if splat is not None:
                name = self._identifier_text(splat)
                is_variadic = True

        annotation: str | None = None
        if kind != "default_parameter":
            type_node = self._child_of_type(param, "type")
            if type_node is not None:
                annotation = _node_text(type_node)

        has_default = kind != "typed_parameter"
        return name, annotation, has_default, is_variadic

    # -------------------------------------------------------------------------
    # Call extraction
    # -------------------------------------------------------------------------

    def _extract_calls(self, body: TSNode, caller_id: str) -> None:
        """Find all call nodes in body and emit CALLS relations."""
        for child in body.children:
            self._find_calls_in_node(child, caller_id)

    def _find_calls_in_node(self, node: TSNode, caller_id: str) -> None:
        """
        Emit CALLS relations for every call at or below *node*.

        Nested function/class definitions are not entered; their calls
        are attributed when those definitions are visited themselves.
        The walk is iterative (pre-order) so deeply nested expressions
        cannot hit the recursion limit.
        """
        pending = [node]
        while pending:
            current = pending.pop()
            if current.type == "call":
                self._emit_call(current, caller_id)
            # Don't descend into nested function/class definitions
            if current.type not in self._DEFINITION_TYPES:
                pending.extend(reversed(current.children))

    def _emit_call(self, call_node: TSNode, caller_id: str) -> None:
        """
        Emit a CALLS relation from *caller_id* to the callee of a call.

        The callee SYMBOL node is created on first use, with the span of
        that first call site.  Calls whose callee is not a plain or
        dotted name are ignored.
        """
        callee = self._child_of_type(call_node, "identifier", "attribute")
        if callee is None:
            return
        callee_name = _name_from_node(callee)
        if not callee_name:
            return
        sym_id = make_node_id(
            self._ctx.project_name,
            callee_name,
            NodeKind.SYMBOL.value,
        )
        if sym_id not in self._graph.nodes:
            self._graph.add_node(
                Node(
                    id=sym_id,
                    kind=NodeKind.SYMBOL,
                    qualified_name=callee_name,
                    name=callee_name.split(".")[-1],
                    span=_make_span(call_node),
                )
            )
        self._graph.add_relation(
            Relation(
                source_id=caller_id,
                target_id=sym_id,
                kind=RelationKind.CALLS,
            )
        )

    # -------------------------------------------------------------------------
    # Import helper
    # -------------------------------------------------------------------------

    def _emit_import(  # noqa: PLR0913
        self,
        *,
        local_name: str,
        ext_qname: str,
        is_relative: bool,
        level: int = 0,
        alias: str | None = None,
        is_star: bool = False,
    ) -> None:
        """
        Emit an IMPORT node plus its IMPORTS and RESOLVES_TO relations.

        Args:
            local_name: Name bound in the importing scope (alias if any).
            ext_qname: Fully qualified name of what is imported.
            is_relative: Whether the import used leading dots; relative
                imports are always classified as ``"internal"``.
            level: Number of leading dots.
            alias: The ``as`` name, or ``None``.
            is_star: Whether this is a ``from X import *``.

        """
        top_level = ext_qname.split(".", maxsplit=1)[0]
        origin = (
            "internal" if is_relative
            else self._classifier.classify(top_level)
        )

        import_qname = f"{self._scope_stack[-1]}.{local_name}"
        import_node = self._make_node(
            NodeKind.IMPORT,
            import_qname,
            local_name,
            metadata={
                "alias": alias,
                "is_relative": is_relative,
                "level": level,
                "original_name": ext_qname,
                "is_star": is_star,
                "origin": origin,
            },
        )
        self._add_node_with_relation(import_node, RelationKind.DECLARES)

        # For internal imports: resolve to the MODULE node if it already
        # exists in the graph (it will if the module was processed before
        # this file).  Otherwise fall back to EXTERNAL_SYMBOL so the edge
        # is never missing.
        resolve_target_id: str | None = None
        if origin == "internal":
            resolve_target_id = _find_module_node_id(self._graph, ext_qname)

        if resolve_target_id is None:
            ext_sym = self._get_or_create_external_symbol(
                ext_qname, origin=origin
            )
            resolve_target_id = ext_sym.id

        self._graph.add_relation(
            Relation(
                source_id=self._file_node_id,
                target_id=resolve_target_id,
                kind=RelationKind.IMPORTS,
            )
        )
        self._graph.add_relation(
            Relation(
                source_id=import_node.id,
                target_id=resolve_target_id,
                kind=RelationKind.RESOLVES_TO,
            )
        )

    # -------------------------------------------------------------------------
    # Node helpers
    # -------------------------------------------------------------------------

    @staticmethod
    def _child_of_type(node: TSNode, *types: str) -> TSNode | None:
        """Return the first direct child whose type is in *types*."""
        return next((c for c in node.children if c.type in types), None)

    @staticmethod
    def _identifier_text(node: TSNode) -> str | None:
        """Return the text of the first direct ``identifier`` child."""
        ident = PythonASTVisitor._child_of_type(node, "identifier")
        return _node_text(ident) if ident is not None else None

    def _get_or_create_external_symbol(
        self, qname: str, origin: str = "unknown"
    ) -> Node:
        """
        Return the EXTERNAL_SYMBOL node for *qname*, creating it if needed.

        ``origin`` is only recorded when the node is created; an existing
        node is returned untouched.
        """
        sym_id = make_node_id(
            self._ctx.project_name, qname, NodeKind.EXTERNAL_SYMBOL.value
        )
        if sym_id not in self._graph.nodes:
            self._graph.add_node(
                Node(
                    id=sym_id,
                    kind=NodeKind.EXTERNAL_SYMBOL,
                    qualified_name=qname,
                    name=qname.rsplit(".", maxsplit=1)[-1],
                    metadata={"origin": origin},
                )
            )
        return self._graph.nodes[sym_id]

    def _add_node_with_relation(
        self, node: Node, rel_kind: RelationKind
    ) -> None:
        """Add *node* and relate the current container to it."""
        self._safe_add_node(node)
        self._graph.add_relation(
            Relation(
                source_id=self._container_stack[-1],
                target_id=node.id,
                kind=rel_kind,
            )
        )

    def _safe_add_node(self, node: Node) -> None:
        """Add *node* unless a node with the same ID already exists."""
        if node.id not in self._graph.nodes:
            self._graph.add_node(node)

    def _make_node(
        self,
        kind: NodeKind,
        qualified_name: str,
        name: str,
        ts_node: TSNode | None = None,
        metadata: dict[str, object] | None = None,
    ) -> Node:
        """
        Build (but do not add) a Node located in the current file.

        The ID is derived from project name, qualified name and kind; the
        span is taken from *ts_node* when one is given.
        """
        return Node(
            id=make_node_id(
                self._ctx.project_name, qualified_name, kind.value
            ),
            kind=kind,
            qualified_name=qualified_name,
            name=name,
            file_path=str(self._ctx.file_path),
            span=_make_span(ts_node) if ts_node else None,
            metadata=metadata or {},
        )

    def _push(self, qname: str, node_id: str, kind: NodeKind) -> None:
        """Enter a scope: push onto all three scope stacks."""
        self._scope_stack.append(qname)
        self._container_stack.append(node_id)
        self._kind_stack.append(kind)

    def _pop(self) -> None:
        """Leave the current scope: pop all three scope stacks."""
        self._scope_stack.pop()
        self._container_stack.pop()
        self._kind_stack.pop()
|
|
662
|
+
|
|
663
|
+
|
|
664
|
+
# ---------------------------------------------------------------------------
|
|
665
|
+
# Module-level helpers
|
|
666
|
+
# ---------------------------------------------------------------------------
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
def _node_text(node: TSNode) -> str:
    """Return the source text covered by *node*, decoded as UTF-8."""
    raw = node.text
    return raw.decode("utf-8")
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
def _dotted_name(node: TSNode) -> str:
    """
    Build the dotted name string for a ``dotted_name`` node.

    The texts of the node's children are concatenated as-is (the ``.``
    separators are children too); ``,`` children are left out.
    """
    pieces = [
        _node_text(child) for child in node.children if child.type != ","
    ]
    return "".join(pieces)
|
|
676
|
+
|
|
677
|
+
|
|
678
|
+
def _name_from_node(node: TSNode) -> str:
    """
    Turn an ``identifier`` or ``attribute`` node into a dotted name.

    For an attribute the name of its first child is resolved recursively
    and joined with the text of its last child; when the first child has
    no name (e.g. it is a call or subscript) only the attribute text is
    returned.  Any other node type yields an empty string.
    """
    kind = node.type
    if kind == "attribute":
        owner = _name_from_node(node.children[0])
        member = _node_text(node.children[-1])
        return member if not owner else f"{owner}.{member}"
    return _node_text(node) if kind == "identifier" else ""
|
|
687
|
+
|
|
688
|
+
|
|
689
|
+
def _decorator_name(decorator_node: TSNode) -> str:
    """
    Extract the dotted name of a decorator from its ``decorator`` node.

    Handles bare names (``@property``), attribute access
    (``@functools.wraps``) and decorator factories: for a call such as
    ``@lru_cache(maxsize=1)`` the name of the called object
    (``lru_cache``) is returned.  Returns an empty string when no name
    can be determined.
    """
    for child in decorator_node.children:
        target = child
        # Unwrap ``@factory(...)`` (possibly repeated) down to the callee;
        # ``_name_from_node`` has no name for a ``call`` node itself.
        while target is not None and target.type == "call":
            target = target.child_by_field_name("function")
        if target is None or target.type not in ("identifier", "attribute"):
            continue
        name = _name_from_node(target)
        if name:
            return name
    return ""
|
|
697
|
+
|
|
698
|
+
|
|
699
|
+
def _find_module_node_id(graph: GraphLens, qname: str) -> str | None:
    """
    Return the ID of a MODULE node matching qname or its longest prefix.

    An exact match (``mypackage.utils``) wins; failing that, the longest
    dotted prefix that names a MODULE node is used (``mypackage``), so
    that ``from mypackage.utils import Foo`` resolves to the
    ``mypackage.utils`` MODULE even when Foo is not its own node yet.
    Returns ``None`` when no prefix matches.
    """
    parts = qname.split(".")
    # Every dotted prefix of qname, mapped to its number of components.
    prefix_length = {
        ".".join(parts[:count]): count
        for count in range(1, len(parts) + 1)
    }

    best_id: str | None = None
    best_length = 0
    for node in graph.nodes.values():
        if node.kind != NodeKind.MODULE:
            continue
        matched = prefix_length.get(node.qualified_name)
        # Strictly longer only: among equally long matches the first
        # node in graph order is kept.
        if matched is None or matched <= best_length:
            continue
        best_id, best_length = node.id, matched
        if matched == len(parts):
            # Exact match; nothing can beat it.
            break
    return best_id
|
|
718
|
+
|
|
719
|
+
|
|
720
|
+
def _make_span(node: TSNode | None) -> Span | None:
    """
    Build a 1-based ``Span`` from a tree-sitter node's start/end points.

    Both the row and the column of each point are shifted by one.
    Returns ``None`` when *node* is ``None`` or when the span cannot be
    built for any reason.
    """
    if node is None:
        return None
    try:
        (start_row, start_col), (end_row, end_col) = (
            node.start_point,
            node.end_point,
        )
        span = Span(
            start_line=start_row + 1,
            start_col=start_col + 1,
            end_line=end_row + 1,
            end_col=end_col + 1,
        )
    except Exception:
        # Best effort: a node without usable positions has no span.
        return None
    return span
|