graphlens-php 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- graphlens_php/__init__.py +9 -0
- graphlens_php/_adapter.py +427 -0
- graphlens_php/_deps.py +164 -0
- graphlens_php/_module_resolver.py +109 -0
- graphlens_php/_project_detector.py +76 -0
- graphlens_php/_resolver.py +596 -0
- graphlens_php/_visitor.py +809 -0
- graphlens_php-0.7.0.dist-info/METADATA +8 -0
- graphlens_php-0.7.0.dist-info/RECORD +11 -0
- graphlens_php-0.7.0.dist-info/WHEEL +4 -0
- graphlens_php-0.7.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,809 @@
|
|
|
1
|
+
"""PHP CST visitor using tree-sitter — builds graphlens nodes/relations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
8
|
+
|
|
9
|
+
import tree_sitter_php as tsphp
|
|
10
|
+
from graphlens import (
|
|
11
|
+
GraphLens,
|
|
12
|
+
Node,
|
|
13
|
+
NodeKind,
|
|
14
|
+
Relation,
|
|
15
|
+
RelationKind,
|
|
16
|
+
)
|
|
17
|
+
from graphlens.utils import Span, make_node_id
|
|
18
|
+
from tree_sitter import Language, Parser, Tree
|
|
19
|
+
from tree_sitter import Node as TSNode
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
|
|
24
|
+
# Logger is named after the package ("graphlens_php"), not this module.
logger = logging.getLogger("graphlens_php")

# The PHP grammar and a parser bound to it are created once at import time;
# parse_php() reuses this single parser instance for every call.
_PHP_LANGUAGE = Language(tsphp.language_php())
_parser = Parser(_PHP_LANGUAGE)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def parse_php(source: bytes) -> Tree:
    """
    Parse raw PHP source with the module-level parser.

    ``source`` is handed to the parser unchanged; the resulting tree-sitter
    ``Tree`` is returned as-is.
    """
    tree = _parser.parse(source)
    return tree
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def extract_namespace(root: TSNode) -> str:
    """
    Return the first named namespace declared directly under ``root``.

    Only the direct children of ``root`` are inspected. The first
    ``namespace_definition`` that carries a ``name`` field wins; its text is
    returned with leading/trailing backslashes removed. When no such child
    exists the file is treated as global and ``""`` is returned.
    """
    for candidate in root.children:
        if candidate.type != "namespace_definition":
            continue
        declared = candidate.child_by_field_name("name")
        if declared is None:
            # A namespace_definition without a name field: keep looking.
            continue
        return _node_text(declared).strip("\\")
    return ""
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# ---------------------------------------------------------------------------
|
|
52
|
+
# Occurrence reference (use-site record)
|
|
53
|
+
# ---------------------------------------------------------------------------
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@dataclass(frozen=True)
class OccurrenceRef:
    """
    One use-site, recorded for the resolver to bind to a definition later.

    ``line`` and ``col`` are 1-based, following the ``Span`` convention.

    ``role`` is one of:
        ``call``       — a function / method / constructor call-site
        ``read``       — a property, constant or bare name being read
        ``write``      — a property used as an assignment target
        ``annotation`` — a parameter, return or property type
        ``base``       — a parent class, implemented interface or used trait
    """

    # Kind of use-site (see the role list in the class docstring).
    role: str
    # 1-based start line of the referenced name.
    line: int
    # 1-based start column of the referenced name.
    col: int
    # Id of the graph node inside which the use-site occurs.
    enclosing_id: str
    # Full span of the referenced name token.
    span: Span
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# ---------------------------------------------------------------------------
|
|
79
|
+
# Import classification
|
|
80
|
+
# ---------------------------------------------------------------------------
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@dataclass
class ImportClassifier:
    """
    Decide where a ``use`` import comes from, using pre-computed name sets.

    The result is stored by the visitor in ``Node.metadata["origin"]`` and is
    one of:

    - ``"internal"``    — top segment is a namespace declared by the project
    - ``"third_party"`` — top segment, lowercased, is a known vendor name
    - ``"stdlib"``      — a single-segment name found in the built-in set
    - ``"unknown"``     — anything else

    Checks run in that order, so ``internal`` wins over ``third_party``, which
    wins over ``stdlib``. ``internal`` and ``stdlib`` compare the segment
    case-sensitively; ``third_party`` compares its lowercased form.
    """

    stdlib: frozenset[str] = field(default_factory=frozenset)
    third_party: frozenset[str] = field(default_factory=frozenset)
    internal: frozenset[str] = field(default_factory=frozenset)

    def classify(self, top_level: str, *, is_single: bool) -> str:
        """
        Return the origin label for an import.

        ``top_level`` is the first namespace segment of the imported name;
        ``is_single`` tells whether the imported name had only that one
        segment (required for a ``"stdlib"`` match).
        """
        if top_level in self.internal:
            origin = "internal"
        elif top_level.lower() in self.third_party:
            origin = "third_party"
        elif is_single and top_level in self.stdlib:
            origin = "stdlib"
        else:
            origin = "unknown"
        return origin
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@dataclass
class VisitorContext:
    """
    Per-file inputs for one CST visit.

    The visitor in this module only reads these values; note that the
    dataclass itself is not frozen, so immutability is by convention only.
    """

    # Project name; used as the first component of every generated node id.
    project_name: str
    # Path of the file being visited; stored (as ``str``) on created nodes.
    file_path: Path
    # Namespace used as the initial qualified-name prefix ("" = global).
    namespace: str
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# ---------------------------------------------------------------------------
|
|
123
|
+
# Main visitor
|
|
124
|
+
# ---------------------------------------------------------------------------
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class PhpASTVisitor:
    """
    Walks a tree-sitter PHP CST and populates a GraphLens.

    Structural declarations (classes, interfaces, traits, enums, functions,
    methods, properties, constants, parameters, imports) become nodes with
    ``DECLARES``/``IMPORTS``/``RESOLVES_TO`` edges. Use-sites (calls, type
    references, base classes, property reads/writes) are collected as
    :class:`OccurrenceRef` for the post-visit resolution pass — this visitor
    never emits ``CALLS``/``REFERENCES``/``HAS_TYPE``/``INHERITS_FROM``.

    Dispatch is by CST node type: :meth:`visit` looks for a method named
    ``_visit_<node.type>`` and falls back to visiting the node's children
    when there is none.
    """

    # Definition node types that ``_walk_body`` hands back to ``visit`` (so
    # they become nodes) instead of scanning them as plain expressions.
    _NESTED_DEF_TYPES = (
        "class_declaration",
        "interface_declaration",
        "trait_declaration",
        "enum_declaration",
        "function_definition",
    )

    def __init__(  # noqa: PLR0913
        self,
        ctx: VisitorContext,
        graph: GraphLens,
        file_node_id: str,
        source: bytes,
        classifier: ImportClassifier | None = None,
        modules: dict[str, str] | None = None,
    ) -> None:
        """
        Prepare a visitor for one file.

        ``file_node_id`` is the id of the already-created FILE node; it is the
        initial container for ``DECLARES`` edges and the source of every
        ``IMPORTS`` edge. ``classifier`` defaults to an empty
        :class:`ImportClassifier` (every import is ``"unknown"``);
        ``modules`` defaults to an empty index.
        """
        self._ctx = ctx
        self._graph = graph
        self._file_node_id = file_node_id
        self._source = source
        self._classifier = (
            classifier if classifier is not None else ImportClassifier()
        )
        # Shared namespace-qualified-name → MODULE node id index, populated by
        # the adapter as files are processed. Used to resolve internal imports
        # to their MODULE node by longest-prefix without scanning the graph.
        self._modules = modules if modules is not None else {}
        # Stack of qualified-name prefixes (current scope); "" = global ns
        self._scope_stack: list[str] = [ctx.namespace]
        # Stack of node IDs for emitting DECLARES relations
        self._container_stack: list[str] = [file_node_id]
        # Stack of NodeKind to know if we are inside a class
        self._kind_stack: list[NodeKind] = [NodeKind.FILE]
        # Occurrence use-sites collected during this visit
        self.occurrences: list[OccurrenceRef] = []
        self.abs_file_path: str = str(ctx.file_path)

    # -------------------------------------------------------------------------
    # Dispatch
    # -------------------------------------------------------------------------

    def visit(self, node: TSNode) -> None:
        """Run the ``_visit_<type>`` handler for ``node``, else descend."""
        handler = getattr(self, f"_visit_{node.type}", None)
        if handler:
            handler(node)
        else:
            self._visit_children(node)

    def _visit_children(self, node: TSNode) -> None:
        """Visit every direct child of ``node`` in order."""
        for child in node.children:
            self.visit(child)

    def _visit_namespace_definition(self, node: TSNode) -> None:
        """Descend into a namespace definition without changing scope."""
        # Scope is already seeded from the file's namespace; descend so the
        # block form ``namespace X { ... }`` has its body processed too.
        self._visit_children(node)

    # -------------------------------------------------------------------------
    # Declarations
    # -------------------------------------------------------------------------

    def _visit_class_declaration(self, node: TSNode) -> None:
        self._handle_class(node, is_abstract=_has_abstract(node))

    def _visit_interface_declaration(self, node: TSNode) -> None:
        self._handle_class(node, is_interface=True)

    def _visit_trait_declaration(self, node: TSNode) -> None:
        self._handle_class(node, is_trait=True)

    def _visit_enum_declaration(self, node: TSNode) -> None:
        self._handle_class(node, is_enum=True)

    def _handle_class(
        self,
        node: TSNode,
        *,
        is_interface: bool = False,
        is_trait: bool = False,
        is_enum: bool = False,
        is_abstract: bool = False,
    ) -> None:
        """
        Emit a CLASS node for a class-like declaration and visit its body.

        Classes, interfaces, traits and enums all map to ``NodeKind.CLASS``;
        the flags are stored in the node metadata to tell them apart. Names
        found in the ``extends`` / ``implements`` clauses are recorded as
        ``base`` occurrences enclosed by the new node.
        """
        name_node = node.child_by_field_name("name")
        if name_node is None:  # pragma: no cover - defensive
            return
        name = _node_text(name_node)
        qname = self._qualify(name)

        class_node = self._make_node(
            NodeKind.CLASS,
            qname,
            name,
            node,
            metadata={
                "is_interface": is_interface,
                "is_trait": is_trait,
                "is_enum": is_enum,
                "is_abstract": is_abstract,
            },
            name_node=name_node,
        )
        self._add_node_with_relation(class_node, RelationKind.DECLARES)

        # Base classes / interfaces (extends + implements)
        for clause_type in ("base_clause", "class_interface_clause"):
            clause = next(
                (c for c in node.children if c.type == clause_type), None
            )
            if clause is not None:
                for ref in _type_refs(clause):
                    self._record_occurrence("base", ref, class_node.id)

        # Members declared in the body are qualified under, and declared by,
        # this class until the matching _pop().
        self._push(qname, class_node.id, NodeKind.CLASS)
        body = node.child_by_field_name("body")
        if body is not None:  # pragma: no cover - classes always have a body
            self._visit_children(body)
        self._pop()

    def _visit_use_declaration(self, node: TSNode) -> None:
        """Trait use inside a class body — modelled as a ``base`` edge."""
        for child in node.children:
            # The optional ``{ ... }`` block of a trait use holds
            # ``insteadof`` / ``as`` rules whose names are methods and
            # aliases, not traits; recording them as bases would be wrong.
            if child.type == "use_list":
                continue
            for ref in _type_refs(child):
                self._record_occurrence(
                    "base", ref, self._container_stack[-1]
                )

    def _visit_function_definition(self, node: TSNode) -> None:
        self._handle_function(node)

    def _visit_method_declaration(self, node: TSNode) -> None:
        self._handle_function(node)

    def _handle_function(self, node: TSNode) -> None:
        """
        Emit a FUNCTION or METHOD node, its parameters, and scan its body.

        The kind is METHOD when the innermost enclosing scope is a CLASS and
        FUNCTION otherwise. A declared return type is recorded as an
        ``annotation`` occurrence enclosed by the new node.
        """
        name_node = node.child_by_field_name("name")
        if name_node is None:  # pragma: no cover - defensive
            return
        name = _node_text(name_node)
        qname = self._qualify(name)
        kind = (
            NodeKind.METHOD
            if self._kind_stack[-1] == NodeKind.CLASS
            else NodeKind.FUNCTION
        )

        func_node = self._make_node(
            kind,
            qname,
            name,
            node,
            metadata={
                "is_static": _has_modifier(node, "static_modifier"),
                "is_abstract": _has_abstract(node),
                "visibility": _visibility(node),
            },
            name_node=name_node,
        )
        self._add_node_with_relation(func_node, RelationKind.DECLARES)

        return_type = node.child_by_field_name("return_type")
        if return_type is not None:
            self._record_type(return_type, func_node.id)

        self._push(qname, func_node.id, kind)
        params = node.child_by_field_name("parameters")
        if params is not None:  # pragma: no cover - always present
            self._extract_parameters(params, func_node.id, qname)
        body = node.child_by_field_name("body")
        if body is not None:
            # No body node means nothing to scan (e.g. a bodiless
            # declaration).
            self._walk_body(body, func_node.id)
        self._pop()

    def _extract_parameters(
        self, params_node: TSNode, function_id: str, function_qname: str
    ) -> None:
        """
        Emit a PARAMETER node per parameter, declared by ``function_id``.

        Parameter qualified names are the function's qualified name plus a
        backslash and the parameter name (without ``$``). A declared type is
        recorded as an ``annotation`` occurrence enclosed by the parameter.
        """
        for child in params_node.children:
            if child.type not in (
                "simple_parameter",
                "variadic_parameter",
                "property_promotion_parameter",
            ):
                continue
            var_node = child.child_by_field_name("name")
            id_node = _name_child(var_node) if var_node is not None else None
            if id_node is None:  # pragma: no cover - defensive
                continue
            param_name = _node_text(id_node)
            type_node = child.child_by_field_name("type")
            is_promoted = child.type == "property_promotion_parameter"

            param_node = self._make_node(
                NodeKind.PARAMETER,
                f"{function_qname}\\{param_name}",
                param_name,
                child,
                metadata={
                    "is_variadic": child.type == "variadic_parameter",
                    "is_promoted": is_promoted,
                    "has_default": child.child_by_field_name("default_value")
                    is not None,
                },
                name_node=id_node,
            )
            self._safe_add_node(param_node)
            # The edge comes from the function id passed in, not from the
            # container stack.
            self._graph.add_relation(
                Relation(
                    source_id=function_id,
                    target_id=param_node.id,
                    kind=RelationKind.DECLARES,
                )
            )
            if type_node is not None:
                self._record_type(type_node, param_node.id)

    def _visit_property_declaration(self, node: TSNode) -> None:
        """Emit an ATTRIBUTE node for each property in the declaration."""
        # The type and visibility sit on the declaration and apply to every
        # ``property_element`` it contains.
        type_node = node.child_by_field_name("type")
        for element in node.children:
            if element.type != "property_element":
                continue
            var_node = element.child_by_field_name("name")
            id_node = _name_child(var_node) if var_node is not None else None
            if id_node is None:  # pragma: no cover - defensive
                continue
            name = _node_text(id_node)
            prop_node = self._make_node(
                NodeKind.ATTRIBUTE,
                self._qualify(name),
                name,
                element,
                metadata={"visibility": _visibility(node)},
                name_node=id_node,
            )
            self._add_node_with_relation(prop_node, RelationKind.DECLARES)
            if type_node is not None:
                self._record_type(type_node, prop_node.id)

    def _visit_const_declaration(self, node: TSNode) -> None:
        """
        Emit a node for each constant in a ``const`` declaration.

        Constants directly inside a class become ATTRIBUTE nodes; elsewhere
        they become VARIABLE nodes. Both carry ``is_constant`` metadata.
        """
        in_class = self._kind_stack[-1] == NodeKind.CLASS
        kind = NodeKind.ATTRIBUTE if in_class else NodeKind.VARIABLE
        for element in node.children:
            if element.type != "const_element":
                continue
            name_node = next(
                (c for c in element.children if c.type == "name"), None
            )
            if name_node is None:  # pragma: no cover - defensive
                continue
            name = _node_text(name_node)
            const_node = self._make_node(
                kind,
                self._qualify(name),
                name,
                element,
                metadata={"is_constant": True},
                name_node=name_node,
            )
            self._add_node_with_relation(const_node, RelationKind.DECLARES)

    def _visit_enum_case(self, node: TSNode) -> None:
        """Emit an ATTRIBUTE node (``is_enum_case``) for an enum case."""
        name_node = node.child_by_field_name("name")
        if name_node is None:  # pragma: no cover - defensive
            return
        name = _node_text(name_node)
        case_node = self._make_node(
            NodeKind.ATTRIBUTE,
            self._qualify(name),
            name,
            node,
            metadata={"is_enum_case": True},
            name_node=name_node,
        )
        self._add_node_with_relation(case_node, RelationKind.DECLARES)

    # -------------------------------------------------------------------------
    # Imports
    # -------------------------------------------------------------------------

    def _visit_namespace_use_declaration(self, node: TSNode) -> None:
        """
        Emit one import per clause of a ``use`` statement.

        For the grouped form the shared prefix (the ``namespace_name`` child)
        is passed along with each clause of the group; for the plain form
        each clause is emitted with an empty prefix.
        """
        group = next(
            (c for c in node.children if c.type == "namespace_use_group"),
            None,
        )
        if group is not None:
            prefix_node = next(
                (c for c in node.children if c.type == "namespace_name"), None
            )
            prefix = _node_text(prefix_node) if prefix_node is not None else ""
            for clause in group.children:
                if clause.type == "namespace_use_clause":
                    self._emit_use_clause(clause, prefix)
            return
        for clause in node.children:
            if clause.type == "namespace_use_clause":
                self._emit_use_clause(clause, "")

    def _emit_use_clause(self, clause: TSNode, prefix: str) -> None:
        """
        Resolve one ``use`` clause to (local name, full name) and emit it.

        The full name is ``prefix`` joined with the clause's path, with outer
        backslashes stripped. The local name is the ``alias`` field when
        present, otherwise the last segment of the full name.
        """
        path_node = next(
            (
                c
                for c in clause.children
                if c.type in ("qualified_name", "name")
            ),
            None,
        )
        if path_node is None:  # pragma: no cover - defensive
            return
        path = _node_text(path_node).strip("\\")
        ext_qname = f"{prefix}\\{path}" if prefix else path
        ext_qname = ext_qname.strip("\\")
        alias_node = clause.child_by_field_name("alias")
        local = (
            _node_text(alias_node)
            if alias_node is not None
            else ext_qname.rsplit("\\", maxsplit=1)[-1]
        )
        self._emit_import(local_name=local, ext_qname=ext_qname)

    def _emit_import(self, *, local_name: str, ext_qname: str) -> None:
        """
        Emit the IMPORT node and its ``IMPORTS``/``RESOLVES_TO`` edges.

        The import is classified by its top namespace segment. Internal
        imports target the MODULE found by :meth:`_lookup_module`; everything
        else (including internal imports with no known module) targets an
        EXTERNAL_SYMBOL node created on demand. ``IMPORTS`` goes from the
        file node, ``RESOLVES_TO`` from the IMPORT node, both to that target.
        """
        top = ext_qname.split("\\", maxsplit=1)[0]
        is_single = "\\" not in ext_qname
        origin = self._classifier.classify(top, is_single=is_single)

        import_node = self._make_node(
            NodeKind.IMPORT,
            self._qualify(local_name),
            local_name,
            metadata={
                # ``alias`` is only set when the local name differs from the
                # imported name's last segment.
                "alias": local_name
                if local_name != ext_qname.rsplit("\\", maxsplit=1)[-1]
                else None,
                "original_name": ext_qname,
                "origin": origin,
            },
        )
        self._add_node_with_relation(import_node, RelationKind.DECLARES)

        target_id: str | None = None
        if origin == "internal":
            target_id = self._lookup_module(ext_qname)
        if target_id is None:
            target_id = self._get_or_create_external_symbol(
                ext_qname, origin=origin
            ).id

        self._graph.add_relation(
            Relation(
                source_id=self._file_node_id,
                target_id=target_id,
                kind=RelationKind.IMPORTS,
            )
        )
        self._graph.add_relation(
            Relation(
                source_id=import_node.id,
                target_id=target_id,
                kind=RelationKind.RESOLVES_TO,
            )
        )

    # -------------------------------------------------------------------------
    # Value scanning (calls / reads / writes)
    # -------------------------------------------------------------------------

    def _visit_expression_statement(self, node: TSNode) -> None:
        """Scan top-level / namespace-scope statements for use-sites."""
        for child in node.children:
            self._scan_value(child, self._container_stack[-1])

    def _walk_body(self, body: TSNode, enclosing_id: str) -> None:
        """Walk a function/method body, recording use-sites once each."""
        for child in body.children:
            if child.type in self._NESTED_DEF_TYPES:
                # Definitions directly in the body become nodes of their own.
                self.visit(child)
            else:
                self._scan_value(child, enclosing_id)

    def _scan_value(self, node: TSNode, enclosing_id: str) -> None:  # noqa: PLR0911, PLR0912
        """
        Record ``call``/``read``/``write`` occurrences in an expression.

        Each recognised node type records its own occurrence and then scans
        only the sub-expressions it names explicitly; any other node type is
        scanned by recursing into all of its children. ``variable_name``
        nodes are ignored.
        """
        t = node.type
        if t == "function_call_expression":
            fn = node.child_by_field_name("function")
            if fn is not None:  # pragma: no cover - always present
                callee = _callee_name(fn)
                if callee is not None:
                    self._record_occurrence("call", callee, enclosing_id)
                elif fn.type not in ("name", "qualified_name"):
                    # Callee is itself an expression; scan it for use-sites.
                    self._scan_value(fn, enclosing_id)
            self._scan_arguments(node, enclosing_id)
            return
        if t in (
            "member_call_expression",
            "nullsafe_member_call_expression",
            "scoped_call_expression",
        ):
            name_node = node.child_by_field_name("name")
            if name_node is not None and name_node.type == "name":
                self._record_occurrence("call", name_node, enclosing_id)
            # NOTE(review): only an ``object`` field is scanned here. If the
            # grammar exposes the qualifier of a scoped (``X::m()``) call
            # under a different field, that qualifier is not scanned —
            # confirm this is intended.
            obj = node.child_by_field_name("object")
            if obj is not None:
                self._scan_value(obj, enclosing_id)
            self._scan_arguments(node, enclosing_id)
            return
        if t == "object_creation_expression":
            cls = _object_creation_class(node)
            if cls is not None:
                callee = _callee_name(cls)
                if callee is not None:  # pragma: no cover - always a name
                    self._record_occurrence("call", callee, enclosing_id)
            self._scan_arguments(node, enclosing_id)
            return
        if t in (
            "member_access_expression",
            "nullsafe_member_access_expression",
        ):
            name_node = node.child_by_field_name("name")
            if name_node is not None and name_node.type == "name":
                self._record_occurrence("read", name_node, enclosing_id)
            obj = node.child_by_field_name("object")
            if obj is not None:  # pragma: no cover - always present
                self._scan_value(obj, enclosing_id)
            return
        if t == "class_constant_access_expression":
            # The trailing name leaf is the constant (or ``class``) reference.
            self._record_occurrence("read", node.children[-1], enclosing_id)
            return
        if t == "assignment_expression":
            self._scan_assignment(node, enclosing_id)
            return
        if t == "name":
            self._record_occurrence("read", node, enclosing_id)
            return
        if t == "qualified_name":
            last = _last_name(node)
            if last is not None:  # pragma: no cover - always has a name leaf
                self._record_occurrence("read", last, enclosing_id)
            return
        if t == "variable_name":
            return  # local variable — not resolvable across files
        for child in node.children:
            self._scan_value(child, enclosing_id)

    def _scan_assignment(self, node: TSNode, enclosing_id: str) -> None:
        """
        Scan an assignment, treating a property target as a ``write``.

        When the left side is a (nullsafe) member access, its member name is
        recorded as ``write`` and only its object expression is scanned
        further; any other left side is scanned like a normal value. The
        right side is always scanned.
        """
        left = node.child_by_field_name("left")
        right = node.child_by_field_name("right")
        if left is not None and left.type in (
            "member_access_expression",
            "nullsafe_member_access_expression",
        ):
            name_node = left.child_by_field_name("name")
            if name_node is not None and name_node.type == "name":
                self._record_occurrence("write", name_node, enclosing_id)
            obj = left.child_by_field_name("object")
            if obj is not None:  # pragma: no cover - always present
                self._scan_value(obj, enclosing_id)
        elif left is not None:  # pragma: no cover - always present
            self._scan_value(left, enclosing_id)
        if right is not None:  # pragma: no cover - always present
            self._scan_value(right, enclosing_id)

    def _scan_arguments(self, node: TSNode, enclosing_id: str) -> None:
        """Scan each ``argument`` of ``node``'s ``arguments`` field, if any."""
        args = node.child_by_field_name("arguments")
        if args is None:
            return
        for arg in args.children:
            if arg.type == "argument":
                for child in arg.children:
                    self._scan_value(child, enclosing_id)

    def _record_type(self, type_node: TSNode, enclosing_id: str) -> None:
        """Record every class name in a type as an ``annotation``."""
        for ref in _type_refs(type_node):
            self._record_occurrence("annotation", ref, enclosing_id)

    def _record_occurrence(
        self, role: str, name_node: TSNode, enclosing_id: str
    ) -> None:
        """
        Append an :class:`OccurrenceRef` located at ``name_node``.

        Nothing is recorded when no span can be built for the node.
        """
        span = _make_span(name_node)
        if span is None:  # pragma: no cover - defensive
            return
        self.occurrences.append(
            OccurrenceRef(
                role=role,
                line=span.start_line,
                col=span.start_col,
                enclosing_id=enclosing_id,
                span=span,
            )
        )

    # -------------------------------------------------------------------------
    # Node helpers
    # -------------------------------------------------------------------------

    def _qualify(self, name: str) -> str:
        """Prefix ``name`` with the current scope (none when global)."""
        scope = self._scope_stack[-1]
        return f"{scope}\\{name}" if scope else name

    def _get_or_create_external_symbol(
        self, qname: str, origin: str = "unknown"
    ) -> Node:
        """
        Return the EXTERNAL_SYMBOL node for ``qname``, creating it if needed.

        ``origin`` is only stored when the node is created here; an existing
        node is returned unchanged.
        """
        sym_id = make_node_id(
            self._ctx.project_name, qname, NodeKind.EXTERNAL_SYMBOL.value
        )
        if sym_id not in self._graph.nodes:
            self._graph.add_node(
                Node(
                    id=sym_id,
                    kind=NodeKind.EXTERNAL_SYMBOL,
                    qualified_name=qname,
                    name=qname.rsplit("\\", maxsplit=1)[-1],
                    metadata={"origin": origin},
                )
            )
        return self._graph.nodes[sym_id]

    def _add_node_with_relation(
        self, node: Node, rel_kind: RelationKind
    ) -> None:
        """Add ``node`` and link it from the current container."""
        self._safe_add_node(node)
        self._graph.add_relation(
            Relation(
                source_id=self._container_stack[-1],
                target_id=node.id,
                kind=rel_kind,
            )
        )

    def _safe_add_node(self, node: Node) -> None:
        """Add ``node`` unless a node with the same id already exists."""
        if node.id not in self._graph.nodes:
            self._graph.add_node(node)

    def _make_node(  # noqa: PLR0913
        self,
        kind: NodeKind,
        qualified_name: str,
        name: str,
        ts_node: TSNode | None = None,
        metadata: dict[str, object] | None = None,
        name_node: TSNode | None = None,
    ) -> Node:
        """
        Build (but do not add) a graph node for this file.

        The id is derived from the project name, qualified name and kind.
        ``ts_node`` supplies the node's span; ``name_node`` supplies the
        ``name_span`` metadata entry. ``metadata`` is copied, so the caller's
        dict is not modified.
        """
        md = dict(metadata or {})
        if name_node is not None:
            name_span = _make_span(name_node)
            if name_span is not None:  # pragma: no cover - always valid here
                md["name_span"] = name_span
        return Node(
            id=make_node_id(
                self._ctx.project_name, qualified_name, kind.value
            ),
            kind=kind,
            qualified_name=qualified_name,
            name=name,
            file_path=str(self._ctx.file_path),
            span=_make_span(ts_node) if ts_node is not None else None,
            metadata=md,
        )

    def _lookup_module(self, qname: str) -> str | None:
        r"""
        Return the MODULE id for ``qname`` or its longest namespace prefix.

        ``App\\Model\\User`` resolves to the ``App\\Model`` namespace MODULE
        even when the ``User`` class is not yet its own node. Uses the shared
        ``modules`` index (O(depth) lookups) rather than scanning the graph.
        """
        parts = qname.split("\\")
        # Try the full name first, then drop trailing segments one at a time.
        for length in range(len(parts), 0, -1):
            candidate = "\\".join(parts[:length])
            module_id = self._modules.get(candidate)
            if module_id is not None:
                return module_id
        return None

    def _push(self, qname: str, node_id: str, kind: NodeKind) -> None:
        """Enter a scope: push onto the three parallel stacks."""
        self._scope_stack.append(qname)
        self._container_stack.append(node_id)
        self._kind_stack.append(kind)

    def _pop(self) -> None:
        """Leave the innermost scope: pop the three parallel stacks."""
        self._scope_stack.pop()
        self._container_stack.pop()
        self._kind_stack.pop()
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
# ---------------------------------------------------------------------------
|
|
718
|
+
# Module-level helpers
|
|
719
|
+
# ---------------------------------------------------------------------------
|
|
720
|
+
|
|
721
|
+
|
|
722
|
+
def _node_text(node: TSNode) -> str:
|
|
723
|
+
return node.text.decode("utf-8") if node.text is not None else ""
|
|
724
|
+
|
|
725
|
+
|
|
726
|
+
def _name_child(node: TSNode) -> TSNode | None:
|
|
727
|
+
"""Return the ``name`` token of a ``variable_name`` (``$x`` → ``x``)."""
|
|
728
|
+
return next((c for c in node.children if c.type == "name"), None)
|
|
729
|
+
|
|
730
|
+
|
|
731
|
+
def _last_name(node: TSNode) -> TSNode | None:
|
|
732
|
+
"""Return the last ``name`` leaf of a qualified name."""
|
|
733
|
+
result: TSNode | None = None
|
|
734
|
+
for child in node.children:
|
|
735
|
+
if child.type == "name":
|
|
736
|
+
result = child
|
|
737
|
+
return result
|
|
738
|
+
|
|
739
|
+
|
|
740
|
+
def _callee_name(node: TSNode) -> TSNode | None:
|
|
741
|
+
"""Return the name token identifying a called function/class."""
|
|
742
|
+
if node.type == "name":
|
|
743
|
+
return node
|
|
744
|
+
if node.type == "qualified_name":
|
|
745
|
+
return _last_name(node)
|
|
746
|
+
return None
|
|
747
|
+
|
|
748
|
+
|
|
749
|
+
def _object_creation_class(node: TSNode) -> TSNode | None:
|
|
750
|
+
"""Return the class node of a ``new X(...)`` expression."""
|
|
751
|
+
for child in node.children:
|
|
752
|
+
if child.type in ("name", "qualified_name"):
|
|
753
|
+
return child
|
|
754
|
+
return None
|
|
755
|
+
|
|
756
|
+
|
|
757
|
+
def _type_refs(node: TSNode) -> list[TSNode]:
    """
    Return the class-name leaves found in a type or heritage clause.

    The leaves are gathered by ``_collect_type_refs`` into a fresh list, in
    the order it appends them.
    """
    collected: list[TSNode] = []
    _collect_type_refs(node, collected)
    return collected
|
|
762
|
+
|
|
763
|
+
|
|
764
|
+
def _collect_type_refs(node: TSNode, out: list[TSNode]) -> None:
|
|
765
|
+
t = node.type
|
|
766
|
+
if t in ("primitive_type", "null", "bottom_type"):
|
|
767
|
+
return
|
|
768
|
+
if t == "qualified_name":
|
|
769
|
+
last = _last_name(node)
|
|
770
|
+
if last is not None: # pragma: no cover - always has a name leaf
|
|
771
|
+
out.append(last)
|
|
772
|
+
return
|
|
773
|
+
if t == "name":
|
|
774
|
+
out.append(node)
|
|
775
|
+
return
|
|
776
|
+
for child in node.children:
|
|
777
|
+
_collect_type_refs(child, out)
|
|
778
|
+
|
|
779
|
+
|
|
780
|
+
def _has_modifier(node: TSNode, modifier_type: str) -> bool:
|
|
781
|
+
return any(c.type == modifier_type for c in node.children)
|
|
782
|
+
|
|
783
|
+
|
|
784
|
+
def _has_abstract(node: TSNode) -> bool:
    """Return whether ``node`` has a direct ``abstract_modifier`` child."""
    is_abstract = _has_modifier(node, "abstract_modifier")
    return is_abstract
|
|
786
|
+
|
|
787
|
+
|
|
788
|
+
def _visibility(node: TSNode) -> str:
|
|
789
|
+
for child in node.children:
|
|
790
|
+
if child.type == "visibility_modifier":
|
|
791
|
+
return _node_text(child)
|
|
792
|
+
return "public"
|
|
793
|
+
|
|
794
|
+
|
|
795
|
+
def _make_span(node: TSNode | None) -> Span | None:
|
|
796
|
+
"""Convert tree-sitter node positions to a Span (1-based)."""
|
|
797
|
+
if node is None: # pragma: no cover - callers guard against None
|
|
798
|
+
return None
|
|
799
|
+
try:
|
|
800
|
+
sr, sc = node.start_point
|
|
801
|
+
er, ec = node.end_point
|
|
802
|
+
return Span(
|
|
803
|
+
start_line=sr + 1,
|
|
804
|
+
start_col=sc + 1,
|
|
805
|
+
end_line=er + 1,
|
|
806
|
+
end_col=ec + 1,
|
|
807
|
+
)
|
|
808
|
+
except Exception: # pragma: no cover - defensive
|
|
809
|
+
return None
|