graphlens-php 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,809 @@
1
+ """PHP CST visitor using tree-sitter — builds graphlens nodes/relations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from dataclasses import dataclass, field
7
+ from typing import TYPE_CHECKING
8
+
9
+ import tree_sitter_php as tsphp
10
+ from graphlens import (
11
+ GraphLens,
12
+ Node,
13
+ NodeKind,
14
+ Relation,
15
+ RelationKind,
16
+ )
17
+ from graphlens.utils import Span, make_node_id
18
+ from tree_sitter import Language, Parser, Tree
19
+ from tree_sitter import Node as TSNode
20
+
21
+ if TYPE_CHECKING:
22
+ from pathlib import Path
23
+
24
+ logger = logging.getLogger("graphlens_php")
25
+
26
+ _PHP_LANGUAGE = Language(tsphp.language_php())
27
+ _parser = Parser(_PHP_LANGUAGE)
28
+
29
+
30
def parse_php(source: bytes) -> Tree:
    """Feed ``source`` to the module-level PHP parser and return its Tree."""
    tree = _parser.parse(source)
    return tree
33
+
34
+
35
def extract_namespace(root: TSNode) -> str:
    """
    Return the first named namespace declared in a file (``""`` if none).

    Only the direct children of ``root`` are inspected. A
    ``namespace_definition`` without a ``name`` field is skipped and the
    search continues; leading/trailing backslashes are stripped from the
    returned name. The first match is taken as the file's qualified-name
    prefix (PSR-4 projects declare one namespace per file).
    """
    definitions = (
        child
        for child in root.children
        if child.type == "namespace_definition"
    )
    for definition in definitions:
        declared = definition.child_by_field_name("name")
        if declared is None:
            continue
        return _node_text(declared).strip("\\")
    return ""
49
+
50
+
51
+ # ---------------------------------------------------------------------------
52
+ # Occurrence reference (use-site record)
53
+ # ---------------------------------------------------------------------------
54
+
55
+
56
@dataclass(frozen=True)
class OccurrenceRef:
    """
    A use-site that the resolver will bind to a definition.

    Instances are frozen: fields cannot be reassigned after construction.
    Coordinates are 1-based (matching Span convention).

    Roles:
        ``call``        — call-site of a function/method/constructor
        ``read``        — property/constant/name read
        ``write``       — property assignment target
        ``annotation``  — parameter / return / property type
        ``base``        — class parent, implemented interface, or used trait
    """

    # One of the role strings listed in the class docstring.
    role: str
    # Start line and column of the referenced name token (1-based); the
    # visitor fills these from ``span.start_line`` / ``span.start_col``.
    line: int
    col: int
    # Id of the graph node the use-site is attributed to.
    enclosing_id: str
    # Extent of the referenced name token.
    span: Span
76
+
77
+
78
+ # ---------------------------------------------------------------------------
79
+ # Import classification
80
+ # ---------------------------------------------------------------------------
81
+
82
+
83
@dataclass
class ImportClassifier:
    """
    Decide where a ``use`` import comes from, using pre-computed name sets.

    The result is stored in ``Node.metadata["origin"]`` and is one of:
        - ``"internal"``     — a namespace declared within the project (PSR-4)
        - ``"third_party"``  — a Composer vendor's namespace
        - ``"stdlib"``       — a PHP built-in class (unqualified ``use``)
        - ``"unknown"``      — none of the above

    ``internal`` is compared case-sensitively against the namespace's top
    segment, ``third_party`` against the lowercased segment (Composer
    vendors are lowercase). See ``_deps`` for why vendor prefixes are used.
    """

    stdlib: frozenset[str] = field(default_factory=frozenset)
    third_party: frozenset[str] = field(default_factory=frozenset)
    internal: frozenset[str] = field(default_factory=frozenset)

    def classify(self, top_level: str, *, is_single: bool) -> str:
        """
        Return the origin label for an import.

        ``top_level`` is the first segment of the imported name and
        ``is_single`` tells whether the import had no namespace separator.
        Checks run in priority order: internal, third-party, then stdlib
        (which only applies to single-segment imports).
        """
        if top_level in self.internal:
            origin = "internal"
        elif top_level.lower() in self.third_party:
            origin = "third_party"
        elif is_single and top_level in self.stdlib:
            origin = "stdlib"
        else:
            origin = "unknown"
        return origin
111
+
112
+
113
@dataclass
class VisitorContext:
    """
    Per-file context handed to the visitor for one CST visit.

    NOTE(review): the dataclass is not declared ``frozen``, so immutability
    is a convention rather than enforced; the visitor in this module only
    reads these fields.
    """

    # Project name; used as the first component of generated node ids.
    project_name: str
    # Path of the file being visited; stored on nodes as ``str(file_path)``.
    file_path: Path
    # Namespace used to seed the visitor's scope stack ("" = global).
    namespace: str
120
+
121
+
122
+ # ---------------------------------------------------------------------------
123
+ # Main visitor
124
+ # ---------------------------------------------------------------------------
125
+
126
+
127
+ class PhpASTVisitor:
128
+ """
129
+ Walks a tree-sitter PHP CST and populates a GraphLens.
130
+
131
+ Structural declarations (classes, interfaces, traits, enums, functions,
132
+ methods, properties, constants, parameters, imports) become nodes with
133
+ ``DECLARES``/``IMPORTS``/``RESOLVES_TO`` edges. Use-sites (calls, type
134
+ references, base classes, property reads/writes) are collected as
135
+ :class:`OccurrenceRef` for the post-visit resolution pass — this visitor
136
+ never emits ``CALLS``/``REFERENCES``/``HAS_TYPE``/``INHERITS_FROM``.
137
+ """
138
+
139
+ _NESTED_DEF_TYPES = (
140
+ "class_declaration",
141
+ "interface_declaration",
142
+ "trait_declaration",
143
+ "enum_declaration",
144
+ "function_definition",
145
+ )
146
+
147
+ def __init__( # noqa: PLR0913
148
+ self,
149
+ ctx: VisitorContext,
150
+ graph: GraphLens,
151
+ file_node_id: str,
152
+ source: bytes,
153
+ classifier: ImportClassifier | None = None,
154
+ modules: dict[str, str] | None = None,
155
+ ) -> None:
156
+ self._ctx = ctx
157
+ self._graph = graph
158
+ self._file_node_id = file_node_id
159
+ self._source = source
160
+ self._classifier = classifier or ImportClassifier()
161
+ # Shared namespace-qualified-name → MODULE node id index, populated by
162
+ # the adapter as files are processed. Used to resolve internal imports
163
+ # to their MODULE node by longest-prefix without scanning the graph.
164
+ self._modules = modules if modules is not None else {}
165
+ # Stack of qualified-name prefixes (current scope); "" = global ns
166
+ self._scope_stack: list[str] = [ctx.namespace]
167
+ # Stack of node IDs for emitting DECLARES relations
168
+ self._container_stack: list[str] = [file_node_id]
169
+ # Stack of NodeKind to know if we are inside a class
170
+ self._kind_stack: list[NodeKind] = [NodeKind.FILE]
171
+ # Occurrence use-sites collected during this visit
172
+ self.occurrences: list[OccurrenceRef] = []
173
+ self.abs_file_path: str = str(ctx.file_path)
174
+
175
+ # -------------------------------------------------------------------------
176
+ # Dispatch
177
+ # -------------------------------------------------------------------------
178
+
179
+ def visit(self, node: TSNode) -> None:
180
+ handler = getattr(self, f"_visit_{node.type}", None)
181
+ if handler:
182
+ handler(node)
183
+ else:
184
+ self._visit_children(node)
185
+
186
+ def _visit_children(self, node: TSNode) -> None:
187
+ for child in node.children:
188
+ self.visit(child)
189
+
190
+ def _visit_namespace_definition(self, node: TSNode) -> None:
191
+ # Scope is already seeded from the file's namespace; descend so the
192
+ # block form ``namespace X { ... }`` has its body processed too.
193
+ self._visit_children(node)
194
+
195
+ # -------------------------------------------------------------------------
196
+ # Declarations
197
+ # -------------------------------------------------------------------------
198
+
199
+ def _visit_class_declaration(self, node: TSNode) -> None:
200
+ self._handle_class(node, is_abstract=_has_abstract(node))
201
+
202
+ def _visit_interface_declaration(self, node: TSNode) -> None:
203
+ self._handle_class(node, is_interface=True)
204
+
205
+ def _visit_trait_declaration(self, node: TSNode) -> None:
206
+ self._handle_class(node, is_trait=True)
207
+
208
+ def _visit_enum_declaration(self, node: TSNode) -> None:
209
+ self._handle_class(node, is_enum=True)
210
+
211
+ def _handle_class(
212
+ self,
213
+ node: TSNode,
214
+ *,
215
+ is_interface: bool = False,
216
+ is_trait: bool = False,
217
+ is_enum: bool = False,
218
+ is_abstract: bool = False,
219
+ ) -> None:
220
+ name_node = node.child_by_field_name("name")
221
+ if name_node is None: # pragma: no cover - defensive
222
+ return
223
+ name = _node_text(name_node)
224
+ qname = self._qualify(name)
225
+
226
+ class_node = self._make_node(
227
+ NodeKind.CLASS,
228
+ qname,
229
+ name,
230
+ node,
231
+ metadata={
232
+ "is_interface": is_interface,
233
+ "is_trait": is_trait,
234
+ "is_enum": is_enum,
235
+ "is_abstract": is_abstract,
236
+ },
237
+ name_node=name_node,
238
+ )
239
+ self._add_node_with_relation(class_node, RelationKind.DECLARES)
240
+
241
+ # Base classes / interfaces (extends + implements)
242
+ for clause_type in ("base_clause", "class_interface_clause"):
243
+ clause = next(
244
+ (c for c in node.children if c.type == clause_type), None
245
+ )
246
+ if clause is not None:
247
+ for ref in _type_refs(clause):
248
+ self._record_occurrence("base", ref, class_node.id)
249
+
250
+ self._push(qname, class_node.id, NodeKind.CLASS)
251
+ body = node.child_by_field_name("body")
252
+ if body is not None: # pragma: no cover - classes always have a body
253
+ self._visit_children(body)
254
+ self._pop()
255
+
256
+ def _visit_use_declaration(self, node: TSNode) -> None:
257
+ """Trait use inside a class body — modelled as a ``base`` edge."""
258
+ for ref in _type_refs(node):
259
+ self._record_occurrence("base", ref, self._container_stack[-1])
260
+
261
+ def _visit_function_definition(self, node: TSNode) -> None:
262
+ self._handle_function(node)
263
+
264
+ def _visit_method_declaration(self, node: TSNode) -> None:
265
+ self._handle_function(node)
266
+
267
+ def _handle_function(self, node: TSNode) -> None:
268
+ name_node = node.child_by_field_name("name")
269
+ if name_node is None: # pragma: no cover - defensive
270
+ return
271
+ name = _node_text(name_node)
272
+ qname = self._qualify(name)
273
+ kind = (
274
+ NodeKind.METHOD
275
+ if self._kind_stack[-1] == NodeKind.CLASS
276
+ else NodeKind.FUNCTION
277
+ )
278
+
279
+ func_node = self._make_node(
280
+ kind,
281
+ qname,
282
+ name,
283
+ node,
284
+ metadata={
285
+ "is_static": _has_modifier(node, "static_modifier"),
286
+ "is_abstract": _has_abstract(node),
287
+ "visibility": _visibility(node),
288
+ },
289
+ name_node=name_node,
290
+ )
291
+ self._add_node_with_relation(func_node, RelationKind.DECLARES)
292
+
293
+ return_type = node.child_by_field_name("return_type")
294
+ if return_type is not None:
295
+ self._record_type(return_type, func_node.id)
296
+
297
+ self._push(qname, func_node.id, kind)
298
+ params = node.child_by_field_name("parameters")
299
+ if params is not None: # pragma: no cover - always present
300
+ self._extract_parameters(params, func_node.id, qname)
301
+ body = node.child_by_field_name("body")
302
+ if body is not None:
303
+ self._walk_body(body, func_node.id)
304
+ self._pop()
305
+
306
+ def _extract_parameters(
307
+ self, params_node: TSNode, function_id: str, function_qname: str
308
+ ) -> None:
309
+ for child in params_node.children:
310
+ if child.type not in (
311
+ "simple_parameter",
312
+ "variadic_parameter",
313
+ "property_promotion_parameter",
314
+ ):
315
+ continue
316
+ var_node = child.child_by_field_name("name")
317
+ id_node = _name_child(var_node) if var_node is not None else None
318
+ if id_node is None: # pragma: no cover - defensive
319
+ continue
320
+ param_name = _node_text(id_node)
321
+ type_node = child.child_by_field_name("type")
322
+ is_promoted = child.type == "property_promotion_parameter"
323
+
324
+ param_node = self._make_node(
325
+ NodeKind.PARAMETER,
326
+ f"{function_qname}\\{param_name}",
327
+ param_name,
328
+ child,
329
+ metadata={
330
+ "is_variadic": child.type == "variadic_parameter",
331
+ "is_promoted": is_promoted,
332
+ "has_default": child.child_by_field_name("default_value")
333
+ is not None,
334
+ },
335
+ name_node=id_node,
336
+ )
337
+ self._safe_add_node(param_node)
338
+ self._graph.add_relation(
339
+ Relation(
340
+ source_id=function_id,
341
+ target_id=param_node.id,
342
+ kind=RelationKind.DECLARES,
343
+ )
344
+ )
345
+ if type_node is not None:
346
+ self._record_type(type_node, param_node.id)
347
+
348
+ def _visit_property_declaration(self, node: TSNode) -> None:
349
+ type_node = node.child_by_field_name("type")
350
+ for element in node.children:
351
+ if element.type != "property_element":
352
+ continue
353
+ var_node = element.child_by_field_name("name")
354
+ id_node = _name_child(var_node) if var_node is not None else None
355
+ if id_node is None: # pragma: no cover - defensive
356
+ continue
357
+ name = _node_text(id_node)
358
+ prop_node = self._make_node(
359
+ NodeKind.ATTRIBUTE,
360
+ self._qualify(name),
361
+ name,
362
+ element,
363
+ metadata={"visibility": _visibility(node)},
364
+ name_node=id_node,
365
+ )
366
+ self._add_node_with_relation(prop_node, RelationKind.DECLARES)
367
+ if type_node is not None:
368
+ self._record_type(type_node, prop_node.id)
369
+
370
+ def _visit_const_declaration(self, node: TSNode) -> None:
371
+ in_class = self._kind_stack[-1] == NodeKind.CLASS
372
+ kind = NodeKind.ATTRIBUTE if in_class else NodeKind.VARIABLE
373
+ for element in node.children:
374
+ if element.type != "const_element":
375
+ continue
376
+ name_node = next(
377
+ (c for c in element.children if c.type == "name"), None
378
+ )
379
+ if name_node is None: # pragma: no cover - defensive
380
+ continue
381
+ name = _node_text(name_node)
382
+ const_node = self._make_node(
383
+ kind,
384
+ self._qualify(name),
385
+ name,
386
+ element,
387
+ metadata={"is_constant": True},
388
+ name_node=name_node,
389
+ )
390
+ self._add_node_with_relation(const_node, RelationKind.DECLARES)
391
+
392
+ def _visit_enum_case(self, node: TSNode) -> None:
393
+ name_node = node.child_by_field_name("name")
394
+ if name_node is None: # pragma: no cover - defensive
395
+ return
396
+ name = _node_text(name_node)
397
+ case_node = self._make_node(
398
+ NodeKind.ATTRIBUTE,
399
+ self._qualify(name),
400
+ name,
401
+ node,
402
+ metadata={"is_enum_case": True},
403
+ name_node=name_node,
404
+ )
405
+ self._add_node_with_relation(case_node, RelationKind.DECLARES)
406
+
407
+ # -------------------------------------------------------------------------
408
+ # Imports
409
+ # -------------------------------------------------------------------------
410
+
411
+ def _visit_namespace_use_declaration(self, node: TSNode) -> None:
412
+ group = next(
413
+ (c for c in node.children if c.type == "namespace_use_group"),
414
+ None,
415
+ )
416
+ if group is not None:
417
+ prefix_node = next(
418
+ (c for c in node.children if c.type == "namespace_name"), None
419
+ )
420
+ prefix = _node_text(prefix_node) if prefix_node else ""
421
+ for clause in group.children:
422
+ if clause.type == "namespace_use_clause":
423
+ self._emit_use_clause(clause, prefix)
424
+ return
425
+ for clause in node.children:
426
+ if clause.type == "namespace_use_clause":
427
+ self._emit_use_clause(clause, "")
428
+
429
+ def _emit_use_clause(self, clause: TSNode, prefix: str) -> None:
430
+ path_node = next(
431
+ (
432
+ c
433
+ for c in clause.children
434
+ if c.type in ("qualified_name", "name")
435
+ ),
436
+ None,
437
+ )
438
+ if path_node is None: # pragma: no cover - defensive
439
+ return
440
+ path = _node_text(path_node).strip("\\")
441
+ ext_qname = f"{prefix}\\{path}" if prefix else path
442
+ ext_qname = ext_qname.strip("\\")
443
+ alias_node = clause.child_by_field_name("alias")
444
+ local = (
445
+ _node_text(alias_node)
446
+ if alias_node is not None
447
+ else ext_qname.rsplit("\\", maxsplit=1)[-1]
448
+ )
449
+ self._emit_import(local_name=local, ext_qname=ext_qname)
450
+
451
+ def _emit_import(self, *, local_name: str, ext_qname: str) -> None:
452
+ top = ext_qname.split("\\", maxsplit=1)[0]
453
+ is_single = "\\" not in ext_qname
454
+ origin = self._classifier.classify(top, is_single=is_single)
455
+
456
+ import_node = self._make_node(
457
+ NodeKind.IMPORT,
458
+ self._qualify(local_name),
459
+ local_name,
460
+ metadata={
461
+ "alias": local_name
462
+ if local_name != ext_qname.rsplit("\\", maxsplit=1)[-1]
463
+ else None,
464
+ "original_name": ext_qname,
465
+ "origin": origin,
466
+ },
467
+ )
468
+ self._add_node_with_relation(import_node, RelationKind.DECLARES)
469
+
470
+ target_id: str | None = None
471
+ if origin == "internal":
472
+ target_id = self._lookup_module(ext_qname)
473
+ if target_id is None:
474
+ target_id = self._get_or_create_external_symbol(
475
+ ext_qname, origin=origin
476
+ ).id
477
+
478
+ self._graph.add_relation(
479
+ Relation(
480
+ source_id=self._file_node_id,
481
+ target_id=target_id,
482
+ kind=RelationKind.IMPORTS,
483
+ )
484
+ )
485
+ self._graph.add_relation(
486
+ Relation(
487
+ source_id=import_node.id,
488
+ target_id=target_id,
489
+ kind=RelationKind.RESOLVES_TO,
490
+ )
491
+ )
492
+
493
+ # -------------------------------------------------------------------------
494
+ # Value scanning (calls / reads / writes)
495
+ # -------------------------------------------------------------------------
496
+
497
+ def _visit_expression_statement(self, node: TSNode) -> None:
498
+ """Scan top-level / namespace-scope statements for use-sites."""
499
+ for child in node.children:
500
+ self._scan_value(child, self._container_stack[-1])
501
+
502
+ def _walk_body(self, body: TSNode, enclosing_id: str) -> None:
503
+ """Walk a function/method body, recording use-sites once each."""
504
+ for child in body.children:
505
+ if child.type in self._NESTED_DEF_TYPES:
506
+ self.visit(child)
507
+ else:
508
+ self._scan_value(child, enclosing_id)
509
+
510
+ def _scan_value(self, node: TSNode, enclosing_id: str) -> None: # noqa: PLR0911, PLR0912
511
+ """Record ``call``/``read``/``write`` occurrences in an expression."""
512
+ t = node.type
513
+ if t == "function_call_expression":
514
+ fn = node.child_by_field_name("function")
515
+ if fn is not None: # pragma: no cover - always present
516
+ callee = _callee_name(fn)
517
+ if callee is not None:
518
+ self._record_occurrence("call", callee, enclosing_id)
519
+ elif fn.type not in ("name", "qualified_name"):
520
+ self._scan_value(fn, enclosing_id)
521
+ self._scan_arguments(node, enclosing_id)
522
+ return
523
+ if t in (
524
+ "member_call_expression",
525
+ "nullsafe_member_call_expression",
526
+ "scoped_call_expression",
527
+ ):
528
+ name_node = node.child_by_field_name("name")
529
+ if name_node is not None and name_node.type == "name":
530
+ self._record_occurrence("call", name_node, enclosing_id)
531
+ obj = node.child_by_field_name("object")
532
+ if obj is not None:
533
+ self._scan_value(obj, enclosing_id)
534
+ self._scan_arguments(node, enclosing_id)
535
+ return
536
+ if t == "object_creation_expression":
537
+ cls = _object_creation_class(node)
538
+ if cls is not None:
539
+ callee = _callee_name(cls)
540
+ if callee is not None: # pragma: no cover - always a name
541
+ self._record_occurrence("call", callee, enclosing_id)
542
+ self._scan_arguments(node, enclosing_id)
543
+ return
544
+ if t in (
545
+ "member_access_expression",
546
+ "nullsafe_member_access_expression",
547
+ ):
548
+ name_node = node.child_by_field_name("name")
549
+ if name_node is not None and name_node.type == "name":
550
+ self._record_occurrence("read", name_node, enclosing_id)
551
+ obj = node.child_by_field_name("object")
552
+ if obj is not None: # pragma: no cover - always present
553
+ self._scan_value(obj, enclosing_id)
554
+ return
555
+ if t == "class_constant_access_expression":
556
+ # The trailing name leaf is the constant (or ``class``) reference.
557
+ self._record_occurrence("read", node.children[-1], enclosing_id)
558
+ return
559
+ if t == "assignment_expression":
560
+ self._scan_assignment(node, enclosing_id)
561
+ return
562
+ if t == "name":
563
+ self._record_occurrence("read", node, enclosing_id)
564
+ return
565
+ if t == "qualified_name":
566
+ last = _last_name(node)
567
+ if last is not None: # pragma: no cover - always has a name leaf
568
+ self._record_occurrence("read", last, enclosing_id)
569
+ return
570
+ if t == "variable_name":
571
+ return # local variable — not resolvable across files
572
+ for child in node.children:
573
+ self._scan_value(child, enclosing_id)
574
+
575
+ def _scan_assignment(self, node: TSNode, enclosing_id: str) -> None:
576
+ left = node.child_by_field_name("left")
577
+ right = node.child_by_field_name("right")
578
+ if left is not None and left.type in (
579
+ "member_access_expression",
580
+ "nullsafe_member_access_expression",
581
+ ):
582
+ name_node = left.child_by_field_name("name")
583
+ if name_node is not None and name_node.type == "name":
584
+ self._record_occurrence("write", name_node, enclosing_id)
585
+ obj = left.child_by_field_name("object")
586
+ if obj is not None: # pragma: no cover - always present
587
+ self._scan_value(obj, enclosing_id)
588
+ elif left is not None: # pragma: no cover - always present
589
+ self._scan_value(left, enclosing_id)
590
+ if right is not None: # pragma: no cover - always present
591
+ self._scan_value(right, enclosing_id)
592
+
593
+ def _scan_arguments(self, node: TSNode, enclosing_id: str) -> None:
594
+ args = node.child_by_field_name("arguments")
595
+ if args is None:
596
+ return
597
+ for arg in args.children:
598
+ if arg.type == "argument":
599
+ for child in arg.children:
600
+ self._scan_value(child, enclosing_id)
601
+
602
+ def _record_type(self, type_node: TSNode, enclosing_id: str) -> None:
603
+ for ref in _type_refs(type_node):
604
+ self._record_occurrence("annotation", ref, enclosing_id)
605
+
606
+ def _record_occurrence(
607
+ self, role: str, name_node: TSNode, enclosing_id: str
608
+ ) -> None:
609
+ span = _make_span(name_node)
610
+ if span is None: # pragma: no cover - defensive
611
+ return
612
+ self.occurrences.append(
613
+ OccurrenceRef(
614
+ role=role,
615
+ line=span.start_line,
616
+ col=span.start_col,
617
+ enclosing_id=enclosing_id,
618
+ span=span,
619
+ )
620
+ )
621
+
622
+ # -------------------------------------------------------------------------
623
+ # Node helpers
624
+ # -------------------------------------------------------------------------
625
+
626
+ def _qualify(self, name: str) -> str:
627
+ scope = self._scope_stack[-1]
628
+ return f"{scope}\\{name}" if scope else name
629
+
630
+ def _get_or_create_external_symbol(
631
+ self, qname: str, origin: str = "unknown"
632
+ ) -> Node:
633
+ sym_id = make_node_id(
634
+ self._ctx.project_name, qname, NodeKind.EXTERNAL_SYMBOL.value
635
+ )
636
+ if sym_id not in self._graph.nodes:
637
+ self._graph.add_node(
638
+ Node(
639
+ id=sym_id,
640
+ kind=NodeKind.EXTERNAL_SYMBOL,
641
+ qualified_name=qname,
642
+ name=qname.rsplit("\\", maxsplit=1)[-1],
643
+ metadata={"origin": origin},
644
+ )
645
+ )
646
+ return self._graph.nodes[sym_id]
647
+
648
+ def _add_node_with_relation(
649
+ self, node: Node, rel_kind: RelationKind
650
+ ) -> None:
651
+ self._safe_add_node(node)
652
+ self._graph.add_relation(
653
+ Relation(
654
+ source_id=self._container_stack[-1],
655
+ target_id=node.id,
656
+ kind=rel_kind,
657
+ )
658
+ )
659
+
660
+ def _safe_add_node(self, node: Node) -> None:
661
+ if node.id not in self._graph.nodes:
662
+ self._graph.add_node(node)
663
+
664
+ def _make_node( # noqa: PLR0913
665
+ self,
666
+ kind: NodeKind,
667
+ qualified_name: str,
668
+ name: str,
669
+ ts_node: TSNode | None = None,
670
+ metadata: dict[str, object] | None = None,
671
+ name_node: TSNode | None = None,
672
+ ) -> Node:
673
+ md = dict(metadata or {})
674
+ if name_node is not None:
675
+ name_span = _make_span(name_node)
676
+ if name_span is not None: # pragma: no cover - always valid here
677
+ md["name_span"] = name_span
678
+ return Node(
679
+ id=make_node_id(
680
+ self._ctx.project_name, qualified_name, kind.value
681
+ ),
682
+ kind=kind,
683
+ qualified_name=qualified_name,
684
+ name=name,
685
+ file_path=str(self._ctx.file_path),
686
+ span=_make_span(ts_node) if ts_node else None,
687
+ metadata=md,
688
+ )
689
+
690
+ def _lookup_module(self, qname: str) -> str | None:
691
+ r"""
692
+ Return the MODULE id for ``qname`` or its longest namespace prefix.
693
+
694
+ ``App\\Model\\User`` resolves to the ``App\\Model`` namespace MODULE
695
+ even when the ``User`` class is not yet its own node. Uses the shared
696
+ ``modules`` index (O(depth) lookups) rather than scanning the graph.
697
+ """
698
+ parts = qname.split("\\")
699
+ for length in range(len(parts), 0, -1):
700
+ candidate = "\\".join(parts[:length])
701
+ module_id = self._modules.get(candidate)
702
+ if module_id is not None:
703
+ return module_id
704
+ return None
705
+
706
+ def _push(self, qname: str, node_id: str, kind: NodeKind) -> None:
707
+ self._scope_stack.append(qname)
708
+ self._container_stack.append(node_id)
709
+ self._kind_stack.append(kind)
710
+
711
+ def _pop(self) -> None:
712
+ self._scope_stack.pop()
713
+ self._container_stack.pop()
714
+ self._kind_stack.pop()
715
+
716
+
717
+ # ---------------------------------------------------------------------------
718
+ # Module-level helpers
719
+ # ---------------------------------------------------------------------------
720
+
721
+
722
def _node_text(node: TSNode) -> str:
    """Return the node's source text decoded as UTF-8 (``""`` if absent)."""
    raw = node.text
    if raw is None:
        return ""
    return raw.decode("utf-8")
724
+
725
+
726
def _name_child(node: TSNode) -> TSNode | None:
    """Return the first ``name`` child of ``node`` (``$x`` → ``x``), if any."""
    for child in node.children:
        if child.type == "name":
            return child
    return None
729
+
730
+
731
def _last_name(node: TSNode) -> TSNode | None:
    """Return the last direct ``name`` child of a qualified name, if any."""
    for child in reversed(node.children):
        if child.type == "name":
            return child
    return None
738
+
739
+
740
def _callee_name(node: TSNode) -> TSNode | None:
    """
    Return the name token identifying a called function/class.

    A bare ``name`` is returned as-is, a ``qualified_name`` yields its last
    name segment, and any other node type yields ``None``.
    """
    kind = node.type
    if kind == "qualified_name":
        return _last_name(node)
    return node if kind == "name" else None
747
+
748
+
749
def _object_creation_class(node: TSNode) -> TSNode | None:
    """
    Return the class reference of a ``new X(...)`` expression.

    That is the first direct child that is a ``name`` or
    ``qualified_name``; ``None`` when there is no such child.
    """
    class_refs = (
        child
        for child in node.children
        if child.type in ("name", "qualified_name")
    )
    return next(class_refs, None)
755
+
756
+
757
def _type_refs(node: TSNode) -> list[TSNode]:
    """
    Return the class-name leaves found in a type or heritage clause.

    The result is a fresh list, in traversal order.
    """
    collected: list[TSNode] = []
    _collect_type_refs(node, collected)
    return collected
762
+
763
+
764
def _collect_type_refs(node: TSNode, out: list[TSNode]) -> None:
    """
    Append the class-name leaves below ``node`` to ``out`` (depth-first).

    A ``name`` is appended directly, a ``qualified_name`` contributes its
    last name segment, ``primitive_type`` / ``null`` / ``bottom_type``
    subtrees are ignored, and any other node is searched recursively.
    """
    kind = node.type
    if kind == "name":
        out.append(node)
    elif kind == "qualified_name":
        leaf = _last_name(node)
        if leaf is not None:  # pragma: no cover - always has a name leaf
            out.append(leaf)
    elif kind not in ("primitive_type", "null", "bottom_type"):
        for child in node.children:
            _collect_type_refs(child, out)
778
+
779
+
780
def _has_modifier(node: TSNode, modifier_type: str) -> bool:
    """Tell whether ``node`` has a direct child of type ``modifier_type``."""
    for child in node.children:
        if child.type == modifier_type:
            return True
    return False
782
+
783
+
784
def _has_abstract(node: TSNode) -> bool:
    """Tell whether ``node`` carries an ``abstract_modifier`` child."""
    abstract_type = "abstract_modifier"
    return _has_modifier(node, abstract_type)
786
+
787
+
788
def _visibility(node: TSNode) -> str:
    """
    Return the text of the first ``visibility_modifier`` child of ``node``.

    Falls back to ``"public"`` when the declaration has no such child.
    """
    modifier = next(
        (c for c in node.children if c.type == "visibility_modifier"), None
    )
    if modifier is None:
        return "public"
    return _node_text(modifier)
793
+
794
+
795
def _make_span(node: TSNode | None) -> Span | None:
    """
    Build a 1-based :class:`Span` from a tree-sitter node's positions.

    tree-sitter start/end points are (row, column) pairs; each component
    is shifted by one. Returns ``None`` for a ``None`` node or when the
    span cannot be built.
    """
    if node is None:  # pragma: no cover - callers guard against None
        return None
    try:
        start_row, start_col = node.start_point
        end_row, end_col = node.end_point
        span = Span(
            start_line=start_row + 1,
            start_col=start_col + 1,
            end_line=end_row + 1,
            end_col=end_col + 1,
        )
    except Exception:  # pragma: no cover - defensive
        return None
    return span