graphlens-php 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ Metadata-Version: 2.3
2
+ Name: graphlens-php
3
+ Version: 0.7.0
4
+ Summary: PHP language adapter for graphlens
5
+ Requires-Dist: graphlens
6
+ Requires-Dist: tree-sitter>=0.24
7
+ Requires-Dist: tree-sitter-php>=0.23
8
+ Requires-Python: >=3.13
@@ -0,0 +1,38 @@
1
+ [project]
2
+ name = "graphlens-php"
3
+ version = "0.7.0"
4
+ description = "PHP language adapter for graphlens"
5
+ requires-python = ">=3.13"
6
+ dependencies = [
7
+ "graphlens",
8
+ "tree-sitter>=0.24",
9
+ "tree-sitter-php>=0.23",
10
+ ]
11
+
12
+ [build-system]
13
+ requires = ["uv_build>=0.9.18,<0.12.0"]
14
+ build-backend = "uv_build"
15
+
16
+ [tool.uv.sources]
17
+ graphlens = { workspace = true }
18
+
19
+ [project.entry-points."graphlens.adapters"]
20
+ php = "graphlens_php:PhpAdapter"
21
+
22
+ [tool.bandit]
23
+ skips = ["B101", "B404", "B603"]
24
+
25
+ [tool.pytest.ini_options]
26
+ testpaths = ["tests"]
27
+
28
+ [tool.coverage.run]
29
+ source = ["graphlens", "graphlens_php"]
30
+
31
+ [tool.coverage.report]
32
+ fail_under = 100
33
+ show_missing = true
34
+ exclude_lines = [
35
+ "pragma: no cover",
36
+ "if TYPE_CHECKING:",
37
+ "\\.\\.\\.",
38
+ ]
@@ -0,0 +1,9 @@
1
+ """graphlens_php — PHP language adapter for graphlens."""
2
+
3
+ from graphlens_php._adapter import PhpAdapter
4
+ from graphlens_php._resolver import PhpantomResolver
5
+
6
+ __all__ = [
7
+ "PhpAdapter",
8
+ "PhpantomResolver",
9
+ ]
@@ -0,0 +1,427 @@
1
+ """PhpAdapter — orchestrates PHP project analysis."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import time
7
+ from pathlib import Path
8
+ from typing import TYPE_CHECKING
9
+
10
+ from graphlens import (
11
+ RESOLVER_METRICS_KEY,
12
+ RESOLVER_STATUS_KEY,
13
+ AdapterError,
14
+ GraphLens,
15
+ LanguageAdapter,
16
+ Node,
17
+ NodeKind,
18
+ Relation,
19
+ RelationKind,
20
+ ResolverMetrics,
21
+ ResolverStatus,
22
+ )
23
+ from graphlens.utils import SpanIndex, make_node_id
24
+ from graphlens.utils.roots import filter_nested_root_files
25
+
26
+ from graphlens_php._deps import (
27
+ PHP_DEFAULT_DEP_PARSERS,
28
+ get_stdlib_names,
29
+ )
30
+ from graphlens_php._module_resolver import (
31
+ internal_namespace_tops,
32
+ path_to_namespace,
33
+ )
34
+ from graphlens_php._project_detector import (
35
+ EXCLUDED_DIRS,
36
+ detect_project_name,
37
+ find_php_roots,
38
+ is_php_project,
39
+ )
40
+ from graphlens_php._resolver import PhpantomResolver
41
+ from graphlens_php._visitor import (
42
+ ImportClassifier,
43
+ OccurrenceRef,
44
+ PhpASTVisitor,
45
+ VisitorContext,
46
+ extract_namespace,
47
+ parse_php,
48
+ )
49
+
50
+ if TYPE_CHECKING:
51
+ from graphlens.contracts import DependencyFileParser, SymbolResolver
52
+
53
+ logger = logging.getLogger("graphlens_php")
54
+
55
+ _STDLIB = get_stdlib_names()
56
+
57
+ # Role → RelationKind mapping for the resolution pass.
58
+ _ROLE_TO_KIND: dict[str, RelationKind] = {
59
+ "call": RelationKind.CALLS,
60
+ "base": RelationKind.INHERITS_FROM,
61
+ "annotation": RelationKind.HAS_TYPE,
62
+ "read": RelationKind.REFERENCES,
63
+ "write": RelationKind.REFERENCES,
64
+ }
65
+
66
+
67
class PhpAdapter(LanguageAdapter):
    """
    Language adapter for PHP / Composer projects.

    Collects PHP source files, analyzes every detected PHP root into a single
    graph, and records the combined resolver status and metrics in the graph
    metadata.
    """

    def __init__(
        self,
        dep_parsers: list[DependencyFileParser] | None = None,
        resolver: SymbolResolver | None = None,
    ) -> None:
        """
        Initialize the PHP adapter.

        Args:
            dep_parsers: parsers used to extract Composer vendor prefixes from
                manifest files (composer.json, composer.lock). Pass a custom
                list to support non-standard setups. Defaults to
                ``PHP_DEFAULT_DEP_PARSERS``.
            resolver: symbol resolver used for cross-file resolution of calls,
                references, type uses, and base classes. Defaults to
                ``PhpantomResolver`` (drives the ``phpantom_lsp`` Rust binary;
                degrades to a structure-only graph when it is absent). Inject a
                custom ``SymbolResolver`` subclass to override.

        """
        self._dep_parsers = (
            dep_parsers
            if dep_parsers is not None
            else PHP_DEFAULT_DEP_PARSERS
        )
        self._resolver = (
            resolver if resolver is not None else PhpantomResolver()
        )

    def language(self) -> str:
        """Return the language identifier handled by this adapter."""
        return "php"

    def file_extensions(self) -> set[str]:
        """Return the file suffixes treated as PHP source."""
        return {".php", ".phtml", ".inc"}

    def can_handle(self, project_root: str | Path) -> bool:
        """Return whether ``project_root`` is detected as a PHP project."""
        return is_php_project(Path(project_root))

    def collect_files(self, project_root: str | Path) -> list[Path]:
        """
        Return all PHP source files under ``project_root``.

        Overrides the core default to also skip PHP-specific non-source
        directories — most importantly ``vendor/`` (Composer's installed
        third-party tree, PHP's equivalent of ``node_modules``), plus build
        and cache dirs. Without this a real app with dependencies installed
        would index thousands of third-party files as project source.

        Excluded directories are pruned during the walk, so their contents
        are never enumerated at all.

        Returns:
            The matching files, sorted.
        """
        root = Path(project_root)
        extensions = self.file_extensions()
        collected: list[Path] = []
        for dirpath, dirnames, filenames in root.walk():
            # Prune in place so the walk never descends into excluded trees.
            dirnames[:] = [d for d in dirnames if d not in EXCLUDED_DIRS]
            for name in filenames:
                candidate = dirpath / name
                # The name check keeps parity with a filter over every path
                # part (file name included); ``is_file`` drops directory
                # symlinks, which ``Path.walk`` reports among the file names.
                if (
                    name not in EXCLUDED_DIRS
                    and candidate.suffix in extensions
                    and candidate.is_file()
                ):
                    collected.append(candidate)
        return sorted(collected)

    def analyze(
        self,
        project_root: str | Path,
        files: list[Path] | None = None,
        *,
        strict: bool = False,
    ) -> GraphLens:
        """
        Analyze a PHP project and return its graph.

        Args:
            project_root: project directory; resolved to an absolute path.
            files: explicit files to analyze. When given, they are analyzed
                as one root located at ``project_root``. When ``None``, every
                root returned by ``find_php_roots`` is analyzed with the files
                collected for it, after ``filter_nested_root_files``.
            strict: raise instead of returning a graph when the combined
                resolver status is not ``ResolverStatus.OK``.

        Returns:
            The populated graph; its metadata carries the combined resolver
            status and the merged resolver metrics.

        Raises:
            AdapterError: in strict mode, when the resolver status is not OK.
        """
        project_root = Path(project_root).resolve()
        graph = GraphLens()
        statuses: list[ResolverStatus] = []
        metrics = ResolverMetrics()

        if files is not None:
            metrics.merge(
                _analyze_root(
                    graph,
                    project_root,
                    project_root,
                    files,
                    self._dep_parsers,
                    self._resolver,
                )
            )
            statuses.append(self._resolver.status())
        else:
            php_roots = find_php_roots(project_root)
            for php_root in php_roots:
                root_files = self.collect_files(php_root)
                root_files = filter_nested_root_files(
                    root_files,
                    php_root,
                    php_roots,
                )
                metrics.merge(
                    _analyze_root(
                        graph,
                        project_root,
                        php_root,
                        root_files,
                        self._dep_parsers,
                        self._resolver,
                    )
                )
                # One status sample per analyzed root.
                statuses.append(self._resolver.status())

        status = ResolverStatus.combine(statuses)
        graph.metadata[RESOLVER_STATUS_KEY] = status.value
        graph.metadata[RESOLVER_METRICS_KEY] = metrics.as_dict()
        if strict and status is not ResolverStatus.OK:
            msg = (
                f"PHP resolver status is '{status.value}'; refusing to "
                "return a degraded graph in strict mode"
            )
            raise AdapterError(msg)
        return graph
183
+
184
+
185
def _analyze_root(  # noqa: PLR0913
    graph: GraphLens,
    project_root: Path,
    php_root: Path,
    files: list[Path],
    dep_parsers: list[DependencyFileParser],
    resolver: SymbolResolver,
) -> ResolverMetrics:
    """
    Analyze one PHP project root and populate ``graph`` in-place.

    Args:
        graph: graph that receives the nodes and relations.
        project_root: directory that file paths are made relative to.
        php_root: PHP root used for project-name detection, import
            classification, the namespace fallback, and resolver preparation.
        files: PHP source files to parse; unreadable ones are skipped.
        dep_parsers: parsers that supply the third-party names.
        resolver: symbol resolver used in the resolution pass.

    Returns:
        The metrics produced while resolving the collected occurrences.
    """
    project_name = detect_project_name(php_root)

    classifier = ImportClassifier(
        stdlib=_STDLIB,
        third_party=_collect_third_party(php_root, dep_parsers),
        internal=frozenset(internal_namespace_tops(php_root)),
    )

    # The PROJECT node is created once, even across several calls.
    project_id = make_node_id(
        project_name, project_name, NodeKind.PROJECT.value
    )
    if project_id not in graph.nodes:
        graph.add_node(
            Node(
                id=project_id,
                kind=NodeKind.PROJECT,
                qualified_name=project_name,
                name=project_name,
            )
        )

    # qualified namespace name -> MODULE node id, filled as files are seen.
    modules: dict[str, str] = {}
    # (absolute file path, occurrence) pairs gathered for the resolution pass.
    all_occurrences: list[tuple[str, OccurrenceRef]] = []

    for file in files:
        try:
            source_bytes = file.read_bytes()
        except OSError as e:
            logger.warning("Cannot read %s: %s — skipping", file, e)
            continue

        tree = parse_php(source_bytes)
        if tree.root_node.has_error:
            logger.warning(
                "Parse errors in %s — continuing with partial results", file
            )

        # Prefer the namespace declared in the file; otherwise derive one
        # from the file's location under the PHP root.
        namespace = extract_namespace(tree.root_node) or path_to_namespace(
            file, php_root
        )

        try:
            relative_path = str(file.relative_to(project_root))
        except ValueError:  # pragma: no cover - unusual monorepo layout
            relative_path = str(file.relative_to(php_root))

        file_id = make_node_id(
            project_name, relative_path, NodeKind.FILE.value
        )
        if file_id not in graph.nodes:
            graph.add_node(
                Node(
                    id=file_id,
                    kind=NodeKind.FILE,
                    qualified_name=relative_path,
                    name=file.name,
                    file_path=relative_path,
                )
            )
            # A file is contained by its leaf namespace module, or directly
            # by the project when it has no namespace.
            container_id = (
                _ensure_module_chain(
                    graph, project_name, namespace, modules
                )
                if namespace
                else project_id
            )
            graph.add_relation(
                Relation(
                    source_id=container_id,
                    target_id=file_id,
                    kind=RelationKind.CONTAINS,
                )
            )

        ctx = VisitorContext(
            project_name=project_name,
            file_path=file,
            namespace=namespace,
        )
        visitor = PhpASTVisitor(
            ctx, graph, file_id, source_bytes, classifier, modules
        )
        visitor.visit(tree.root_node)
        all_occurrences.extend(
            (visitor.abs_file_path, o) for o in visitor.occurrences
        )

    # Resolution pass: bind occurrences to real nodes or EXTERNAL_SYMBOL.
    span_index = SpanIndex.from_graph(graph)
    resolver.prepare(php_root, files)
    metrics = _resolve_occurrences(
        graph, project_name, resolver, span_index, all_occurrences
    )

    # PROJECT --CONTAINS--> top-level namespace modules.
    for qname, module_id in modules.items():
        # A name without a backslash is a top-level namespace.
        if "\\" not in qname:
            graph.add_relation(
                Relation(
                    source_id=project_id,
                    target_id=module_id,
                    kind=RelationKind.CONTAINS,
                )
            )
    return metrics
299
+
300
+
301
+ def _collect_third_party(
302
+ php_root: Path, dep_parsers: list[DependencyFileParser]
303
+ ) -> frozenset[str]:
304
+ third_party: set[str] = set()
305
+ for parser in dep_parsers:
306
+ if parser.can_parse(php_root):
307
+ third_party.update(parser.parse(php_root))
308
+ return frozenset(third_party)
309
+
310
+
311
def _ensure_external_symbol(
    graph: GraphLens, project_name: str, qname: str, origin: str
) -> str:
    """
    Get or create the EXTERNAL_SYMBOL node for ``qname`` and return its id.

    A new node is named after the last backslash-separated segment of
    ``qname`` and records ``origin`` in its metadata. An existing node is
    left untouched.
    """
    kind = NodeKind.EXTERNAL_SYMBOL
    sym_id = make_node_id(project_name, qname, kind.value)
    if sym_id in graph.nodes:
        return sym_id

    # Text after the final backslash, or the whole name when there is none.
    short_name = qname.rpartition("\\")[2]
    graph.add_node(
        Node(
            id=sym_id,
            kind=kind,
            qualified_name=qname,
            name=short_name,
            metadata={"origin": origin},
        )
    )
    return sym_id
329
+
330
+
331
def _resolve_occurrences(
    graph: GraphLens,
    project_name: str,
    resolver: SymbolResolver,
    span_index: SpanIndex,
    occurrences: list[tuple[str, OccurrenceRef]],
) -> ResolverMetrics:
    """
    Resolve the collected occurrences in one batch and emit their edges.

    Each resolved occurrence becomes a relation from its enclosing node to
    either the internal node found through ``span_index`` or an
    EXTERNAL_SYMBOL node. Unresolved occurrences are only counted.

    Returns:
        Metrics with the query count, the time spent in the resolver, and
        the resolved / unresolved / internal / external tallies.
    """
    stats = ResolverMetrics(queries=len(occurrences))
    if not occurrences:
        return stats

    positions: list[tuple[Path, int, int]] = []
    for abs_path, occurrence in occurrences:
        positions.append((Path(abs_path), occurrence.line, occurrence.col))

    # Only the batched resolver call is timed.
    began = time.perf_counter()
    resolved_refs = resolver.resolve_all(positions)
    stats.seconds = time.perf_counter() - began

    # strict=True: the resolver must answer every query, in order.
    for pair, ref in zip(occurrences, resolved_refs, strict=True):
        occ = pair[1]
        if ref is None:
            stats.unresolved += 1
            continue

        stats.resolved += 1
        edge_kind = _ROLE_TO_KIND[occ.role]

        # Only internal references with a known file can map onto a node.
        internal_id: str | None = (
            span_index.at(str(ref.file_path), ref.line, ref.col)
            if ref.origin == "internal" and ref.file_path is not None
            else None
        )
        if internal_id is not None:
            stats.internal += 1
            target_id = internal_id
        else:
            stats.external += 1
            # Without a resolved name, fall back to a role/position label.
            symbol_qname = (
                ref.full_name or f"{occ.role}@{occ.line}:{occ.col}"
            )
            target_id = _ensure_external_symbol(
                graph, project_name, symbol_qname, ref.origin
            )

        edge_meta: dict[str, object] = {"span": occ.span}
        if occ.role in ("read", "write"):
            edge_meta["access"] = occ.role
        graph.add_relation(
            Relation(
                source_id=occ.enclosing_id,
                target_id=target_id,
                kind=edge_kind,
                metadata=edge_meta,
            )
        )
    return stats
383
+
384
+
385
def _ensure_module_chain(
    graph: GraphLens,
    project_name: str,
    namespace: str,
    modules: dict[str, str],
) -> str:
    r"""
    Create any missing MODULE nodes along the namespace chain ``A\\B\\C``.

    Every prefix of the namespace gets a MODULE node, registered in
    ``modules`` under its qualified name. A newly created module is linked
    from its parent prefix with a CONTAINS relation; modules that already
    exist are reused without new relations.

    Returns:
        The node id of the module for the full ``namespace``.
    """
    qname = ""
    parent_id: str | None = None

    for depth, segment in enumerate(namespace.split("\\")):
        # Grow the qualified name one segment at a time.
        qname = segment if depth == 0 else f"{qname}\\{segment}"

        if qname not in modules:
            module_id = make_node_id(
                project_name, qname, NodeKind.MODULE.value
            )
            graph.add_node(
                Node(
                    id=module_id,
                    kind=NodeKind.MODULE,
                    qualified_name=qname,
                    name=segment,
                )
            )
            modules[qname] = module_id

            # Only a freshly created module needs linking to its parent.
            if parent_id is not None:
                graph.add_relation(
                    Relation(
                        source_id=parent_id,
                        target_id=module_id,
                        kind=RelationKind.CONTAINS,
                    )
                )

        parent_id = modules[qname]

    return modules[namespace]
@@ -0,0 +1,164 @@
1
+ r"""
2
+ Dependency file parsers for PHP / Composer projects.
3
+
4
+ PHP poses a classification wrinkle: Composer package names (``vendor/pkg``)
5
+ are not the namespaces that ``use`` statements reference. There is no
6
+ reliable manifest-only mapping from ``symfony/console`` to
7
+ ``Symfony\\Component\\Console`` without inspecting the installed package.
8
+
9
+ These parsers therefore return the set of **vendor prefixes** (the part
10
+ before the ``/``, lowercased), which the :class:`ImportClassifier` matches
11
+ against the lowercased top-level segment of an imported namespace. This
12
+ resolves the common case (``Symfony`` ↔ ``symfony/*``, ``Monolog`` ↔
13
+ ``monolog/monolog``, ``Psr`` ↔ ``psr/log``) from the manifest alone; the
14
+ type-aware resolver corrects the rest from the real ``vendor/`` tree when it
15
+ is installed.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import json
21
+ from typing import TYPE_CHECKING
22
+
23
+ from graphlens.contracts import DependencyFileParser
24
+
25
+ if TYPE_CHECKING:
26
+ from pathlib import Path
27
+
28
+
29
+ def _vendor_prefix(package: str) -> str:
30
+ """Return the lowercased vendor segment of a ``vendor/package`` name."""
31
+ if not isinstance(package, str) or "/" not in package:
32
+ return ""
33
+ return package.split("/", maxsplit=1)[0].strip().lower()
34
+
35
+
36
class ComposerJsonDepsParser(DependencyFileParser):
    """
    Reads declared dependencies from ``composer.json``.

    Collects ``require`` and ``require-dev`` so test-only packages (e.g.
    ``phpunit/phpunit``) are classified as ``third_party`` rather than
    ``unknown``. Platform requirements (``php``, ``ext-*``, ``lib-*``) are
    skipped. Returns vendor prefixes (see module docstring).
    """

    def can_parse(self, project_root: Path) -> bool:
        """Return whether ``composer.json`` exists in ``project_root``."""
        return (project_root / "composer.json").exists()

    def parse(self, project_root: Path) -> frozenset[str]:
        """
        Return the lowercased vendor prefixes declared in ``composer.json``.

        An unreadable, mis-encoded, or malformed manifest yields an empty
        set instead of raising, so one bad file cannot abort the analysis.
        """
        path = project_root / "composer.json"
        try:
            data = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, UnicodeDecodeError, json.JSONDecodeError):
            # UnicodeDecodeError: the file exists but is not valid UTF-8.
            return frozenset()
        if not isinstance(data, dict):
            return frozenset()

        vendors: set[str] = set()
        for section in ("require", "require-dev"):
            block = data.get(section)
            if not isinstance(block, dict):
                continue
            for package in block:
                # Platform requirements (``php``, ``php-64bit``, ``ext-*``,
                # ``lib-*``) never contain a slash. Keying on the slash
                # rather than a name prefix keeps real vendors such as
                # ``php-http/*`` or ``php-di/*`` from being dropped.
                if "/" not in package:
                    continue
                vendor = _vendor_prefix(package)
                if vendor:
                    vendors.add(vendor)
        return frozenset(vendors)
72
+
73
+
74
class ComposerLockDepsParser(DependencyFileParser):
    """
    Reads resolved packages from ``composer.lock``.

    Covers both ``packages`` and ``packages-dev`` so transitive dependencies
    that are imported directly still classify as ``third_party``.
    """

    def can_parse(self, project_root: Path) -> bool:
        """Return whether ``composer.lock`` exists in ``project_root``."""
        return (project_root / "composer.lock").exists()

    def parse(self, project_root: Path) -> frozenset[str]:
        """
        Return the lowercased vendor prefixes of the locked packages.

        An unreadable, mis-encoded, or malformed lock file yields an empty
        set instead of raising, so one bad file cannot abort the analysis.
        """
        path = project_root / "composer.lock"
        try:
            data = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, UnicodeDecodeError, json.JSONDecodeError):
            # UnicodeDecodeError: the file exists but is not valid UTF-8.
            return frozenset()
        if not isinstance(data, dict):
            return frozenset()

        vendors: set[str] = set()
        for section in ("packages", "packages-dev"):
            block = data.get(section)
            if not isinstance(block, list):
                continue
            for entry in block:
                if not isinstance(entry, dict):
                    continue
                # A missing or non-string name yields an empty prefix.
                vendor = _vendor_prefix(entry.get("name", ""))
                if vendor:
                    vendors.add(vendor)
        return frozenset(vendors)
106
+
107
+
108
+ # ---------------------------------------------------------------------------
109
+ # Default parser list for PhpAdapter
110
+ # ---------------------------------------------------------------------------
111
+
112
+ PHP_DEFAULT_DEP_PARSERS: list[DependencyFileParser] = [
113
+ ComposerJsonDepsParser(),
114
+ ComposerLockDepsParser(),
115
+ ]
116
+
117
+
118
+ # ---------------------------------------------------------------------------
119
+ # Built-in / "stdlib" names
120
+ # ---------------------------------------------------------------------------
121
+ #
122
+ # PHP has no module-style standard library: built-in functions are global,
123
+ # and built-in classes live in the global namespace. A ``use`` of one of
124
+ # these names (always a single, unqualified segment, e.g. ``use DateTime;``)
125
+ # is therefore the signal we classify as ``stdlib``.
126
+
127
+ _BUILTIN_CLASSES: frozenset[str] = frozenset({
128
+ # Core / SPL
129
+ "stdClass", "Closure", "Generator", "WeakMap", "WeakReference",
130
+ "ArrayObject", "ArrayIterator", "ArrayAccess", "Countable", "Iterator",
131
+ "IteratorAggregate", "Traversable", "Stringable", "JsonSerializable",
132
+ "Serializable", "UnitEnum", "BackedEnum", "SplStack", "SplQueue",
133
+ "SplDoublyLinkedList", "SplFixedArray", "SplObjectStorage",
134
+ "SplPriorityQueue", "SplHeap", "SplMinHeap", "SplMaxHeap",
135
+ "SplFileObject", "SplFileInfo", "SplTempFileObject", "DirectoryIterator",
136
+ "RecursiveIteratorIterator", "RecursiveDirectoryIterator",
137
+ "FilesystemIterator",
138
+ # Exceptions / errors
139
+ "Throwable", "Exception", "Error", "TypeError", "ValueError",
140
+ "ArgumentCountError", "ArithmeticError", "DivisionByZeroError",
141
+ "RuntimeException", "LogicException", "InvalidArgumentException",
142
+ "OutOfRangeException", "OutOfBoundsException", "LengthException",
143
+ "DomainException", "RangeException", "UnexpectedValueException",
144
+ "UnderflowException", "OverflowException", "BadFunctionCallException",
145
+ "BadMethodCallException", "JsonException",
146
+ # Date / time
147
+ "DateTime", "DateTimeImmutable", "DateTimeInterface", "DateInterval",
148
+ "DateTimeZone", "DatePeriod",
149
+ # Reflection
150
+ "ReflectionClass", "ReflectionObject", "ReflectionMethod",
151
+ "ReflectionFunction", "ReflectionProperty", "ReflectionParameter",
152
+ "ReflectionNamedType", "ReflectionEnum", "ReflectionAttribute",
153
+ "ReflectionException", "Attribute",
154
+ # Common extensions bundled with PHP
155
+ "PDO", "PDOStatement", "PDOException", "mysqli", "SQLite3",
156
+ "DOMDocument", "DOMElement", "DOMNode", "SimpleXMLElement",
157
+ "XMLReader", "XMLWriter", "ZipArchive", "finfo", "IntlDateFormatter",
158
+ "NumberFormatter", "Collator", "Locale", "CURLFile", "GMP", "BcMath",
159
+ })
160
+
161
+
162
def get_stdlib_names() -> frozenset[str]:
    """
    Return the set of PHP built-in class names treated as ``stdlib``.

    The module-level ``_BUILTIN_CLASSES`` frozenset is returned as-is, so
    every call yields the same immutable object.
    """
    return _BUILTIN_CLASSES