graphlens-php 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,596 @@
1
+ """
2
+ PHP symbol resolver.
3
+
4
+ ``PhpantomResolver`` is the only resolver: it drives a ``phpantom_lsp --stdio``
5
+ subprocess (PHPantom, a self-contained Rust LSP server — no PHP runtime needed)
6
+ over stdio via :class:`_PhpLspClient`, resolving the project's occurrences at
7
+ thousands of ``textDocument/definition`` per second through the pipelined batch
8
+ path. When the ``phpantom_lsp`` binary is absent it degrades automatically:
9
+ :meth:`PhpantomResolver.status` reports :data:`ResolverStatus.UNAVAILABLE` and
10
+ every query returns ``None``/``[]``, so the structural graph is still produced
11
+ with only the type-aware edges dropped.
12
+
13
+ The resolver never raises: every error returns ``None``/``[]`` so the
14
+ structural graph is always produced, only the type-aware edges degrade.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import contextlib
20
+ import json
21
+ import logging
22
+ import os
23
+ import select
24
+ import shutil
25
+ import subprocess
26
+ import threading
27
+ import time
28
+ from pathlib import Path
29
+ from urllib.parse import unquote
30
+
31
+ from graphlens.contracts import Occurrence, Query, ResolvedRef, SymbolResolver
32
+ from graphlens.status import ResolverStatus
33
+
34
+ logger = logging.getLogger("graphlens_php")
35
+
36
+
37
def _uri_to_path(uri: str) -> Path | None:
    """
    Convert a ``file://`` URI to a ``Path``; None for other schemes.

    The path component is percent-decoded. A ``localhost`` authority
    (``file://localhost/tmp/a.php``) names the local machine, so it is
    dropped rather than being mistaken for the first path segment. Any other
    authority is left in place, exactly as before.
    """
    prefix = "file://"
    if not uri.startswith(prefix):
        return None
    remainder = uri[len(prefix):]
    # Everything before the first "/" is the authority; it is empty for the
    # usual "file:///abs/path" form.
    authority, slash, tail = remainder.partition("/")
    if authority.lower() == "localhost":
        remainder = slash + tail
    return Path(unquote(remainder))
42
+
43
+
44
class _PhpLspClient:  # pragma: no cover - integration transport
    """
    Minimal synchronous LSP JSON-RPC client over stdio.

    Holds only the JSON-RPC framing, lifecycle, and pipelined batch — no
    PHPantom-specific logic — so the resolver stays separate from the wire
    protocol. The spawn ``argv`` (``phpantom_lsp --stdio``) and ``name`` (used
    only for log messages) are passed in by :class:`PhpantomResolver`.

    Incoming bytes are read straight from the stdout file descriptor into a
    private receive buffer (``_rx``), and every readiness check looks at that
    buffer before calling ``select``. One pipe read routinely delivers several
    frames; ``select`` only sees the descriptor, so frames already pulled into
    a userspace buffer would otherwise look like silence and be reported as a
    timeout, dropping the tail of a pipelined batch.
    """

    # Upper bound on the number of bytes pulled from the pipe per raw read.
    _READ_CHUNK = 65536

    def __init__(
        self, project_root: Path, argv: list[str], name: str = "php-lsp"
    ) -> None:
        """
        Spawn the server with ``argv`` in ``project_root`` and initialize it.

        Exceptions from spawning or from the handshake propagate to the
        caller; if the handshake raises, the already-spawned process is
        stopped first so it is not left running.
        """
        self._name = name
        self._next_id = 0
        self._opened_uris: set[str] = set()
        # Serialize stdin writes: the pipelined batch runs a writer thread
        # while the main thread may still write MethodNotFound replies, so
        # two threads can race on stdin and interleave a frame's bytes.
        self._write_lock = threading.Lock()
        # Bytes read from the server's stdout but not yet consumed as frames.
        self._rx = bytearray()
        self._proc: subprocess.Popen = subprocess.Popen(  # type: ignore[type-arg]
            argv,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            cwd=str(project_root),
        )
        try:
            self._initialize(project_root)
        except BaseException:
            self._reap()
            raise

    # ------------------------------------------------------------------
    # Transport
    # ------------------------------------------------------------------

    def _write(self, msg: dict) -> None:  # type: ignore[type-arg]
        """
        Frame ``msg`` with a ``Content-Length`` header and send it.

        A no-op when the server has no stdin or has already exited. Write
        failures are logged at debug level and otherwise ignored.
        """
        stdin = self._proc.stdin
        if stdin is None or self._proc.poll() is not None:
            return
        body = json.dumps(msg, separators=(",", ":")).encode()
        header = f"Content-Length: {len(body)}\r\n\r\n".encode()
        try:
            with self._write_lock:
                stdin.write(header + body)
                stdin.flush()
        except (OSError, ValueError) as exc:
            # ValueError: the pipe object was closed by a concurrent shutdown.
            logger.debug("%s write error: %s", self._name, exc)

    def _rx_fill(self) -> bool:
        """Append one raw read from stdout to ``_rx``; False on EOF."""
        chunk = os.read(
            self._proc.stdout.fileno(),  # type: ignore[union-attr]
            self._READ_CHUNK,
        )
        if not chunk:
            return False
        self._rx += chunk
        return True

    def _rx_readline(self) -> bytes:
        """Return the next line (newline included), or the remainder at EOF."""
        while True:
            newline = self._rx.find(b"\n")
            if newline >= 0:
                line = bytes(self._rx[: newline + 1])
                del self._rx[: newline + 1]
                return line
            if not self._rx_fill():
                line = bytes(self._rx)
                self._rx.clear()
                return line

    def _rx_read(self, size: int) -> bytes:
        """Return ``size`` bytes, or fewer if the stream ends first."""
        while len(self._rx) < size:
            if not self._rx_fill():
                break
        data = bytes(self._rx[:size])
        del self._rx[:size]
        return data

    def _wait_readable(self, timeout: float) -> bool:
        """
        Report whether a read can start without waiting out ``timeout``.

        Buffered bytes count as ready; only when the buffer is empty is the
        pipe itself polled with ``select``.
        """
        if self._rx:
            return True
        ready, _, _ = select.select([self._proc.stdout], [], [], timeout)
        return bool(ready)

    def _read_frame(self) -> dict | None:  # type: ignore[type-arg]
        """
        Read one LSP frame from stdout (caller guarantees data is ready).

        Returns the decoded message, ``{}`` for a frame without a body, or
        ``None`` on EOF or on a read/parse error.
        """
        content_length = 0
        try:
            while True:
                raw = self._rx_readline()
                if not raw:
                    return None  # EOF — server exited
                stripped = raw.strip()
                if not stripped:
                    break  # blank line ends LSP headers
                if stripped.lower().startswith(b"content-length:"):
                    content_length = int(stripped.split(b":", 1)[1].strip())
            if not content_length:
                return {}
            body = self._rx_read(content_length)
            return json.loads(body) if body else {}
        except (OSError, ValueError) as exc:
            # json.JSONDecodeError is a ValueError subclass.
            logger.debug("%s read error: %s", self._name, exc)
            return None

    def _read_one(self, timeout: float = 30.0) -> dict | None:  # type: ignore[type-arg]
        """Read the next frame, or None on timeout, EOF, or a dead server."""
        if self._proc.stdout is None or self._proc.poll() is not None:
            return None
        if not self._wait_readable(timeout):
            logger.warning("%s timed out after %.0fs", self._name, timeout)
            return None
        return self._read_frame()

    def _reply_method_not_found(self, request_id: object) -> None:
        """Answer a server→client request so it is not left pending."""
        self._write(
            {
                "jsonrpc": "2.0",
                "id": request_id,
                "error": {
                    "code": -32601,
                    "message": "Method not found",
                },
            }
        )

    def _drain_until_quiet(
        self,
        quiet: float = 1.0,
        budget: float = 120.0,
        writer: threading.Thread | None = None,
    ) -> None:
        """
        Drain server notifications until the stream goes quiet.

        Reads and discards server-initiated messages until none arrives for
        ``quiet`` seconds (or ``budget`` elapses). PHPantom builds its
        cross-file index asynchronously after ``didOpen``
        and emits a burst of ``window/logMessage`` / ``publishDiagnostics``
        notifications while it works, with no explicit "index ready" signal.
        Definition queries issued before that burst settles resolve to null,
        so the batch path drains the burst first.

        ``writer`` is an optional thread still feeding stdin (the ``didOpen``
        sender). While it is alive a lull does **not** end the drain: we must
        keep reading so phpantom's stdout never fills and blocks it — which
        would deadlock the writer against our own pending write. Quiet only
        counts once the writer has finished.
        """
        if self._proc.stdout is None or self._proc.poll() is not None:
            return
        deadline = time.monotonic() + budget
        while time.monotonic() < deadline:
            if not self._wait_readable(quiet):
                if writer is not None and writer.is_alive():
                    continue  # opener still sending — keep draining its output
                return  # silence → index settled
            msg = self._read_frame()
            if msg is None:
                return  # EOF
            mid = msg.get("id")
            if "method" in msg and mid is not None:
                self._reply_method_not_found(mid)

    def _recv_response(
        self, expected_id: int, timeout: float = 30.0
    ) -> dict | None:  # type: ignore[type-arg]
        """
        Read frames until the response carrying ``expected_id`` arrives.

        Notifications are skipped, server→client requests are answered with
        "Method not found", and responses with other ids are ignored.
        Returns None if a read fails or 500 frames pass without a match.
        """
        for _ in range(500):  # cap to prevent accidental infinite loop
            msg = self._read_one(timeout=timeout)
            if msg is None:
                return None
            msg_id = msg.get("id")
            if "method" in msg:
                if msg_id is not None:
                    self._reply_method_not_found(msg_id)
                continue
            if msg_id == expected_id:
                return msg
        logger.warning(
            "%s did not respond to request %d", self._name, expected_id
        )
        return None

    def _request(
        self, method: str, params: object, timeout: float = 30.0
    ) -> dict | None:  # type: ignore[type-arg]
        """Send a request under a fresh id and wait for its response."""
        self._next_id += 1
        mid = self._next_id
        self._write(
            {"jsonrpc": "2.0", "id": mid, "method": method, "params": params}
        )
        return self._recv_response(mid, timeout=timeout)

    def _notify(self, method: str, params: object) -> None:
        """Send a notification (a message with no id and no response)."""
        self._write({"jsonrpc": "2.0", "method": method, "params": params})

    # ------------------------------------------------------------------
    # LSP lifecycle
    # ------------------------------------------------------------------

    def _initialize(self, project_root: Path) -> None:
        """Run the ``initialize`` / ``initialized`` handshake."""
        root_uri = project_root.as_uri()
        resp = self._request(
            "initialize",
            {
                "processId": os.getpid(),
                "rootUri": root_uri,
                "capabilities": {
                    "textDocument": {
                        "definition": {"dynamicRegistration": False},
                        "references": {"dynamicRegistration": False},
                    },
                },
                "workspaceFolders": [
                    {"uri": root_uri, "name": project_root.name},
                ],
            },
            timeout=60.0,
        )
        if resp is not None:
            self._notify("initialized", {})

    # ------------------------------------------------------------------
    # File management
    # ------------------------------------------------------------------

    @staticmethod
    def _did_open_message(uri: str, file: Path) -> dict:  # type: ignore[type-arg]
        """Build the ``didOpen`` notification for ``file``.

        An unreadable file is opened with empty text.
        """
        try:
            text = file.read_text(encoding="utf-8", errors="replace")
        except OSError:
            text = ""
        return {
            "jsonrpc": "2.0",
            "method": "textDocument/didOpen",
            "params": {
                "textDocument": {
                    "uri": uri,
                    "languageId": "php",
                    "version": 1,
                    "text": text,
                },
            },
        }

    def open_file(self, file: Path) -> str:
        """Send ``didOpen`` for ``file`` once per client; return its URI."""
        uri = file.as_uri()
        if uri not in self._opened_uris:
            self._opened_uris.add(uri)
            self._write(self._did_open_message(uri, file))
        return uri

    def _build_open_messages(self, files: list[Path]) -> list[dict]:  # type: ignore[type-arg]
        """
        Build (don't send) a ``didOpen`` for every not-yet-opened file.

        Marks each opened. Kept separate from sending so the batch path can
        write these from a writer thread: a ``didOpen`` carries the file's
        full text, so a whole project is megabytes — writing it inline would
        block on a full stdin pipe while phpantom blocks on a full stdout
        pipe (its diagnostics), deadlocking both. The caller writes these
        concurrently with a reader.
        """
        msgs: list[dict] = []  # type: ignore[type-arg]
        for file in files:
            uri = file.as_uri()
            if uri in self._opened_uris:
                continue
            self._opened_uris.add(uri)
            msgs.append(self._did_open_message(uri, file))
        return msgs

    def _write_all(self, msgs: list[dict]) -> None:  # type: ignore[type-arg]
        """Write every message in order (run on a writer thread)."""
        for msg in msgs:
            self._write(msg)

    # ------------------------------------------------------------------
    # Queries
    # ------------------------------------------------------------------

    @staticmethod
    def _position_params(uri: str, line: int, col: int) -> dict:  # type: ignore[type-arg]
        """Build position params, shifting 1-based line/col to 0-based."""
        return {
            "textDocument": {"uri": uri},
            "position": {"line": line - 1, "character": col - 1},
        }

    def definition(self, file: Path, line: int, col: int) -> dict | None:  # type: ignore[type-arg]
        """Resolve one position with a blocking ``textDocument/definition``."""
        uri = self.open_file(file)
        resp = self._request(
            "textDocument/definition",
            self._position_params(uri, line, col),
            timeout=30.0,
        )
        if resp is None:
            return None
        return self._first_location(resp.get("result"))

    @staticmethod
    def _first_location(result: object) -> dict | None:  # type: ignore[type-arg]
        """Reduce an LSP definition result to a single Location or None."""
        if not result:
            return None
        if isinstance(result, list):
            return result[0]
        return result  # type: ignore[return-value]

    def definition_batch(self, queries: list[Query]) -> list[dict | None]:  # type: ignore[type-arg]
        """
        Resolve many positions in one pipelined exchange.

        Two phases, each writing from a writer thread while the main thread
        reads concurrently so a full stdin/stdout pipe can never deadlock:
        (1) send ``didOpen`` for every file and drain the indexer until it
        settles, then (2) send every ``textDocument/definition`` request up
        front and collect responses by JSON-RPC id. Order is preserved;
        unanswered positions stay ``None``. This turns N blocking round-trips
        into one pipelined stream.
        """
        if not queries:
            return []
        results: list[dict | None] = [None] * len(queries)  # type: ignore[type-arg]
        if self._proc.poll() is not None:
            return results

        # Phase 1 — open every file, then let the async indexer settle. The
        # drain does not treat a lull as "settled" until the opener is done.
        open_msgs = self._build_open_messages([f for (f, _l, _c) in queries])
        if open_msgs:
            opener = threading.Thread(
                target=self._write_all, args=(open_msgs,), daemon=True
            )
            opener.start()
            self._drain_until_quiet(writer=opener)
            opener.join(timeout=5)
        if self._proc.poll() is not None:
            return results

        # Phase 2 — build every request first, remembering which query each
        # JSON-RPC id belongs to, then stream them from a writer thread.
        id2idx: dict[int, int] = {}
        reqs: list[dict] = []  # type: ignore[type-arg]
        for k, (file, line, col) in enumerate(queries):
            self._next_id += 1
            mid = self._next_id
            id2idx[mid] = k
            reqs.append(
                {
                    "jsonrpc": "2.0",
                    "id": mid,
                    "method": "textDocument/definition",
                    "params": self._position_params(file.as_uri(), line, col),
                }
            )

        writer = threading.Thread(
            target=self._write_all, args=(reqs,), daemon=True
        )
        writer.start()
        got = 0
        while got < len(queries):
            msg = self._read_one(timeout=60.0)
            if msg is None:
                break  # timeout, EOF or dead server: the rest stay None
            mid = msg.get("id")
            if "method" in msg:
                if mid is not None:
                    self._reply_method_not_found(mid)
                continue
            idx = id2idx.get(mid) if mid is not None else None
            if idx is not None:
                results[idx] = self._first_location(msg.get("result"))
                got += 1
        writer.join(timeout=5)
        return results

    def references(self, file: Path, line: int, col: int) -> list[dict]:  # type: ignore[type-arg]
        """Return the raw reference locations for a position, or ``[]``."""
        uri = self.open_file(file)
        params = self._position_params(uri, line, col)
        params["context"] = {"includeDeclaration": False}
        resp = self._request("textDocument/references", params, timeout=30.0)
        if resp is None:
            return []
        result = resp.get("result")
        return result if isinstance(result, list) else []

    # ------------------------------------------------------------------
    # Cleanup
    # ------------------------------------------------------------------

    def _reap(self) -> None:
        """
        Make sure the server is stopped, collected, and its pipes closed.

        Kills the process if it is still running, waits for it so no zombie
        is left behind, then closes both pipe objects. Every step is
        best-effort and the method is safe to call more than once.
        """
        with contextlib.suppress(Exception):
            if self._proc.poll() is None:
                self._proc.kill()
        with contextlib.suppress(Exception):
            self._proc.wait(timeout=5)
        for pipe in (self._proc.stdin, self._proc.stdout):
            if pipe is not None:
                with contextlib.suppress(Exception):
                    pipe.close()

    def shutdown(self) -> None:
        """
        Stop the server: polite ``shutdown``/``exit`` first, then force.

        Never raises for a failed handshake; whatever happens, the process is
        reaped and its pipes are closed before returning.
        """
        if self._proc.poll() is None:
            with contextlib.suppress(Exception):
                self._request("shutdown", None)
                self._notify("exit", None)
                self._proc.wait(timeout=5)
        self._reap()

    def __del__(self) -> None:
        with contextlib.suppress(Exception):
            self.shutdown()
449
+
450
+
451
class PhpantomResolver(SymbolResolver):
    """
    Resolve PHP symbols via a ``phpantom_lsp --stdio`` subprocess.

    PHPantom is a self-contained Rust LSP server — no PHP runtime required —
    and resolves ``textDocument/definition`` through the pipelined batch
    path. Point ``$GRAPHLENS_PHPANTOM`` at the binary, or have
    ``phpantom_lsp`` / ``phpantom`` on ``PATH``.

    Spawns one server per :meth:`prepare` call via :class:`_PhpLspClient`. If
    the server cannot be started, :meth:`prepare` logs a warning and all
    queries return ``None``/``[]`` — the structural graph is still produced.
    ``infer_type_at`` always returns ``None``.
    """

    _engine = "phpantom"

    def __init__(self) -> None:
        # Live LSP client, or None before prepare() / after a failed start.
        self._client: _PhpLspClient | None = None
        # Project root of the last prepare(); used to classify definitions.
        self._root: Path | None = None

    def _spawn_argv(self) -> list[str]:
        """Return the server command line.

        ``$GRAPHLENS_PHPANTOM`` wins, then ``phpantom_lsp`` or ``phpantom``
        found on ``PATH``, else the bare name ``phpantom_lsp``.
        """
        binary = (
            os.environ.get("GRAPHLENS_PHPANTOM")
            or shutil.which("phpantom_lsp")
            or shutil.which("phpantom")
            or "phpantom_lsp"
        )
        return [binary, "--stdio"]

    def prepare(self, project_root: Path, files: list[Path]) -> None:  # noqa: ARG002
        """
        (Re)start the server for ``project_root``.

        Any previous client is shut down first. A failed start is logged
        with its cause and leaves the resolver without a client; it never
        raises.
        """
        if self._client is not None:
            with contextlib.suppress(Exception):
                self._client.shutdown()
            self._client = None
        self._root = project_root
        try:
            self._client = _PhpLspClient(
                project_root, self._spawn_argv(), name=self._engine
            )
        except Exception as exc:
            logger.warning(
                "Failed to start %s for %s: %s",
                self._engine,
                project_root,
                exc,
            )
            self._client = None

    def definition_at(
        self, file: Path, line: int, col: int
    ) -> ResolvedRef | None:
        """Resolve one position; None when unavailable or unresolved."""
        if self._client is None:
            return None
        try:
            loc = self._client.definition(file, line, col)
        except Exception:
            return None
        return self._ref_or_none(loc)

    def resolve_all(self, queries: list[Query]) -> list[ResolvedRef | None]:
        """
        Resolve every occurrence in one pipelined LSP exchange.

        Hands the whole list to the client's batch path instead of issuing
        one blocking round-trip per query. The result has one entry per
        query, in order; unresolved or malformed entries are ``None``.
        """
        if self._client is None:
            return [None] * len(queries)
        try:
            locs = self._client.definition_batch(queries)
        except Exception:
            return [None] * len(queries)
        return [self._ref_or_none(loc) for loc in locs]

    def infer_type_at(
        self, file: Path, line: int, col: int  # noqa: ARG002
    ) -> ResolvedRef | None:
        """Type inference is not provided by this resolver; always None."""
        return None

    def status(self) -> ResolverStatus:
        """Report OK when a client exists, UNAVAILABLE otherwise."""
        if self._client is None:
            return ResolverStatus.UNAVAILABLE
        return ResolverStatus.OK

    def references_to(
        self, file: Path, line: int, col: int
    ) -> list[Occurrence]:
        """
        List the references to the symbol at a position.

        Entries that are not mappings or whose URI is not a ``file://`` URI
        are skipped. Returns ``[]`` when unavailable or on any client error.
        """
        if self._client is None:
            return []
        try:
            locs = self._client.references(file, line, col)
        except Exception:
            return []
        out: list[Occurrence] = []
        for loc in locs:
            if not isinstance(loc, dict):
                continue  # malformed entry — skip rather than raise
            fp = _uri_to_path(loc.get("uri") or "")
            if fp is None:
                continue
            start = (loc.get("range") or {}).get("start") or {}
            out.append(
                Occurrence(
                    file_path=fp,
                    line=start.get("line", 0) + 1,
                    col=start.get("character", 0) + 1,
                    is_definition=False,
                    access="unknown",
                )
            )
        return out

    def _ref_or_none(self, loc: object) -> ResolvedRef | None:
        """Convert a server location, mapping anything malformed to None."""
        if not isinstance(loc, dict):
            return None
        try:
            return self._loc_to_ref(loc)
        except Exception:
            # Keeps the "never raises" contract for odd server payloads.
            return None

    def _loc_to_ref(self, loc: dict) -> ResolvedRef:  # type: ignore[type-arg]
        """
        Build a ``ResolvedRef`` from an LSP ``Location`` or ``LocationLink``.

        A ``Location`` carries ``uri``/``range``; a ``LocationLink`` carries
        ``targetUri``/``targetSelectionRange``/``targetRange``. Zero-based
        LSP positions are shifted to 1-based line/col.
        """
        uri = loc.get("uri") or loc.get("targetUri") or ""
        span = (
            loc.get("range")
            or loc.get("targetSelectionRange")
            or loc.get("targetRange")
            or {}
        )
        start = span.get("start") or {}
        fp = _uri_to_path(uri)
        return ResolvedRef(
            full_name="",
            file_path=fp,
            line=start.get("line", 0) + 1,
            col=start.get("character", 0) + 1,
            kind="",
            origin=self._classify(fp),
        )

    def _classify(self, file_path: Path | None) -> str:
        """
        Classify where a definition lives.

        ``"stdlib"`` when there is no file path; inside the project root,
        ``"third_party"`` under a ``vendor`` directory and ``"internal"``
        otherwise; outside the root, ``"third_party"`` if any path component
        is ``vendor``, else ``"unknown"``.
        """
        if file_path is None:
            return "stdlib"
        if self._root is not None:
            try:
                inside = file_path.relative_to(self._root)
            except ValueError:
                pass  # not under the project root
            else:
                # Look for "vendor" only below the root, so a project that
                # itself lives under a directory named "vendor" is not
                # mislabelled as third-party code.
                return "third_party" if "vendor" in inside.parts else "internal"
        return "third_party" if "vendor" in file_path.parts else "unknown"

    def __del__(self) -> None:
        if self._client is not None:
            with contextlib.suppress(Exception):
                self._client.shutdown()