graphlens-python 0.2.2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
1
  """Python language adapter for graphlens."""
2
2
 
3
3
  from graphlens_python._adapter import PythonAdapter
4
+ from graphlens_python._resolver import TyResolver
4
5
 
5
- __all__ = ["PythonAdapter"]
6
+ __all__ = ["PythonAdapter", "TyResolver"]
@@ -3,6 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import logging
6
+ from pathlib import Path
6
7
  from typing import TYPE_CHECKING
7
8
 
8
9
  from graphlens import (
@@ -13,7 +14,8 @@ from graphlens import (
13
14
  Relation,
14
15
  RelationKind,
15
16
  )
16
- from graphlens.utils import make_node_id
17
+ from graphlens.utils import SpanIndex, make_node_id
18
+ from graphlens.utils.roots import filter_nested_root_files
17
19
 
18
20
  from graphlens_python._deps import (
19
21
  PYTHON_DEFAULT_DEP_PARSERS,
@@ -28,22 +30,34 @@ from graphlens_python._project_detector import (
28
30
  find_python_roots,
29
31
  is_python_project,
30
32
  )
33
+ from graphlens_python._resolver import TyResolver
31
34
  from graphlens_python._visitor import (
32
35
  ImportClassifier,
36
+ OccurrenceRef,
33
37
  PythonASTVisitor,
34
38
  VisitorContext,
35
39
  parse_python,
36
40
  )
37
41
 
38
42
  if TYPE_CHECKING:
39
- from pathlib import Path
40
-
41
- from graphlens.contracts import DependencyFileParser
43
+ from graphlens.contracts import DependencyFileParser, SymbolResolver
42
44
 
43
45
  logger = logging.getLogger("graphlens_python")
44
46
 
45
47
  _STDLIB = get_stdlib_names()
46
48
 
49
# ---------------------------------------------------------------------------
# Role → RelationKind mapping
# ---------------------------------------------------------------------------

# Lookup table from an occurrence's ``role`` string to the kind of relation
# emitted for it. Both "read" and "write" map to REFERENCES.
_ROLE_TO_KIND: dict[str, RelationKind] = {
    "call": RelationKind.CALLS,
    "base": RelationKind.INHERITS_FROM,
    "annotation": RelationKind.HAS_TYPE,
    "read": RelationKind.REFERENCES,
    "write": RelationKind.REFERENCES,
}
60
+
47
61
 
48
62
  class PythonAdapter(LanguageAdapter):
49
63
  """Language adapter for Python projects."""
@@ -51,6 +65,7 @@ class PythonAdapter(LanguageAdapter):
51
65
  def __init__(
52
66
  self,
53
67
  dep_parsers: list[DependencyFileParser] | None = None,
68
+ resolver: SymbolResolver | None = None,
54
69
  ) -> None:
55
70
  """
56
71
  Initialize the Python adapter.
@@ -62,6 +77,11 @@ class PythonAdapter(LanguageAdapter):
62
77
  non-standard package managers (poetry-only setup,
63
78
  pip-tools, pnpm, etc.).
64
79
  Defaults to ``PYTHON_DEFAULT_DEP_PARSERS``.
80
+ resolver: symbol resolver used for cross-file resolution of
81
+ calls, references, annotations, and base classes.
82
+ Defaults to ``TyResolver`` (requires ``ty`` in PATH).
83
+ Passing ``None`` selects that default; inject a custom
84
+ ``SymbolResolver`` subclass to use a different resolver.
65
85
 
66
86
  """
67
87
  self._dep_parsers = (
@@ -69,6 +89,9 @@ class PythonAdapter(LanguageAdapter):
69
89
  if dep_parsers is not None
70
90
  else PYTHON_DEFAULT_DEP_PARSERS
71
91
  )
92
+ self._resolver = (
93
+ resolver if resolver is not None else TyResolver()
94
+ )
72
95
 
73
96
  def language(self) -> str:
74
97
  return "python"
@@ -93,27 +116,36 @@ class PythonAdapter(LanguageAdapter):
93
116
  project_root,
94
117
  files,
95
118
  self._dep_parsers,
119
+ self._resolver,
96
120
  )
97
121
  else:
98
- for py_root in find_python_roots(project_root):
122
+ py_roots = find_python_roots(project_root)
123
+ for py_root in py_roots:
99
124
  root_files = self.collect_files(py_root)
125
+ root_files = filter_nested_root_files(
126
+ root_files,
127
+ py_root,
128
+ py_roots,
129
+ )
100
130
  _analyze_root(
101
131
  graph,
102
132
  project_root,
103
133
  py_root,
104
134
  root_files,
105
135
  self._dep_parsers,
136
+ self._resolver,
106
137
  )
107
138
 
108
139
  return graph
109
140
 
110
141
 
111
- def _analyze_root(
142
+ def _analyze_root( # noqa: PLR0913, PLR0915
112
143
  graph: GraphLens,
113
144
  project_root: Path,
114
145
  py_root: Path,
115
146
  files: list[Path],
116
147
  dep_parsers: list[DependencyFileParser],
148
+ resolver: SymbolResolver,
117
149
  ) -> None:
118
150
  """Analyze one Python project root and populate graph in-place."""
119
151
  project_name = detect_project_name(py_root)
@@ -157,6 +189,7 @@ def _analyze_root(
157
189
  )
158
190
 
159
191
  modules: dict[str, str] = {}
192
+ all_occurrences: list[tuple[str, OccurrenceRef]] = []
160
193
 
161
194
  for file in files:
162
195
  source_root = (
@@ -225,6 +258,16 @@ def _analyze_root(
225
258
  ctx, graph, file_id, source_bytes, classifier
226
259
  )
227
260
  visitor.visit(tree.root_node)
261
+ all_occurrences.extend(
262
+ (visitor.abs_file_path, o) for o in visitor.occurrences
263
+ )
264
+
265
+ # Resolution pass: bind occurrences to real nodes or EXTERNAL_SYMBOL
266
+ span_index = SpanIndex.from_graph(graph)
267
+ resolver.prepare(py_root, files)
268
+ _resolve_occurrences(
269
+ graph, project_name, resolver, span_index, all_occurrences
270
+ )
228
271
 
229
272
  # PROJECT --CONTAINS--> top-level modules
230
273
  top_level = {qn: mid for qn, mid in modules.items() if "." not in qn}
@@ -238,6 +281,107 @@ def _analyze_root(
238
281
  )
239
282
 
240
283
 
284
def _ensure_external_symbol(
    graph: GraphLens, project_name: str, qname: str, origin: str
) -> str:
    """
    Fetch or create the EXTERNAL_SYMBOL node standing in for ``qname``.

    The node id is computed by ``make_node_id`` from ``project_name``,
    ``qname`` and the EXTERNAL_SYMBOL kind. A node is added only when that
    id is not already a key of ``graph.nodes``; an existing node is left
    untouched, so ``origin`` only takes effect on first creation.

    Args:
        graph: the graph to update in-place.
        project_name: namespace passed to ``make_node_id``.
        qname: fully-qualified name of the external symbol.
        origin: stored under the ``"origin"`` metadata key of a newly
            created node (``"stdlib"``, ``"third_party"``, ``"unknown"``,
            or ``"internal"`` when the module node is absent).

    Returns:
        The node id of the EXTERNAL_SYMBOL.

    """
    node_id = make_node_id(
        project_name, qname, NodeKind.EXTERNAL_SYMBOL.value
    )
    if node_id in graph.nodes:
        return node_id

    # Short name is the text after the last dot (all of qname if undotted).
    short_name = qname.rpartition(".")[2]
    placeholder = Node(
        id=node_id,
        kind=NodeKind.EXTERNAL_SYMBOL,
        qualified_name=qname,
        name=short_name,
        metadata={"origin": origin},
    )
    graph.add_node(placeholder)
    return node_id
317
+
318
+
319
def _resolve_occurrences(
    graph: GraphLens,
    project_name: str,
    resolver: SymbolResolver,
    span_index: SpanIndex,
    occurrences: list[tuple[str, OccurrenceRef]],
) -> None:
    """
    Resolve all accumulated occurrences and emit edges.

    For each ``(abs_path, occ)`` pair:

    1. Ask the resolver for the definition site; occurrences for which it
       returns ``None`` produce no edge.
    2. If the definition is internal and has a file path, look up the
       target node id via ``span_index.at()``.
    3. If no node is found (or the origin is not internal), create/reuse an
       ``EXTERNAL_SYMBOL`` fallback node.
    4. Emit a ``Relation`` of the appropriate kind from the occurrence's
       enclosing node, with span metadata and, for read/write occurrences,
       an ``access`` key.

    Args:
        graph: the graph to update in-place.
        project_name: namespace used for EXTERNAL_SYMBOL node ids.
        resolver: the symbol resolver that was already ``prepare()``d.
        span_index: pre-built index of node spans from ``graph``.
        occurrences: list of ``(absolute_file_path, OccurrenceRef)`` pairs
            collected during the file-visit loop.

    Raises:
        KeyError: if an occurrence carries a role that is missing from
            ``_ROLE_TO_KIND``.

    """
    for abs_path, occ in occurrences:
        rel_kind = _ROLE_TO_KIND[occ.role]
        ref = resolver.definition_at(Path(abs_path), occ.line, occ.col)
        if ref is None:
            continue

        target_id: str | None = None
        if ref.origin == "internal" and ref.file_path is not None:
            target_id = span_index.at(
                str(ref.file_path), ref.line, ref.col
            )

        if target_id is None:
            # When full_name is absent, use a file- and position-qualified
            # key so that distinct unresolved sites don't collapse into the
            # same node. The file path is part of the key because
            # occurrences from every file share this loop, and line/col
            # alone repeat across files. It is joined with "." so the
            # short name derived from the last dotted segment stays
            # "<role>@<line>:<col>".
            fallback_qname = (
                ref.full_name
                or f"{abs_path}.{occ.role}@{occ.line}:{occ.col}"
            )
            target_id = _ensure_external_symbol(
                graph,
                project_name,
                fallback_qname,
                ref.origin,
            )

        metadata: dict[str, object] = {"span": occ.span}
        if occ.role in ("read", "write"):
            # REFERENCES edges record whether the access reads or writes.
            metadata["access"] = occ.role
        graph.add_relation(
            Relation(
                source_id=occ.enclosing_id,
                target_id=target_id,
                kind=rel_kind,
                metadata=metadata,
            )
        )
383
+
384
+
241
385
  def _find_source_root_for(file: Path, source_roots: list[Path]) -> Path | None:
242
386
  for root in source_roots:
243
387
  try:
@@ -17,7 +17,7 @@ def find_source_roots(project_root: Path, files: list[Path]) -> list[Path]:
17
17
  and any(files)
18
18
  and any(f.is_relative_to(src) for f in files)
19
19
  ):
20
- return [src]
20
+ return [src, project_root]
21
21
  return [project_root]
22
22
 
23
23
 
@@ -6,6 +6,8 @@ import configparser
6
6
  import tomllib
7
7
  from typing import TYPE_CHECKING
8
8
 
9
+ from graphlens.utils import collect_marker_roots
10
+
9
11
  if TYPE_CHECKING:
10
12
  from pathlib import Path
11
13
 
@@ -47,38 +49,21 @@ def find_python_roots(search_root: Path) -> list[Path]:
47
49
  """
48
50
  Find the actual Python project roots within search_root.
49
51
 
50
- If search_root itself has Python markers, returns ``[search_root]``.
51
- Otherwise walks subdirectories for marker files and returns their parent
52
- directories one per distinct Python sub-project. This ensures that
53
- ``detect_project_name`` and source-root resolution use the *correct* root
54
- rather than the monorepo root, giving accurate import mappings.
52
+ Walks for marker files and returns their parent directories — one per
53
+ distinct Python sub-project. A marker at ``search_root`` does not hide
54
+ nested marker roots. This ensures that ``detect_project_name`` and
55
+ source-root resolution use the *correct* root rather than treating the
56
+ whole monorepo as one project.
55
57
 
56
58
  Falls back to ``[search_root]`` when no markers are found anywhere (the
57
59
  directory contains only bare .py scripts with no packaging metadata).
58
60
  """
59
- if _has_python_markers(search_root):
60
- return [search_root]
61
-
62
- roots: list[Path] = []
63
- for marker in PYTHON_MARKERS:
64
- for marker_file in sorted(search_root.rglob(marker)):
65
- rel_parts = marker_file.relative_to(search_root).parts
66
- if _EXCLUDED_DIRS & set(rel_parts):
67
- continue
68
- if marker == "pyproject.toml" and not (
69
- _pyproject_has_project_section(marker_file)
70
- ):
71
- continue
72
- candidate = marker_file.parent
73
- # Skip if already covered by a previously found (ancestor) root
74
- if any(
75
- candidate == r or candidate.is_relative_to(r)
76
- for r in roots
77
- ):
78
- continue
79
- roots.append(candidate)
80
-
81
- return sorted(roots) if roots else [search_root]
61
+ return collect_marker_roots(
62
+ search_root,
63
+ PYTHON_MARKERS,
64
+ excluded_dirs=_EXCLUDED_DIRS,
65
+ marker_filter=_is_valid_python_marker,
66
+ )
82
67
 
83
68
 
84
69
  def detect_project_name(project_root: Path) -> str:
@@ -121,14 +106,17 @@ def _has_python_markers(directory: Path) -> bool:
121
106
  path = directory / marker
122
107
  if not path.exists():
123
108
  continue
124
- if marker == "pyproject.toml":
125
- if _pyproject_has_project_section(path):
126
- return True
127
- else:
109
+ if _is_valid_python_marker(path):
128
110
  return True
129
111
  return False
130
112
 
131
113
 
114
def _is_valid_python_marker(path: Path) -> bool:
    """
    Tell whether the marker file at ``path`` counts as a Python marker.

    A file named ``pyproject.toml`` counts only when
    ``_pyproject_has_project_section`` accepts it; a file with any other
    name always counts.
    """
    return path.name != "pyproject.toml" or _pyproject_has_project_section(
        path
    )
118
+
119
+
132
120
  def _pyproject_has_project_section(path: Path) -> bool:
133
121
  try:
134
122
  with path.open("rb") as f: