graphlens-python 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ """Python language adapter for graphlens."""
2
+
3
+ from graphlens_python._adapter import PythonAdapter
4
+
5
+ __all__ = ["PythonAdapter"]
@@ -0,0 +1,291 @@
1
+ """PythonAdapter — orchestrates Python project analysis."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from typing import TYPE_CHECKING
7
+
8
+ from graphlens import (
9
+ GraphLens,
10
+ LanguageAdapter,
11
+ Node,
12
+ NodeKind,
13
+ Relation,
14
+ RelationKind,
15
+ )
16
+ from graphlens.utils import make_node_id
17
+
18
+ from graphlens_python._deps import (
19
+ PYTHON_DEFAULT_DEP_PARSERS,
20
+ get_stdlib_names,
21
+ )
22
+ from graphlens_python._module_resolver import (
23
+ file_to_qualified_name,
24
+ find_source_roots,
25
+ )
26
+ from graphlens_python._project_detector import (
27
+ detect_project_name,
28
+ find_python_roots,
29
+ is_python_project,
30
+ )
31
+ from graphlens_python._visitor import (
32
+ ImportClassifier,
33
+ PythonASTVisitor,
34
+ VisitorContext,
35
+ parse_python,
36
+ )
37
+
38
+ if TYPE_CHECKING:
39
+ from pathlib import Path
40
+
41
+ from graphlens.contracts import DependencyFileParser
42
+
43
# Logger shared by this module; the name is fixed to the package name rather
# than derived from ``__name__``.
logger = logging.getLogger("graphlens_python")

# Standard-library module names, computed once at import time and handed to
# every ImportClassifier built in ``_analyze_root``.
_STDLIB = get_stdlib_names()
46
+
47
+
48
class PythonAdapter(LanguageAdapter):
    """Language adapter that turns Python projects into a graph."""

    def __init__(
        self,
        dep_parsers: list[DependencyFileParser] | None = None,
    ) -> None:
        """
        Create the adapter.

        Args:
            dep_parsers: parsers that extract third-party dependency
                names from manifest files (pyproject.toml,
                requirements.txt, etc.). Supply your own list to cover
                manifests the defaults do not understand.
                ``None`` selects ``PYTHON_DEFAULT_DEP_PARSERS``.

        """
        if dep_parsers is None:
            dep_parsers = PYTHON_DEFAULT_DEP_PARSERS
        self._dep_parsers = dep_parsers

    def language(self) -> str:
        """Return the language identifier handled by this adapter."""
        return "python"

    def file_extensions(self) -> set[str]:
        """Return the file suffixes this adapter analyzes."""
        return {".py", ".pyi"}

    def can_handle(self, project_root: Path) -> bool:
        """Return whether ``is_python_project`` accepts *project_root*."""
        return is_python_project(project_root)

    def analyze(
        self,
        project_root: Path,
        files: list[Path] | None = None,
    ) -> GraphLens:
        """
        Build a graph for *project_root*.

        With an explicit *files* list, *project_root* itself is analyzed as
        the single Python root. Otherwise every root reported by
        ``find_python_roots`` is analyzed with the files collected for it.
        """
        graph = GraphLens()

        if files is not None:
            targets = [(project_root, files)]
        else:
            # Lazy on purpose: each root's files are collected right before
            # that root is analyzed, never all up front.
            targets = (
                (py_root, self.collect_files(py_root))
                for py_root in find_python_roots(project_root)
            )

        for py_root, root_files in targets:
            _analyze_root(
                graph,
                project_root,
                py_root,
                root_files,
                self._dep_parsers,
            )

        return graph
109
+
110
+
111
def _analyze_root(
    graph: GraphLens,
    project_root: Path,
    py_root: Path,
    files: list[Path],
    dep_parsers: list[DependencyFileParser],
) -> None:
    """
    Analyze one Python project root and populate graph in-place.

    Args:
        graph: graph that receives the PROJECT, MODULE and FILE nodes, the
            CONTAINS relations, and whatever the AST visitor adds.
        project_root: preferred base for the relative paths stored on FILE
            nodes.
        py_root: root used to detect the project name, the source roots and
            the dependency manifests; also the fallback base for relative
            paths.
        files: source files to analyze.
        dep_parsers: manifest parsers; each one whose ``can_parse(py_root)``
            is true contributes third-party names.

    """
    project_name = detect_project_name(py_root)
    source_roots = find_source_roots(py_root, files)

    # --- Pre-pass: collect all internal module top-level names ---------------
    # Derive module qnames from file paths without parsing source — so the
    # ImportClassifier knows which imports are internal before visiting.
    internal_tops: set[str] = set()
    for f in files:
        # Files outside every source root fall back to the first root.
        sr = _find_source_root_for(f, source_roots) or source_roots[0]
        try:
            qname = file_to_qualified_name(f, sr)
            internal_tops.add(qname.split(".")[0])
        except ValueError:
            # Silent here: the main loop below logs the same failure.
            pass

    # --- Third-party: parse dependency manifests ----------------------------
    third_party: set[str] = set()
    for parser in dep_parsers:
        if parser.can_parse(py_root):
            third_party.update(parser.parse(py_root))

    classifier = ImportClassifier(
        stdlib=_STDLIB,
        third_party=frozenset(third_party),
        internal=frozenset(internal_tops),
    )

    project_id = make_node_id(
        project_name, project_name, NodeKind.PROJECT.value
    )
    # The graph may already hold this project from an earlier call.
    if project_id not in graph.nodes:
        graph.add_node(
            Node(
                id=project_id,
                kind=NodeKind.PROJECT,
                qualified_name=project_name,
                name=project_name,
            )
        )

    # Maps module qualified name -> node id for modules created in this call.
    modules: dict[str, str] = {}

    for file in files:
        source_root = (
            _find_source_root_for(file, source_roots) or source_roots[0]
        )

        try:
            module_qname = file_to_qualified_name(file, source_root)
        except ValueError:
            logger.warning(
                "Cannot compute qualified name for %s, skipping", file
            )
            continue

        _ensure_module_chain(graph, project_name, module_qname, modules)

        # FILE node — path stays relative to the original project_root so that
        # all file paths in a monorepo share the same reference point.
        try:
            relative_path = str(file.relative_to(project_root))
        except ValueError:
            relative_path = str(file.relative_to(py_root))

        file_id = make_node_id(
            project_name, relative_path, NodeKind.FILE.value
        )
        # NOTE(review): this view of the source has lost its indentation; the
        # CONTAINS edge is assumed to sit inside the guard and the parsing
        # below at loop level — confirm against the original file.
        if file_id not in graph.nodes:
            graph.add_node(
                Node(
                    id=file_id,
                    kind=NodeKind.FILE,
                    qualified_name=relative_path,
                    name=file.name,
                    file_path=relative_path,
                )
            )
            leaf_module_id = modules[module_qname]
            graph.add_relation(
                Relation(
                    source_id=leaf_module_id,
                    target_id=file_id,
                    kind=RelationKind.CONTAINS,
                )
            )

        try:
            source_bytes = file.read_bytes()
        except OSError as e:
            logger.warning("Cannot read %s: %s — skipping", file, e)
            continue

        tree = parse_python(source_bytes)
        # A tree with errors is still visited; the warning is informational.
        if tree.root_node.has_error:
            logger.warning(
                "Parse errors in %s — continuing with partial results",
                file,
            )

        ctx = VisitorContext(
            project_name=project_name,
            file_path=file,
            source_root=source_root,
            module_qualified_name=module_qname,
        )
        visitor = PythonASTVisitor(
            ctx, graph, file_id, source_bytes, classifier
        )
        visitor.visit(tree.root_node)

    # PROJECT --CONTAINS--> top-level modules
    # A qualified name without a dot is a top-level module.
    top_level = {qn: mid for qn, mid in modules.items() if "." not in qn}
    for module_id in top_level.values():
        graph.add_relation(
            Relation(
                source_id=project_id,
                target_id=module_id,
                kind=RelationKind.CONTAINS,
            )
        )
239
+
240
+
241
def _find_source_root_for(file: Path, source_roots: list[Path]) -> Path | None:
    """Return the first root in *source_roots* containing *file*, else None."""
    return next(
        (
            candidate
            for candidate in source_roots
            if file.is_relative_to(candidate)
        ),
        None,
    )
249
+
250
+
251
def _ensure_module_chain(
    graph: GraphLens,
    project_name: str,
    module_qname: str,
    modules: dict[str, str],
) -> str:
    """
    Make sure a MODULE node exists for every prefix of ``a.b.c``.

    Prefixes already present in *modules* are reused. Each newly created
    module is recorded in *modules* and, when it has a parent, linked to it
    with a CONTAINS relation.

    Returns the node ID of the leaf module.
    """
    previous_id: str | None = None
    prefix: list[str] = []

    for segment in module_qname.split("."):
        prefix.append(segment)
        dotted = ".".join(prefix)

        if dotted in modules:
            # Known module: nothing to create, it just becomes the parent.
            previous_id = modules[dotted]
            continue

        new_id = make_node_id(project_name, dotted, NodeKind.MODULE.value)
        graph.add_node(
            Node(
                id=new_id,
                kind=NodeKind.MODULE,
                qualified_name=dotted,
                name=segment,
            )
        )
        modules[dotted] = new_id

        if previous_id is not None:
            graph.add_relation(
                Relation(
                    source_id=previous_id,
                    target_id=new_id,
                    kind=RelationKind.CONTAINS,
                )
            )

        previous_id = new_id

    return modules[module_qname]
@@ -0,0 +1,191 @@
1
+ """Dependency file parsers for Python projects."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import configparser
6
+ import re
7
+ import sys
8
+ from typing import TYPE_CHECKING
9
+
10
+ import tomllib
11
+ from graphlens.contracts import DependencyFileParser, normalize_pkg_name
12
+
13
+ if TYPE_CHECKING:
14
+ from pathlib import Path
15
+
16
+
17
class PyprojectDepsParser(DependencyFileParser):
    """
    Reads declared dependencies from ``pyproject.toml``.

    Supports:

    - PEP 621: ``[project.dependencies]`` and
      ``[project.optional-dependencies]``
    - PEP 735: ``[dependency-groups]``
    - Poetry: ``[tool.poetry.dependencies]``,
      ``[tool.poetry.dev-dependencies]`` and
      ``[tool.poetry.group.*.dependencies]``

    Dev / test dependency groups are included so that test-only imports
    (e.g. ``pytest``) are classified as ``third_party`` rather than
    ``unknown``.
    """

    def can_parse(self, project_root: Path) -> bool:
        """Return True when *project_root* contains a ``pyproject.toml``."""
        return (project_root / "pyproject.toml").exists()

    def parse(self, project_root: Path) -> frozenset[str]:
        """
        Return the normalized names of all declared dependencies.

        An unreadable or syntactically invalid ``pyproject.toml`` yields an
        empty set instead of raising.
        """
        path = project_root / "pyproject.toml"
        try:
            with path.open("rb") as f:
                data = tomllib.load(f)
        except (tomllib.TOMLDecodeError, OSError):
            return frozenset()

        names: set[str] = set()

        # PEP 621 -------------------------------------------------------
        project = data.get("project", {})
        self._collect(project.get("dependencies", []), names)
        for group_deps in project.get("optional-dependencies", {}).values():
            self._collect(group_deps, names)

        # PEP 735 -------------------------------------------------------
        # Group entries are requirement strings or ``{include-group = ...}``
        # tables; the tables only reference other groups in this same
        # section, so skipping them loses nothing.
        for group_deps in data.get("dependency-groups", {}).values():
            self._collect(group_deps, names)

        # Poetry --------------------------------------------------------
        poetry = data.get("tool", {}).get("poetry", {})
        # Poetry lists the interpreter constraint as a "python" dependency.
        self._collect(
            poetry.get("dependencies", {}),
            names,
            exclude=frozenset({"python"}),
        )
        self._collect(poetry.get("dev-dependencies", {}), names)
        # Poetry dependency groups (poetry >= 1.2)
        for group in poetry.get("group", {}).values():
            self._collect(group.get("dependencies", {}), names)

        return frozenset(names)

    @staticmethod
    def _collect(
        deps: list[object] | dict[str, object],
        names: set[str],
        exclude: frozenset[str] = frozenset(),
    ) -> None:
        """Add the normalized name of every string entry in *deps*."""
        for dep in deps:
            if not isinstance(dep, str):
                continue
            n = normalize_pkg_name(dep)
            if n and n not in exclude:
                names.add(n)
78
+
79
+
80
class RequirementsTxtParser(DependencyFileParser):
    """
    Reads ``requirements*.txt`` files in the project root.

    Handles:

    - Plain package names: ``requests``
    - Version specifiers: ``requests>=2.28``
    - Extras: ``requests[security]``
    - URL / VCS requirements: skipped (no importable name can be reliably
      extracted)
    - ``-r other.txt`` includes: followed, each file being read at most
      once so circular includes terminate
    - Inline comments: stripped
    """

    # Matches lines that are URL or VCS requirements (skip them)
    _SKIP_RE = re.compile(r"^\s*(-r|-c|-e|https?://|git\+|svn\+|hg\+|bzr\+)")
    # A comment starts at a "#" that opens the line or follows whitespace,
    # which leaves URL fragments such as "#egg=name" untouched.
    _COMMENT_RE = re.compile(r"(^|\s+)#.*$")

    def can_parse(self, project_root: Path) -> bool:
        """Return True when a ``requirements*.txt`` exists in the root."""
        return any(project_root.glob("requirements*.txt"))

    def parse(self, project_root: Path) -> frozenset[str]:
        """Return the normalized package names from all requirements files."""
        names: set[str] = set()
        seen: set[Path] = set()
        for req_file in sorted(project_root.glob("requirements*.txt")):
            self._parse_file(req_file, names, project_root, seen)
        return frozenset(names)

    def _parse_file(
        self,
        path: Path,
        names: set[str],
        root: Path,
        _seen: set[Path] | None = None,
    ) -> None:
        """
        Add the package names found in *path* to *names*.

        Args:
            path: requirements file to read.
            names: accumulator that receives normalized package names.
            root: project root, used as fallback base for ``-r`` includes.
            _seen: resolved paths already read; guards against include
                cycles. Callers outside this class can omit it.

        """
        seen = _seen if _seen is not None else set()
        try:
            resolved = path.resolve()
        except (OSError, RuntimeError):
            resolved = path
        if resolved in seen:
            return
        seen.add(resolved)

        try:
            lines = path.read_text(encoding="utf-8").splitlines()
        except (OSError, UnicodeDecodeError):
            # Best effort: an unreadable manifest contributes nothing.
            return

        for raw in lines:
            line = self._COMMENT_RE.sub("", raw).strip()
            if not line:
                continue
            if self._SKIP_RE.match(line):
                if line.startswith("-r"):
                    ref = line[2:].strip()
                    # pip resolves nested includes relative to the including
                    # file; the project root stays as a fallback.
                    for base in (path.parent, root):
                        ref_path = base / ref
                        if ref_path.is_file():
                            self._parse_file(ref_path, names, root, seen)
                            break
                continue
            n = normalize_pkg_name(line)
            if n:
                names.add(n)
127
+
128
+
129
class SetupCfgDepsParser(DependencyFileParser):
    """
    Reads dependencies declared in ``setup.cfg``.

    Covers ``[options] install_requires`` and every extra listed under
    ``[options.extras_require]``.
    """

    def can_parse(self, project_root: Path) -> bool:
        """Return True when *project_root* contains a ``setup.cfg``."""
        return (project_root / "setup.cfg").exists()

    def parse(self, project_root: Path) -> frozenset[str]:
        """
        Return the normalized names of all declared dependencies.

        An unreadable, undecodable or malformed ``setup.cfg`` yields an
        empty set instead of raising.
        """
        path = project_root / "setup.cfg"
        cfg = configparser.ConfigParser()
        try:
            cfg.read(path, encoding="utf-8")
        except (configparser.Error, OSError, UnicodeDecodeError):
            return frozenset()

        requirement_blocks = [
            cfg.get("options", "install_requires", fallback="")
        ]
        # In extras_require each key is an extra's name and its value is that
        # extra's requirement list, so every value in the section is read.
        for section in cfg.sections():
            if section.startswith("options.extras_require"):
                requirement_blocks.extend(
                    value for _, value in cfg.items(section)
                )

        names: set[str] = set()
        for block in requirement_blocks:
            for line in block.splitlines():
                n = normalize_pkg_name(line)
                if n:
                    names.add(n)
        return frozenset(names)
160
+
161
+
162
+ # ---------------------------------------------------------------------------
163
+ # Default parser list for PythonAdapter
164
+ # ---------------------------------------------------------------------------
165
+
166
# Parsers PythonAdapter uses when it is constructed without ``dep_parsers``.
# The adapter keeps a reference to this very list, so its contents are shared
# by every adapter that relies on the default.
PYTHON_DEFAULT_DEP_PARSERS: list[DependencyFileParser] = [
    PyprojectDepsParser(),
    RequirementsTxtParser(),
    SetupCfgDepsParser(),
]
171
+
172
+
173
+ # ---------------------------------------------------------------------------
174
+ # Stdlib set (Python 3.10+)
175
+ # ---------------------------------------------------------------------------
176
+
177
def get_stdlib_names() -> frozenset[str]:
    """
    Return stdlib top-level module names for the running Python.

    The result is ``sys.stdlib_module_names`` (when the interpreter provides
    it) merged with a small hardcoded list of common modules.
    """
    # Minimal baseline for interpreters without sys.stdlib_module_names.
    baseline = frozenset({
        "abc", "ast", "asyncio", "builtins", "collections", "contextlib",
        "copy", "dataclasses", "datetime", "enum", "functools", "hashlib",
        "importlib", "inspect", "io", "itertools", "json", "logging",
        "math", "operator", "os", "pathlib", "pickle", "re", "shutil",
        "signal", "socket", "string", "struct", "subprocess", "sys",
        "tempfile", "threading", "time", "tomllib", "traceback", "typing",
        "unittest", "urllib", "uuid", "warnings", "weakref",
    })
    # sys.stdlib_module_names is available from Python 3.10
    detected = getattr(sys, "stdlib_module_names", ())
    return baseline.union(detected)
@@ -0,0 +1,87 @@
1
+ """Module qualified name resolution and source root detection."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+
8
def find_source_roots(project_root: Path, files: list[Path]) -> list[Path]:
    """
    Detect Python source roots.

    Returns ``[project_root / "src"]`` when that directory exists and holds
    at least one of *files*; otherwise returns ``[project_root]``.
    """
    fallback = [project_root]
    src_dir = project_root / "src"

    if not src_dir.is_dir():
        return fallback
    if not any(files):
        return fallback

    for candidate in files:
        if candidate.is_relative_to(src_dir):
            return [src_dir]
    return fallback
22
+
23
+
24
def file_to_qualified_name(file_path: Path, source_root: Path) -> str:
    """
    Translate a source file path into its dotted module name.

    Examples:
        src/mypackage/__init__.py  -> "mypackage"
        src/mypackage/utils.py     -> "mypackage.utils"
        src/mypackage/sub/__init__.py -> "mypackage.sub"

    Raises ValueError (from ``Path.relative_to``) when *file_path* is not
    located under *source_root*.
    """
    segments = list(file_path.relative_to(source_root).parts)

    # Replace the file name by its stem (drops the .py / .pyi extension).
    module_name = Path(segments.pop()).stem

    # An __init__ file stands for its package, so it adds no segment.
    if module_name != "__init__":
        segments.append(module_name)

    if segments:
        return ".".join(segments)
    # __init__ directly inside the source root: name it after the root.
    return source_root.name
50
+
51
+
52
def is_package_init(file_path: Path) -> bool:
    """Tell whether *file_path* names an ``__init__.py`` / ``__init__.pyi``."""
    if file_path.stem != "__init__":
        return False
    return file_path.suffix in (".py", ".pyi")
55
+
56
+
57
def resolve_relative_import(
    current_module_qname: str,
    level: int,
    module: str | None,
    *,
    is_package: bool = False,
) -> str:
    """
    Resolve a relative import to an absolute qualified name.

    Args:
        current_module_qname: e.g. ``'mypackage.sub.mod'`` or
            ``'mypackage'`` (for __init__)
        level: number of leading dots (1 = current package,
            2 = parent package, etc.)
        module: the module part, e.g. ``'utils'`` in
            ``'from ..utils import x'``. Can be None.
        is_package: set to True when *current_module_qname* names a package
            (its file is ``__init__.py`` / ``__init__.pyi``). The qualified
            name is then already the current package, so one part fewer is
            dropped. The default keeps the behaviour for plain modules.

    Returns:
        The dotted absolute name; an empty string when the import climbs
        past the top of the qualified name and *module* is empty.

    For a plain module the current package is every part except the last,
    so ``level`` parts are removed. For a package ``level - 1`` parts are
    removed.

    """
    parts = current_module_qname.split(".")
    drop = level - 1 if is_package else level
    # Clamp both ways: never a negative slice bound, never a negative drop.
    base_parts = parts[: max(0, len(parts) - max(0, drop))]

    if module:
        return ".".join([*base_parts, module]) if base_parts else module
    return ".".join(base_parts) if base_parts else ""