codemap-core 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codemap/__init__.py +7 -0
- codemap/cli/__init__.py +3 -0
- codemap/cli/_common.py +90 -0
- codemap/cli/commands/__init__.py +3 -0
- codemap/cli/commands/callees.py +102 -0
- codemap/cli/commands/callers.py +107 -0
- codemap/cli/commands/config.py +78 -0
- codemap/cli/commands/diagnostics.py +142 -0
- codemap/cli/commands/doctor.py +158 -0
- codemap/cli/commands/get.py +93 -0
- codemap/cli/commands/index.py +725 -0
- codemap/cli/commands/routes.py +104 -0
- codemap/cli/commands/search.py +78 -0
- codemap/cli/commands/trace.py +179 -0
- codemap/cli/main.py +140 -0
- codemap/cli/renderers/__init__.py +3 -0
- codemap/cli/renderers/json.py +32 -0
- codemap/cli/renderers/text.py +24 -0
- codemap/config/__init__.py +31 -0
- codemap/config/loader.py +96 -0
- codemap/config/schema.py +122 -0
- codemap/core/__init__.py +7 -0
- codemap/core/bridge/__init__.py +8 -0
- codemap/core/bridge/base.py +38 -0
- codemap/core/bridge/http_route.py +374 -0
- codemap/core/bridge/python_cross_module.py +120 -0
- codemap/core/bridge/registry.py +117 -0
- codemap/core/graph.py +183 -0
- codemap/core/models.py +299 -0
- codemap/core/store.py +78 -0
- codemap/core/symbol.py +314 -0
- codemap/diagnostics/__init__.py +3 -0
- codemap/diagnostics/exit_codes.py +30 -0
- codemap/diagnostics/logging.py +65 -0
- codemap/diagnostics/progress.py +68 -0
- codemap/indexers/__init__.py +9 -0
- codemap/indexers/_example_lang.py +135 -0
- codemap/indexers/base.py +77 -0
- codemap/indexers/python.py +577 -0
- codemap/indexers/registry.py +104 -0
- codemap/io/__init__.py +8 -0
- codemap/io/atomic.py +97 -0
- codemap/io/base.py +12 -0
- codemap/io/json_store.py +433 -0
- codemap/io/lock.py +87 -0
- codemap/io/manifest.py +90 -0
- codemap/mcp/__init__.py +3 -0
- codemap_core-0.1.0.dist-info/METADATA +480 -0
- codemap_core-0.1.0.dist-info/RECORD +52 -0
- codemap_core-0.1.0.dist-info/WHEEL +4 -0
- codemap_core-0.1.0.dist-info/entry_points.txt +10 -0
- codemap_core-0.1.0.dist-info/licenses/LICENSE +21 -0
codemap/indexers/base.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""Indexer Protocol — the contract every language / asset indexer implements.
|
|
2
|
+
|
|
3
|
+
All indexers, whether shipped in this repository or installed as third-party
|
|
4
|
+
plugins via the ``codemap.indexers`` entry-point group, register through the
|
|
5
|
+
same protocol with equal standing (ADR-004, ADR-L001).
|
|
6
|
+
|
|
7
|
+
Each indexer is responsible for one file at a time. Exceptions raised by a
|
|
8
|
+
single ``index_file`` call must be caught at the orchestration layer and
|
|
9
|
+
converted into ``Diagnostic`` entries — a single bad file must not abort an
|
|
10
|
+
entire indexing run (ADR-007).
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from pathlib import Path, PurePosixPath
|
|
17
|
+
from typing import ClassVar, Protocol, runtime_checkable
|
|
18
|
+
|
|
19
|
+
from codemap.core.models import IndexResult
|
|
20
|
+
|
|
21
|
+
__all__ = ["IndexContext", "IndexResult", "Indexer"]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True, slots=True)
class IndexContext:
    """Per-run context passed to every ``Indexer.index_file`` invocation.

    Carries information that the indexer may need but should not assume from
    its environment: where the project root is, what language the file was
    detected as, and a few advisory hooks.

    The dataclass is frozen, so fields cannot be rebound after construction;
    the ``config`` dict object itself remains mutable.
    """

    # Location of the project root.
    project_root: Path
    # Path of the file being indexed, relative to the project root (POSIX form).
    relative_path: PurePosixPath
    # Language tag the file was detected as.
    language: str
    # Free-form settings; each instance gets its own empty dict by default.
    # NOTE(review): presumably indexer-specific configuration — confirm with callers.
    config: dict[str, object] = field(default_factory=dict)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@runtime_checkable
class Indexer(Protocol):
    """The required interface for any indexer implementation.

    The protocol is ``runtime_checkable``, so ``isinstance(obj, Indexer)``
    works, but such a check only verifies that the members exist — it does
    not validate their types or call signatures.
    """

    name: ClassVar[str]
    """Unique short identifier (lowercase, ASCII, no spaces)."""

    version: ClassVar[str]
    """Semantic version of the indexer itself (independent of CodeMap)."""

    file_patterns: ClassVar[list[str]]
    """Glob patterns the indexer claims (e.g. ``["*.py"]``). Used for
    fast dispatch; the orchestrator still calls :meth:`supports` to confirm."""

    languages: ClassVar[list[str]]
    """The set of language tags this indexer can emit. May contain custom
    asset names (e.g. ``"openapi"``); not restricted to programming languages."""

    def supports(self, path: Path) -> bool:
        """Return True if this indexer wants to handle ``path``.

        Called after ``file_patterns`` matches; lets the indexer reject files
        by content (e.g. a YAML file that is *not* an OpenAPI schema).
        """
        ...

    def index_file(
        self,
        path: Path,
        source: bytes,
        ctx: IndexContext,
    ) -> IndexResult:
        """Parse ``source`` and return symbols, edges, routes, diagnostics.

        ``path`` is the absolute on-disk location; ``ctx.relative_path`` is the
        path relative to the project root that should be stored on Symbol /
        Edge / Diagnostic ``.file`` fields for cross-machine portability.

        ``source`` is the raw file content as bytes; decoding it is the
        indexer's responsibility.
        """
        ...
|
|
@@ -0,0 +1,577 @@
|
|
|
1
|
+
"""Python indexer — built on the stdlib ``ast`` module.
|
|
2
|
+
|
|
3
|
+
This is the first real-language indexer (Sprint N-1). It is shipped in the
|
|
4
|
+
main repository because (a) CodeMap is itself written in Python, so we get
|
|
5
|
+
dogfooding for free, and (b) ``ast`` is zero-dependency and trivially
|
|
6
|
+
correct. Per ADR-011, this is an *engineering* choice — Python carries no
|
|
7
|
+
product-level privilege over any other language that ships an indexer
|
|
8
|
+
through the ``codemap.indexers`` entry-point group (ADR-004).
|
|
9
|
+
|
|
10
|
+
Scheme: ``scip-python``. Symbol IDs encode the file path as a chain of
|
|
11
|
+
``namespace`` descriptors and the in-file scope as nested namespace / type
|
|
12
|
+
descriptors with the leaf descriptor matching the symbol kind:
|
|
13
|
+
|
|
14
|
+
* module-level function ``foo`` in ``src/m.py``:
|
|
15
|
+
``scip-python local . . src/m.py/foo().``
|
|
16
|
+
* class ``Bar`` in ``src/m.py``:
|
|
17
|
+
``scip-python local . . src/m.py/Bar#``
|
|
18
|
+
* method ``baz`` on ``Bar``:
|
|
19
|
+
``scip-python local . . src/m.py/Bar#baz().``
|
|
20
|
+
* module-level variable ``BAZ``:
|
|
21
|
+
``scip-python local . . src/m.py/BAZ.``
|
|
22
|
+
|
|
23
|
+
Resolution policy (MVP):
|
|
24
|
+
|
|
25
|
+
* Calls to bare names (``foo()``) resolve to the same-file symbol if one
|
|
26
|
+
exists; otherwise they are dropped (no edge) and a low-confidence
|
|
27
|
+
diagnostic is *not* emitted — bare-name resolution failures are common
|
|
28
|
+
(built-ins, dynamic globals) and would flood diagnostics.
|
|
29
|
+
* Calls through ``self.x.y()`` are ignored at MVP — they require type
|
|
30
|
+
inference. A diagnostic with code ``PY101`` is recorded at debug level.
|
|
31
|
+
* Class inheritance is recorded as ``extends`` edges; bases are kept as
|
|
32
|
+
the raw textual name when the parent cannot be resolved.
|
|
33
|
+
* ``import`` / ``from ... import ...`` produce ``imports`` edges to a
|
|
34
|
+
synthetic module symbol (kind=``module``).
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
from __future__ import annotations
|
|
38
|
+
|
|
39
|
+
import ast
|
|
40
|
+
from enum import StrEnum
|
|
41
|
+
from pathlib import Path, PurePosixPath
|
|
42
|
+
from typing import ClassVar, Literal
|
|
43
|
+
|
|
44
|
+
from codemap.core.models import Diagnostic, Edge, IndexResult, Range, Symbol
|
|
45
|
+
from codemap.core.symbol import Descriptor, DescriptorKind, SymbolID
|
|
46
|
+
from codemap.indexers.base import IndexContext
|
|
47
|
+
|
|
48
|
+
# SymbolID scheme used for the definition, module and external-reference IDs
# built in this module.
SCHEME = "scip-python"
# Language tag stamped on every emitted Symbol and advertised by the indexer.
LANG = "python"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class PythonIndexer:
    """Indexer for ``*.py`` / ``*.pyi`` files, built on the stdlib ``ast`` module.

    Decoding and parse failures are reported as ``Diagnostic`` entries on the
    returned ``IndexResult`` instead of being raised, so one unreadable file
    yields an error diagnostic rather than an exception.
    """

    name: ClassVar[str] = "python"
    version: ClassVar[str] = "0.1.0"
    file_patterns: ClassVar[list[str]] = ["*.py", "*.pyi"]
    languages: ClassVar[list[str]] = [LANG]

    def supports(self, path: Path) -> bool:
        """Return True when ``path`` ends in ``.py`` or ``.pyi`` (case-sensitive)."""
        return path.suffix in {".py", ".pyi"}

    def index_file(
        self,
        path: Path,
        source: bytes,
        ctx: IndexContext,
    ) -> IndexResult:
        """Parse ``source`` and collect symbols, edges and diagnostics.

        Args:
            path: On-disk location of the file (not read here; ``source`` is
                used instead).
            source: Raw file content; must be valid UTF-8.
            ctx: Per-run context; ``ctx.relative_path`` is stored on every
                emitted record and used as the parser's filename.

        Returns:
            An ``IndexResult``. On failure it carries a single error
            diagnostic: ``PY002`` when ``source`` is not UTF-8, ``PY001`` when
            the text cannot be parsed.
        """
        try:
            text = source.decode("utf-8")
        except UnicodeDecodeError as exc:
            return IndexResult(
                diagnostics=[
                    Diagnostic(
                        severity="error",
                        file=ctx.relative_path,
                        code="PY002",
                        message=f"not valid UTF-8: {exc}",
                        producer=self.name,
                    )
                ]
            )
        try:
            tree = ast.parse(text, filename=str(ctx.relative_path))
        except (SyntaxError, ValueError) as exc:
            # ``compile()`` is documented to raise ValueError (not SyntaxError)
            # for source containing null bytes; treat it as an unparsable file
            # too instead of letting it escape. ValueError has no ``lineno`` /
            # ``msg``, hence the getattr fallbacks.
            line = getattr(exc, "lineno", None) or 1
            detail = getattr(exc, "msg", None) or str(exc)
            return IndexResult(
                diagnostics=[
                    Diagnostic(
                        severity="error",
                        file=ctx.relative_path,
                        range=Range(start_line=line, end_line=line),
                        code="PY001",
                        message=f"syntax error: {detail}",
                        producer=self.name,
                    )
                ]
            )
        visitor = _Visitor(ctx.relative_path)
        visitor.visit(tree)
        return IndexResult(
            symbols=visitor.symbols,
            edges=visitor.edges,
            diagnostics=visitor.diagnostics,
        )
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# ---------------------------------------------------------------------------
|
|
107
|
+
# Internals
|
|
108
|
+
# ---------------------------------------------------------------------------
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class _Visitor(ast.NodeVisitor):
    """Single-pass AST visitor that builds symbols, edges, and diagnostics.

    Results accumulate on ``symbols``, ``edges`` and ``diagnostics``. Name
    resolution only knows what has already been visited, so a reference that
    appears before its definition (or that is never defined in this file)
    resolves to a ``local``-scheme placeholder SymbolID.
    """

    def __init__(self, relative_path: PurePosixPath) -> None:
        """Create an empty visitor for the file at ``relative_path``."""
        self.relative_path = relative_path
        self.symbols: list[Symbol] = []
        self.edges: list[Edge] = []
        self.diagnostics: list[Diagnostic] = []

        # Stack of enclosing class / function scopes, outermost first.
        self._scope: list[_Scope] = []
        # Local name -> dotted path of the module or imported object it is bound to.
        self._imports: dict[str, str] = {}
        # Bare local name -> SymbolID, used for same-file call resolution.
        self._symbol_index: dict[str, SymbolID] = {}

    # ------------------------------------------------------------- modules

    def visit_Module(self, node: ast.Module) -> None:
        """Visit every top-level statement of the module."""
        self.generic_visit(node)

    # ------------------------------------------------------------ imports

    def visit_Import(self, node: ast.Import) -> None:
        """Record the names bound by ``import ...`` and emit ``imports`` edges.

        The edge target is always the full dotted module that was imported;
        the *binding* follows Python semantics: ``import a.b.c`` binds only
        ``a`` (to the top-level package), whereas ``import a.b.c as m`` binds
        ``m`` to ``a.b.c`` itself.
        """
        for alias in node.names:
            if alias.asname:
                local, bound = alias.asname, alias.name
            else:
                # Without ``as`` only the top-level package name is bound, so
                # ``a.x`` must later resolve inside ``a``, not inside ``a.b.c``.
                local = bound = alias.name.split(".")[0]
            self._imports[local] = bound
            self._add_import_edge(_module_symbol_id(alias.name), node)

    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
        """Record the names bound by ``from m import n`` and emit ``imports`` edges.

        NOTE(review): ``node.level`` is not consulted, so a relative import
        such as ``from .pkg import x`` is recorded as if it were ``pkg.x``.
        """
        module = node.module or ""
        for alias in node.names:
            local = alias.asname or alias.name
            dotted = f"{module}.{alias.name}" if module else alias.name
            self._imports[local] = dotted
            self._add_import_edge(_module_symbol_id(module or alias.name), node)

    def _add_import_edge(self, target: SymbolID, node: ast.AST) -> None:
        """Append an ``imports`` edge from the enclosing symbol to ``target``.

        No edge is produced when there is no enclosing scope carrying a
        symbol id (e.g. for imports at module level); the import is then only
        remembered in ``_imports``.
        """
        source = self._enclosing_symbol_id()
        if source is None:
            return
        self.edges.append(
            Edge(
                source=source,
                target=target,
                kind="imports",
                location=_node_range(node),
            )
        )

    # -------------------------------------------------------- definitions

    def visit_ClassDef(self, node: ast.ClassDef) -> None:
        """Emit a ``class`` symbol, one ``extends`` edge per base, then recurse."""
        sid = self._make_id(node.name, _Kind.CLASS)
        sym = Symbol(
            id=sid,
            kind="class",
            language=LANG,
            file=self.relative_path,
            range=_node_range(node),
            doc=ast.get_docstring(node),
            extra={"decorators": _decorator_names(node.decorator_list)}
            if node.decorator_list
            else {},
        )
        self.symbols.append(sym)
        self._record_local_name(node.name, sid)

        for base in node.bases:
            base_name = _format_attr(base)
            if base_name is None:
                # Base is not a plain name / attribute chain (e.g. a call).
                continue
            target = self._resolve_name(base_name)
            self.edges.append(
                Edge(
                    source=sid,
                    target=target,
                    kind="extends",
                    location=_node_range(base),
                    confidence="high"
                    if base_name in self._imports or base_name in self._symbol_index
                    else "medium",
                )
            )

        # The class scope carries no symbol_id, so it is skipped by
        # ``_enclosing_symbol_id`` / ``_enclosing_callable_id``.
        with self._push_scope(_Scope(name=node.name, kind=_Kind.CLASS)):
            self.generic_visit(node)

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        """Handle a synchronous ``def``."""
        self._visit_func(node, is_async=False)

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
        """Handle an ``async def``."""
        self._visit_func(node, is_async=True)

    def _visit_func(
        self,
        node: ast.FunctionDef | ast.AsyncFunctionDef,
        *,
        is_async: bool,
    ) -> None:
        """Emit a ``function`` / ``method`` symbol and visit the body.

        The symbol kind is ``method`` only when the directly enclosing scope
        is a class. Only the statements of ``node.body`` are visited;
        decorators, defaults and annotations are not traversed.
        """
        in_class = bool(self._scope) and self._scope[-1].kind is _Kind.CLASS
        kind: Literal["method", "function"] = "method" if in_class else "function"
        sid = self._make_id(node.name, _Kind.METHOD)
        extra: dict[str, object] = {}
        if is_async:
            extra["async"] = True
        if node.decorator_list:
            extra["decorators"] = _decorator_names(node.decorator_list)

        http_route = _extract_http_route(node.decorator_list)
        if http_route is not None:
            extra["http_route"] = http_route
        http_calls = _extract_http_calls(node.body)
        if http_calls:
            extra["http_calls"] = http_calls

        sym = Symbol(
            id=sid,
            kind=kind,
            language=LANG,
            file=self.relative_path,
            range=_node_range(node),
            signature=_function_signature(node),
            doc=ast.get_docstring(node),
            extra=extra,
        )
        self.symbols.append(sym)
        self._record_local_name(node.name, sid)

        with self._push_scope(_Scope(name=node.name, kind=_Kind.METHOD, symbol_id=sid)):
            for child in node.body:
                self.visit(child)

    def visit_Assign(self, node: ast.Assign) -> None:
        """Emit ``variable`` / ``field`` symbols for simple name targets.

        Only module-level and class-level assignments are recorded;
        function-locals are not exposed as symbols. Tuple, attribute and
        subscript targets are ignored. The statement is always traversed so
        calls on the right-hand side are still seen.
        """
        if not self._scope or self._scope[-1].kind is _Kind.CLASS:
            for target in node.targets:
                if isinstance(target, ast.Name):
                    sid = self._make_id(target.id, _Kind.TERM)
                    sym_kind: Literal["field", "variable"] = "field" if self._scope else "variable"
                    sym = Symbol(
                        id=sid,
                        kind=sym_kind,
                        language=LANG,
                        file=self.relative_path,
                        range=_node_range(node),
                    )
                    self.symbols.append(sym)
                    self._record_local_name(target.id, sid)
        self.generic_visit(node)

    # -------------------------------------------------------------- calls

    def visit_Call(self, node: ast.Call) -> None:
        """Emit a ``calls`` edge from the enclosing function to the callee.

        Calls outside any function, and calls whose callee is not a plain
        name / attribute chain, produce no edge. Confidence is ``high`` when
        the first component of the callee name is a known same-file symbol.
        """
        target_name = _format_attr(node.func)
        source = self._enclosing_callable_id()
        if source is not None and target_name is not None:
            target = self._resolve_name(target_name)
            self.edges.append(
                Edge(
                    source=source,
                    target=target,
                    kind="calls",
                    location=_node_range(node),
                    confidence=(
                        "high" if target_name.split(".")[0] in self._symbol_index else "medium"
                    ),
                )
            )
        self.generic_visit(node)

    # ----------------------------------------------------------- helpers

    def _enclosing_symbol_id(self) -> SymbolID | None:
        """Return the symbol id of the innermost scope that has one, else None."""
        for scope in reversed(self._scope):
            if scope.symbol_id is not None:
                return scope.symbol_id
        return None

    def _enclosing_callable_id(self) -> SymbolID | None:
        """Return the symbol id of the innermost function scope, else None."""
        for scope in reversed(self._scope):
            if scope.kind is _Kind.METHOD:
                return scope.symbol_id
        return None

    def _make_id(self, name: str, kind: _Kind) -> SymbolID:
        """Build the SymbolID for ``name`` defined in the current scope.

        The chain is: one namespace descriptor per path component, one type
        descriptor per enclosing class, then a leaf chosen by ``kind``.
        Functions/methods do not open a SymbolID namespace; definitions nested
        in their bodies are named as if the function scope were absent.
        (Closures live as anonymous data — they are not indexed in this MVP.)
        """
        descriptors = list(_path_namespaces(self.relative_path))
        descriptors.extend(
            Descriptor(name=scope.name, kind=DescriptorKind.TYPE)
            for scope in self._scope
            if scope.kind is _Kind.CLASS
        )
        if kind is _Kind.CLASS:
            leaf_kind = DescriptorKind.TYPE
        elif kind is _Kind.METHOD:
            leaf_kind = DescriptorKind.METHOD
        else:
            leaf_kind = DescriptorKind.TERM
        descriptors.append(Descriptor(name=name, kind=leaf_kind))
        return SymbolID(scheme=SCHEME, descriptors=tuple(descriptors))

    def _record_local_name(self, name: str, sid: SymbolID) -> None:
        """Make ``name`` resolvable by later references in this file.

        Module-level and class-level names are addressable from the same
        file; function-locals are not. A later definition with the same bare
        name replaces the earlier entry.
        """
        if not self._scope or self._scope[-1].kind is _Kind.CLASS:
            self._symbol_index[name] = sid

    def _resolve_name(self, dotted: str) -> SymbolID:
        """Map a (possibly dotted) reference to a SymbolID.

        Resolution order: exact same-file symbol; then an imported head name
        (``head.rest`` becomes an external symbol inside the imported module,
        a bare imported name becomes the module symbol); otherwise a
        ``local``-scheme placeholder so the edge survives but is clearly
        external. Bridges may later resolve it.
        """
        if dotted in self._symbol_index:
            return self._symbol_index[dotted]
        head, sep, leaf = dotted.partition(".")
        if head in self._imports:
            module = self._imports[head]
            if sep:
                # e.g. ``import x`` then ``x.func`` -> symbol ``func`` in ``x``.
                return _external_symbol_id(module, leaf)
            return _module_symbol_id(module)
        return SymbolID(
            scheme="local",
            descriptors=(Descriptor(name=dotted, kind=DescriptorKind.TERM),),
        )

    def _push_scope(self, scope: _Scope) -> _ScopeContext:
        """Return a context manager that pushes ``scope`` for its duration."""
        return _ScopeContext(self._scope, scope)
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
# ---------------------------------------------------------------------------
|
|
353
|
+
# Scope tracking
|
|
354
|
+
# ---------------------------------------------------------------------------
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
class _Kind(StrEnum):
    """Internal tag for scope frames and for the leaf descriptor of a new SymbolID."""

    CLASS = "class"
    # Used for plain functions as well as methods (see ``_Visitor._visit_func``).
    METHOD = "method"
    # Module-level variables and class-level fields.
    TERM = "term"
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
class _Scope:
|
|
364
|
+
def __init__(self, *, name: str, kind: str, symbol_id: SymbolID | None = None) -> None:
|
|
365
|
+
self.name = name
|
|
366
|
+
self.kind = kind
|
|
367
|
+
self.symbol_id = symbol_id
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
class _ScopeContext:
|
|
371
|
+
def __init__(self, stack: list[_Scope], scope: _Scope) -> None:
|
|
372
|
+
self._stack = stack
|
|
373
|
+
self._scope = scope
|
|
374
|
+
|
|
375
|
+
def __enter__(self) -> _Scope:
|
|
376
|
+
self._stack.append(self._scope)
|
|
377
|
+
return self._scope
|
|
378
|
+
|
|
379
|
+
def __exit__(self, *exc: object) -> None:
|
|
380
|
+
self._stack.pop()
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
# ---------------------------------------------------------------------------
|
|
384
|
+
# Pure helpers
|
|
385
|
+
# ---------------------------------------------------------------------------
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
def _path_namespaces(path: PurePosixPath) -> list[Descriptor]:
    """Turn each component of ``path`` into a namespace descriptor.

    For ``src/foo/bar.py`` the result reads ``src/ foo/ bar.py/``. The file
    name keeps its ``.py`` extension to make symbols visually identifiable.
    """
    chain: list[Descriptor] = []
    for component in path.parts:
        chain.append(Descriptor(name=component, kind=DescriptorKind.NAMESPACE))
    return chain
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def _module_symbol_id(dotted_module: str) -> SymbolID:
    """Build the synthetic SymbolID of a Python module (used as edge target).

    Every dotted component but the last becomes a namespace descriptor; the
    last one becomes a meta descriptor. An empty module path is represented
    by the single meta descriptor ``<root>``.
    """
    if dotted_module:
        *packages, leaf = dotted_module.split(".")
    else:
        packages, leaf = [], "<root>"
    chain = [Descriptor(name=pkg, kind=DescriptorKind.NAMESPACE) for pkg in packages]
    chain.append(Descriptor(name=leaf, kind=DescriptorKind.META))
    return SymbolID(scheme=SCHEME, descriptors=tuple(chain))
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
def _external_symbol_id(dotted_module: str, leaf: str) -> SymbolID:
    """Build the SymbolID for ``module.name`` where the module is external.

    Each component of ``dotted_module`` becomes a namespace descriptor (none
    when the module path is empty) and ``leaf`` is appended as a term
    descriptor.
    """
    segments = dotted_module.split(".") if dotted_module else []
    namespaces = tuple(
        Descriptor(name=segment, kind=DescriptorKind.NAMESPACE) for segment in segments
    )
    term = Descriptor(name=leaf, kind=DescriptorKind.TERM)
    return SymbolID(scheme=SCHEME, descriptors=(*namespaces, term))
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
def _node_range(node: ast.AST) -> Range:
    """Convert the position attributes of ``node`` into a ``Range``.

    Missing or falsy attributes fall back to line 1 / column 0, a missing end
    line falls back to the start line, and the end line is never reported
    before the start line.
    """
    first_line = getattr(node, "lineno", 1) or 1
    last_line = getattr(node, "end_lineno", first_line) or first_line
    if last_line < first_line:
        last_line = first_line
    first_col = getattr(node, "col_offset", 0) or 0
    last_col = getattr(node, "end_col_offset", 0) or 0
    return Range(
        start_line=first_line,
        start_col=first_col,
        end_line=last_line,
        end_col=last_col,
    )
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
def _function_signature(node: ast.FunctionDef | ast.AsyncFunctionDef) -> str:
|
|
427
|
+
try:
|
|
428
|
+
args_src = ast.unparse(node.args)
|
|
429
|
+
except Exception: # pragma: no cover - defensive
|
|
430
|
+
args_src = "..."
|
|
431
|
+
prefix = "async def" if isinstance(node, ast.AsyncFunctionDef) else "def"
|
|
432
|
+
return f"{prefix} {node.name}({args_src})"
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
def _decorator_names(decorators: list[ast.expr]) -> list[str]:
    """Return the dotted names of ``decorators`` in source order.

    For a decorator that is a call (``@app.get("/x")``) the callee is named.
    Decorators that cannot be rendered as a dotted name are left out.
    """
    rendered = (
        _format_attr(dec.func if isinstance(dec, ast.Call) else dec) for dec in decorators
    )
    return [label for label in rendered if label is not None]
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
def _format_attr(node: ast.AST) -> str | None:
|
|
445
|
+
"""Render an ``ast.Name`` / ``ast.Attribute`` chain back to dotted form."""
|
|
446
|
+
if isinstance(node, ast.Name):
|
|
447
|
+
return node.id
|
|
448
|
+
if isinstance(node, ast.Attribute):
|
|
449
|
+
head = _format_attr(node.value)
|
|
450
|
+
return f"{head}.{node.attr}" if head else node.attr
|
|
451
|
+
return None
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
# ---------------------------------------------------------------------------
|
|
455
|
+
# HTTP route / call recognition (framework-agnostic, pattern-driven)
|
|
456
|
+
# ---------------------------------------------------------------------------
|
|
457
|
+
|
|
458
|
+
# Attribute names (compared lower-cased) that are treated as HTTP verbs, both
# for route decorators and for client calls.
_HTTP_VERB_NAMES = frozenset({"get", "post", "put", "delete", "patch", "head", "options"})
# Leftmost receiver names for which a detected HTTP call is rated "high"
# confidence; any other receiver is rated "medium".
_HTTP_HIGH_CONF_CLIENTS = frozenset({"requests", "httpx", "aiohttp", "urllib3"})
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
def _extract_http_route(decorators: list[ast.expr]) -> dict[str, str] | None:
    """Return ``http_route`` metadata for the first decorator that looks like a route.

    Decorators are examined in order and two framework-neutral shapes are
    recognised:

    * ``@<obj>.{get,post,put,delete,patch,head,options}("/path")`` — the
      attribute name (case-insensitive) is the HTTP method.
    * ``@route("/path", methods=[...])``, ``@<obj>.route("/path", method=...)``
      or ``@<obj>.add_url_rule("/path", ...)`` — the method comes from the
      keyword argument and defaults to ``GET``.

    In both shapes the first positional argument must be a string literal;
    dynamic paths (variables, concatenation) cannot be statically recovered
    and are skipped — Bridges that need them must look in the source file.

    Returns:
        ``{"method": <UPPERCASE verb>, "path": <literal>}`` or ``None`` when
        no decorator matches.
    """
    for decorator in decorators:
        if not isinstance(decorator, ast.Call):
            continue
        literal_path = _first_str_arg(decorator)
        if literal_path is None:
            # Neither shape can match without a literal path.
            continue

        callee = decorator.func
        if isinstance(callee, ast.Attribute):
            verb = callee.attr.lower()
            if verb in _HTTP_VERB_NAMES:
                return {"method": verb.upper(), "path": literal_path}
            is_generic_route = callee.attr in {"route", "add_url_rule"}
        else:
            is_generic_route = isinstance(callee, ast.Name) and callee.id == "route"

        if is_generic_route:
            return {"method": _method_from_kwargs(decorator) or "GET", "path": literal_path}
    return None
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
def _first_str_arg(call: ast.Call) -> str | None:
|
|
504
|
+
"""Return ``call.args[0]`` if it is a string literal, else ``None``."""
|
|
505
|
+
if not call.args:
|
|
506
|
+
return None
|
|
507
|
+
first = call.args[0]
|
|
508
|
+
if isinstance(first, ast.Constant) and isinstance(first.value, str):
|
|
509
|
+
return first.value
|
|
510
|
+
return None
|
|
511
|
+
|
|
512
|
+
|
|
513
|
+
def _method_from_kwargs(call: ast.Call) -> str | None:
|
|
514
|
+
"""Pull the HTTP method out of ``method=...`` or ``methods=[...]`` kwargs."""
|
|
515
|
+
for kw in call.keywords:
|
|
516
|
+
if kw.arg not in {"method", "methods"}:
|
|
517
|
+
continue
|
|
518
|
+
value = kw.value
|
|
519
|
+
if isinstance(value, ast.Constant) and isinstance(value.value, str):
|
|
520
|
+
return value.value.upper()
|
|
521
|
+
if isinstance(value, ast.List | ast.Tuple) and value.elts:
|
|
522
|
+
first = value.elts[0]
|
|
523
|
+
if isinstance(first, ast.Constant) and isinstance(first.value, str):
|
|
524
|
+
return first.value.upper()
|
|
525
|
+
return None
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
def _extract_http_calls(body: list[ast.stmt]) -> list[dict[str, str]]:
    """Collect ``<obj>.<verb>("url", ...)`` HTTP calls found anywhere in ``body``.

    A call qualifies when its callee is an attribute whose name
    (case-insensitive) is an HTTP verb and its first positional argument is a
    string literal that *looks* like a URL — leading ``/`` or ``http(s)://``.
    Without that guard, harmless calls like ``dict.get("key")`` would flood
    the index.

    Each hit is reported as ``{"method", "url", "confidence"}``. Confidence is
    ``high`` when the leftmost receiver name is a well-known HTTP client
    library (``requests`` / ``httpx`` / ``aiohttp`` / ``urllib3``) and
    ``medium`` otherwise — for those, the receiver name didn't prove the
    caller meant a network request.
    """
    found: list[dict[str, str]] = []
    for statement in body:
        for candidate in ast.walk(statement):
            if not (isinstance(candidate, ast.Call) and isinstance(candidate.func, ast.Attribute)):
                continue
            callee = candidate.func
            method = callee.attr.lower()
            if method not in _HTTP_VERB_NAMES:
                continue
            target_url = _first_str_arg(candidate)
            if target_url is None or not _looks_like_url(target_url):
                continue
            known_client = _attr_head(callee.value) in _HTTP_HIGH_CONF_CLIENTS
            found.append(
                {
                    "method": method.upper(),
                    "url": target_url,
                    "confidence": "high" if known_client else "medium",
                }
            )
    return found
|
|
564
|
+
|
|
565
|
+
|
|
566
|
+
def _looks_like_url(s: str) -> bool:
|
|
567
|
+
return s.startswith("/") or s.startswith(("http://", "https://"))
|
|
568
|
+
|
|
569
|
+
|
|
570
|
+
def _attr_head(node: ast.AST) -> str | None:
|
|
571
|
+
"""Return the leftmost ``ast.Name.id`` in an attribute chain."""
|
|
572
|
+
while isinstance(node, ast.Attribute):
|
|
573
|
+
node = node.value
|
|
574
|
+
return node.id if isinstance(node, ast.Name) else None
|
|
575
|
+
|
|
576
|
+
|
|
577
|
+
__all__ = ["LANG", "SCHEME", "PythonIndexer"]
|