codemap-core 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codemap/__init__.py +7 -0
- codemap/cli/__init__.py +3 -0
- codemap/cli/_common.py +90 -0
- codemap/cli/commands/__init__.py +3 -0
- codemap/cli/commands/callees.py +102 -0
- codemap/cli/commands/callers.py +107 -0
- codemap/cli/commands/config.py +78 -0
- codemap/cli/commands/diagnostics.py +142 -0
- codemap/cli/commands/doctor.py +158 -0
- codemap/cli/commands/get.py +93 -0
- codemap/cli/commands/index.py +725 -0
- codemap/cli/commands/routes.py +104 -0
- codemap/cli/commands/search.py +78 -0
- codemap/cli/commands/trace.py +179 -0
- codemap/cli/main.py +140 -0
- codemap/cli/renderers/__init__.py +3 -0
- codemap/cli/renderers/json.py +32 -0
- codemap/cli/renderers/text.py +24 -0
- codemap/config/__init__.py +31 -0
- codemap/config/loader.py +96 -0
- codemap/config/schema.py +122 -0
- codemap/core/__init__.py +7 -0
- codemap/core/bridge/__init__.py +8 -0
- codemap/core/bridge/base.py +38 -0
- codemap/core/bridge/http_route.py +374 -0
- codemap/core/bridge/python_cross_module.py +120 -0
- codemap/core/bridge/registry.py +117 -0
- codemap/core/graph.py +183 -0
- codemap/core/models.py +299 -0
- codemap/core/store.py +78 -0
- codemap/core/symbol.py +314 -0
- codemap/diagnostics/__init__.py +3 -0
- codemap/diagnostics/exit_codes.py +30 -0
- codemap/diagnostics/logging.py +65 -0
- codemap/diagnostics/progress.py +68 -0
- codemap/indexers/__init__.py +9 -0
- codemap/indexers/_example_lang.py +135 -0
- codemap/indexers/base.py +77 -0
- codemap/indexers/python.py +577 -0
- codemap/indexers/registry.py +104 -0
- codemap/io/__init__.py +8 -0
- codemap/io/atomic.py +97 -0
- codemap/io/base.py +12 -0
- codemap/io/json_store.py +433 -0
- codemap/io/lock.py +87 -0
- codemap/io/manifest.py +90 -0
- codemap/mcp/__init__.py +3 -0
- codemap_core-0.1.0.dist-info/METADATA +480 -0
- codemap_core-0.1.0.dist-info/RECORD +52 -0
- codemap_core-0.1.0.dist-info/WHEEL +4 -0
- codemap_core-0.1.0.dist-info/entry_points.txt +10 -0
- codemap_core-0.1.0.dist-info/licenses/LICENSE +21 -0
codemap/config/schema.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""Configuration schema for `.codemap/config.yaml`.
|
|
2
|
+
|
|
3
|
+
Pydantic models double as the single source of truth: the YAML loader
|
|
4
|
+
validates against them, the CLI renders them, the in-memory ``Config``
|
|
5
|
+
object is all that callers ever touch. ``extra="forbid"`` ensures typos and
|
|
6
|
+
deprecated keys surface as errors rather than silently being ignored.
|
|
7
|
+
|
|
8
|
+
Layered loading (cf. ``codemap.config.loader.load_config``):
|
|
9
|
+
|
|
10
|
+
1. Built-in defaults — every field has a sensible value.
|
|
11
|
+
2. User-level file at ``~/.config/codemap/config.yaml`` (overrides defaults).
|
|
12
|
+
3. Project-level file at ``<project>/.codemap/config.yaml`` (overrides user).
|
|
13
|
+
4. CLI flags (overrides everything; not handled here).
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from typing import Literal
|
|
19
|
+
|
|
20
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
21
|
+
|
|
22
|
+
# 10 MB. Matches the previous hard-coded value in `codemap.cli.commands.index`.
|
|
23
|
+
DEFAULT_MAX_FILE_BYTES = 10 * 1024 * 1024
|
|
24
|
+
|
|
25
|
+
# Directories ignored even when not explicitly listed in the config — they
|
|
26
|
+
# are universally noise and would otherwise dominate the index.
|
|
27
|
+
DEFAULT_PRUNE_DIRS: tuple[str, ...] = (
|
|
28
|
+
".git",
|
|
29
|
+
".hg",
|
|
30
|
+
".svn",
|
|
31
|
+
".codemap",
|
|
32
|
+
".venv",
|
|
33
|
+
"venv",
|
|
34
|
+
"node_modules",
|
|
35
|
+
"__pycache__",
|
|
36
|
+
".mypy_cache",
|
|
37
|
+
".pytest_cache",
|
|
38
|
+
".ruff_cache",
|
|
39
|
+
"dist",
|
|
40
|
+
"build",
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class _Base(BaseModel):
    """Common pydantic config for every config section.

    Every section model in this module derives from this class, so the
    settings below apply uniformly to all of them.
    """

    model_config = ConfigDict(
        # Unknown keys are rejected, so typos and deprecated options surface
        # as validation errors rather than being silently ignored.
        extra="forbid",
        # Instances remain mutable after construction ...
        frozen=False,
        # ... and every attribute assignment is validated again.
        validate_assignment=True,
    )
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class StorageConfig(_Base):
    """Persistence backend configuration."""

    # Only the two literal names below validate; ``"json"`` is used when the
    # key is omitted.
    backend: Literal["json", "sqlite"] = "json"
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class IndexConfig(_Base):
    """File-discovery and parsing limits."""

    # ``default_factory`` gives each instance its own empty list, so no extra
    # patterns are excluded unless the configuration lists some.
    ignore: list[str] = Field(default_factory=list)
    """Extra glob patterns to exclude during indexing.

    Patterns are matched with ``fnmatch.fnmatch`` against the project-
    relative POSIX path of each candidate file *and* against each directory
    name during ``os.walk``. Use ``**/dist/**`` for path-anchored
    exclusions and ``*.bak`` for filename matching.
    """

    # Defaults to ``DEFAULT_MAX_FILE_BYTES``; ``ge=1`` makes zero or negative
    # limits a validation error.
    max_file_bytes: int = Field(default=DEFAULT_MAX_FILE_BYTES, ge=1)
    """Files larger than this byte count are skipped with a diagnostic."""

    # Off by default.
    follow_symlinks: bool = False
    """If ``True``, ``os.walk`` follows symlinks. Use cautiously — cycles
    are guarded by inode tracking but can still slow indexing significantly."""
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class IndexersConfig(_Base):
    """Which indexer plugins to actually run."""

    # Accepts either the literal string ``"all"`` or a list of names; any
    # other value fails validation.
    enabled: list[str] | Literal["all"] = "all"
    """Either ``"all"`` (default) or an explicit list of indexer names.

    A name in this list that does not match any registered indexer is
    silently ignored — third-party plugins may come and go between
    workstations and we don't want to break indexing on a stale config.
    """

    # Empty by default; each instance gets its own list.
    disabled: list[str] = Field(default_factory=list)
    """Indexer names to skip even when ``enabled = "all"``.

    The ``_example_lang`` reference indexer is a typical entry here once
    real indexers are in play."""
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class BridgesConfig(_Base):
    """Which bridge plugins to actually run."""

    # Same field shapes as ``IndexersConfig``: ``"all"`` (default) or an
    # explicit list of names, plus a list of names to skip.
    # NOTE(review): presumably unknown names are tolerated here as well;
    # confirm against the bridge registry.
    enabled: list[str] | Literal["all"] = "all"
    disabled: list[str] = Field(default_factory=list)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class Config(_Base):
    """Top-level CodeMap configuration.

    Groups the four configuration sections. Each section falls back to its
    own model's defaults when it is omitted, so ``Config()`` is valid with
    no arguments.
    """

    # Persistence backend selection.
    storage: StorageConfig = Field(default_factory=StorageConfig)
    # File-discovery and parsing limits.
    index: IndexConfig = Field(default_factory=IndexConfig)
    # Indexer plugin selection.
    indexers: IndexersConfig = Field(default_factory=IndexersConfig)
    # Bridge plugin selection.
    bridges: BridgesConfig = Field(default_factory=BridgesConfig)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
__all__ = [
|
|
115
|
+
"DEFAULT_MAX_FILE_BYTES",
|
|
116
|
+
"DEFAULT_PRUNE_DIRS",
|
|
117
|
+
"BridgesConfig",
|
|
118
|
+
"Config",
|
|
119
|
+
"IndexConfig",
|
|
120
|
+
"IndexersConfig",
|
|
121
|
+
"StorageConfig",
|
|
122
|
+
]
|
codemap/core/__init__.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
"""Pure-business core: data models, SymbolID, graph algorithms, query semantics.
|
|
2
|
+
|
|
3
|
+
Strict dependency rule (ADR-003): `core` MUST NOT import from `cli`, `io`,
|
|
4
|
+
`indexers`, or `mcp`. Protocols defined here are implemented in outer layers.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""Cross-scheme bridge abstractions.
|
|
2
|
+
|
|
3
|
+
Bridges resolve relationships between symbols of different schemes after all
|
|
4
|
+
indexers have finished. They are language-neutral by construction (ADR-L001):
|
|
5
|
+
bridges operate on Symbol/Edge data, not on language-specific AST.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Bridge Protocol — cross-scheme resolvers (HTTP routes, asset-interface
|
|
2
|
+
aliases, dependency injection wiring, etc.).
|
|
3
|
+
|
|
4
|
+
A bridge runs after every indexer has finished. It inspects the populated
|
|
5
|
+
:class:`codemap.core.store.ReadOnlyStore`, derives new edges/aliases/routes, and
|
|
6
|
+
returns them in a :class:`BridgeResult`. Bridges are pure: the orchestrator,
|
|
7
|
+
not the bridge, decides when and how to persist the output.
|
|
8
|
+
|
|
9
|
+
All bridges — built-in or external — register via the ``codemap.bridges``
|
|
10
|
+
entry-point group on equal footing (ADR-004, ADR-L001).
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from typing import ClassVar, Protocol, runtime_checkable
|
|
16
|
+
|
|
17
|
+
from codemap.core.models import BridgeResult
|
|
18
|
+
from codemap.core.store import ReadOnlyStore
|
|
19
|
+
|
|
20
|
+
__all__ = ["Bridge", "BridgeResult"]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@runtime_checkable
class Bridge(Protocol):
    """The required interface for any bridge implementation.

    This is a structural protocol: an implementation conforms by providing
    the three class-level attributes and a ``resolve`` method, without
    having to inherit from ``Bridge``. ``@runtime_checkable`` allows
    ``isinstance`` checks against it; such checks only verify that the
    members exist, not their types or signatures.
    """

    name: ClassVar[str]
    """Unique short identifier (lowercase, ASCII, no spaces)."""

    version: ClassVar[str]
    """Semantic version of the bridge itself."""

    requires: ClassVar[list[str]]
    """Names of bridges that must run before this one (topological order)."""

    def resolve(self, store: ReadOnlyStore) -> BridgeResult:
        """Inspect ``store``; emit derived edges, aliases, routes, diagnostics."""
        ...
|
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
"""HTTP route bridge — language-neutral client ↔ server linker.
|
|
2
|
+
|
|
3
|
+
The bridge knows nothing about specific server frameworks or HTTP client
|
|
4
|
+
libraries. Instead, it relies on a small **metadata convention** that any
|
|
5
|
+
indexer can populate. Whether the source language is Python, TypeScript,
|
|
6
|
+
Go, Rust, or anything else does not matter; the contract is the same.
|
|
7
|
+
|
|
8
|
+
## Metadata convention
|
|
9
|
+
|
|
10
|
+
A server-side handler advertises itself by setting two keys on its
|
|
11
|
+
``Symbol.extra`` dict::
|
|
12
|
+
|
|
13
|
+
"http_route": {
|
|
14
|
+
"method": "GET", # required, case-insensitive
|
|
15
|
+
"path": "/api/user/{id}", # required, with optional {var}s
|
|
16
|
+
"context_path": "/api/v1", # optional, prepended to ``path``
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
A client-side caller advertises every HTTP call it issues::
|
|
20
|
+
|
|
21
|
+
"http_calls": [
|
|
22
|
+
{
|
|
23
|
+
"method": "GET",
|
|
24
|
+
"url": "/api/v1/user/42",
|
|
25
|
+
"confidence": "high", # optional; defaults to "medium"
|
|
26
|
+
},
|
|
27
|
+
...
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
## Resolution output
|
|
31
|
+
|
|
32
|
+
For each unique ``(method, full_path)`` advertised by the server side, the
|
|
33
|
+
bridge mints a synthetic intermediate symbol with the ``scip-route`` scheme
|
|
34
|
+
(e.g. ``scip-route . . . api/GET#`/api/user/{id}`.``). Server handlers and
|
|
35
|
+
client calls both relate to this intermediate via :class:`Alias` and
|
|
36
|
+
:class:`Edge` entries — that way the symbol-store doesn't have to carry
|
|
37
|
+
a direct ``client → server`` edge for every (method, path) combination.
|
|
38
|
+
|
|
39
|
+
The bridge produces three kinds of outputs:
|
|
40
|
+
|
|
41
|
+
* :class:`Route` entries (one per server-advertised route) for
|
|
42
|
+
``routes.json``.
|
|
43
|
+
* :class:`Alias` entries linking the intermediate to its server handlers
|
|
44
|
+
(and, when an exact client URL matches, the client callers).
|
|
45
|
+
* :class:`Edge` entries with ``kind="routes_to"`` (``server_handler →
|
|
46
|
+
route_intermediate``) and ``kind="calls"`` (``client_caller →
|
|
47
|
+
route_intermediate``) so the existing call-graph queries keep working.
|
|
48
|
+
|
|
49
|
+
## Path matching
|
|
50
|
+
|
|
51
|
+
A client ``url`` matches a server ``path`` if they have the same number of
|
|
52
|
+
``/``-separated segments and every static server segment equals the
|
|
53
|
+
corresponding client segment; ``{placeholder}`` segments in the server
|
|
54
|
+
path match any non-empty client segment. Trailing slashes and query
|
|
55
|
+
strings are ignored on the client side.
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
from __future__ import annotations
|
|
59
|
+
|
|
60
|
+
from collections import defaultdict
|
|
61
|
+
from collections.abc import Iterable
|
|
62
|
+
from pathlib import PurePosixPath
|
|
63
|
+
from typing import ClassVar
|
|
64
|
+
|
|
65
|
+
from codemap.core.models import (
|
|
66
|
+
Alias,
|
|
67
|
+
BridgeResult,
|
|
68
|
+
Confidence,
|
|
69
|
+
Diagnostic,
|
|
70
|
+
Edge,
|
|
71
|
+
Route,
|
|
72
|
+
)
|
|
73
|
+
from codemap.core.store import ReadOnlyStore
|
|
74
|
+
from codemap.core.symbol import Descriptor, DescriptorKind, SymbolID
|
|
75
|
+
|
|
76
|
+
SCHEME = "scip-route"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class HttpRouteBridge:
    """Link HTTP client calls to server handlers via synthetic route symbols."""

    name: ClassVar[str] = "http_route"
    version: ClassVar[str] = "0.1.0"
    requires: ClassVar[list[str]] = []

    def resolve(self, store: ReadOnlyStore) -> BridgeResult:
        """Derive routes, aliases, edges and diagnostics from symbol metadata.

        Every symbol's ``extra`` dict is scanned for an ``http_route`` entry
        (server side) and ``http_calls`` entries (client side). One
        intermediate symbol is minted per unique ``(method, full_path)``;
        handlers get a ``routes_to`` edge to it and matching client calls get
        a ``calls`` edge to it.

        Diagnostics:
            * ``ROUTE001`` — more than one handler advertises the same route.
            * ``ROUTE002`` — a ``high``-confidence client call matches no
              server route.
        """
        advertised: list[_ServerRoute] = []
        outgoing: list[_ClientCall] = []

        for symbol in store.iter_symbols():
            metadata = symbol.extra
            parsed = _read_server_route(metadata)
            if parsed is not None:
                verb, raw_path, prefix = parsed
                advertised.append(
                    _ServerRoute(
                        method=verb,
                        full_path=_join_path(prefix, raw_path),
                        symbol_id=symbol.id,
                        file=symbol.file,
                    )
                )
            outgoing.extend(
                _ClientCall(
                    method=call_verb,
                    url=call_url,
                    symbol_id=symbol.id,
                    confidence=call_conf,
                    file=symbol.file,
                )
                for call_verb, call_url, call_conf in _read_client_calls(metadata)
            )

        # One pass both mints the intermediate symbol for each unique
        # (method, full_path) and groups the handlers that advertise it.
        route_ids: dict[tuple[str, str], SymbolID] = {}
        by_route: defaultdict[tuple[str, str], list[_ServerRoute]] = defaultdict(list)
        for entry in advertised:
            route_key = (entry.method, entry.full_path)
            if route_key not in route_ids:
                route_ids[route_key] = _route_symbol_id(entry.method, entry.full_path)
            by_route[route_key].append(entry)

        routes: list[Route] = []
        aliases: list[Alias] = []
        edges: list[Edge] = []
        diagnostics: list[Diagnostic] = []

        for route_key, group in by_route.items():
            verb, full_path = route_key
            route_sid = route_ids[route_key]
            # The context prefix is already folded into ``full_path``, hence
            # ``context_path=None`` on the emitted route.
            routes.append(
                Route(
                    method=verb,
                    path=full_path,
                    symbol_id=route_sid,
                    context_path=None,
                )
            )
            aliases.append(
                Alias(
                    source=route_sid,
                    targets=[member.symbol_id for member in group],
                    producer=self.name,
                    confidence="high",
                )
            )
            for member in group:
                edges.append(
                    Edge(
                        source=member.symbol_id,
                        target=route_sid,
                        kind="routes_to",
                        confidence="high",
                    )
                )
            if len(group) <= 1:
                continue
            # Several handlers share one route: warn, anchored at the first.
            diagnostics.append(
                Diagnostic(
                    severity="warning",
                    file=group[0].file,
                    code="ROUTE001",
                    message=(
                        f"multiple handlers registered for {verb} {full_path}: "
                        f"{len(group)} symbols"
                    ),
                    producer=self.name,
                )
            )

        # Resolve each client call against the known server routes.
        known_routes = list(route_ids)
        for call in outgoing:
            hit = _match_route(call.method, call.url, known_routes)
            if hit is not None:
                edges.append(
                    Edge(
                        source=call.symbol_id,
                        target=route_ids[hit],
                        kind="calls",
                        confidence=call.confidence,
                    )
                )
            elif call.confidence == "high":
                # Only confidently-extracted calls are worth a warning.
                diagnostics.append(
                    Diagnostic(
                        severity="warning",
                        file=call.file,
                        code="ROUTE002",
                        message=(
                            f"client {call.method} {call.url} has no matching "
                            "server route in this index"
                        ),
                        producer=self.name,
                    )
                )

        return BridgeResult(
            edges=edges,
            aliases=aliases,
            routes=routes,
            diagnostics=diagnostics,
        )
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
# ---------------------------------------------------------------------------
|
|
210
|
+
# Metadata adapters
|
|
211
|
+
# ---------------------------------------------------------------------------
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
class _ServerRoute:
|
|
215
|
+
__slots__ = ("file", "full_path", "method", "symbol_id")
|
|
216
|
+
|
|
217
|
+
def __init__(
|
|
218
|
+
self,
|
|
219
|
+
*,
|
|
220
|
+
method: str,
|
|
221
|
+
full_path: str,
|
|
222
|
+
symbol_id: SymbolID,
|
|
223
|
+
file: PurePosixPath,
|
|
224
|
+
) -> None:
|
|
225
|
+
self.method = method
|
|
226
|
+
self.full_path = full_path
|
|
227
|
+
self.symbol_id = symbol_id
|
|
228
|
+
self.file = file
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
class _ClientCall:
|
|
232
|
+
__slots__ = ("confidence", "file", "method", "symbol_id", "url")
|
|
233
|
+
|
|
234
|
+
def __init__(
|
|
235
|
+
self,
|
|
236
|
+
*,
|
|
237
|
+
method: str,
|
|
238
|
+
url: str,
|
|
239
|
+
symbol_id: SymbolID,
|
|
240
|
+
confidence: Confidence,
|
|
241
|
+
file: PurePosixPath,
|
|
242
|
+
) -> None:
|
|
243
|
+
self.method = method
|
|
244
|
+
self.url = url
|
|
245
|
+
self.symbol_id = symbol_id
|
|
246
|
+
self.confidence: Confidence = confidence
|
|
247
|
+
self.file = file
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def _read_server_route(extra: dict[str, object]) -> tuple[str, str, str] | None:
|
|
251
|
+
raw = extra.get("http_route")
|
|
252
|
+
if not isinstance(raw, dict):
|
|
253
|
+
return None
|
|
254
|
+
method_raw = raw.get("method")
|
|
255
|
+
path_raw = raw.get("path")
|
|
256
|
+
if not isinstance(method_raw, str) or not isinstance(path_raw, str):
|
|
257
|
+
return None
|
|
258
|
+
method = method_raw.strip().upper()
|
|
259
|
+
path = path_raw.strip()
|
|
260
|
+
if not method or not path:
|
|
261
|
+
return None
|
|
262
|
+
ctx_raw = raw.get("context_path")
|
|
263
|
+
context = ctx_raw.strip() if isinstance(ctx_raw, str) else ""
|
|
264
|
+
return method, path, context
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
_VALID_CONFIDENCE = frozenset({"high", "medium", "low"})
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def _read_client_calls(
|
|
271
|
+
extra: dict[str, object],
|
|
272
|
+
) -> Iterable[tuple[str, str, Confidence]]:
|
|
273
|
+
raw = extra.get("http_calls")
|
|
274
|
+
if not isinstance(raw, list):
|
|
275
|
+
return
|
|
276
|
+
for item in raw:
|
|
277
|
+
if not isinstance(item, dict):
|
|
278
|
+
continue
|
|
279
|
+
method_raw = item.get("method")
|
|
280
|
+
url_raw = item.get("url")
|
|
281
|
+
if not isinstance(method_raw, str) or not isinstance(url_raw, str):
|
|
282
|
+
continue
|
|
283
|
+
method = method_raw.strip().upper()
|
|
284
|
+
url = url_raw.strip()
|
|
285
|
+
if not method or not url:
|
|
286
|
+
continue
|
|
287
|
+
conf_raw = item.get("confidence")
|
|
288
|
+
if isinstance(conf_raw, str) and conf_raw in _VALID_CONFIDENCE:
|
|
289
|
+
confidence: Confidence = conf_raw # type: ignore[assignment]
|
|
290
|
+
else:
|
|
291
|
+
confidence = "medium"
|
|
292
|
+
yield method, url, confidence
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
# ---------------------------------------------------------------------------
|
|
296
|
+
# Path manipulation
|
|
297
|
+
# ---------------------------------------------------------------------------
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def _join_path(context: str, path: str) -> str:
|
|
301
|
+
"""Concatenate ``context_path`` (optional) with ``path``.
|
|
302
|
+
|
|
303
|
+
Both arguments may or may not include leading / trailing slashes; the
|
|
304
|
+
result is normalised to a single leading ``/`` and no trailing ``/``
|
|
305
|
+
unless the entire result is ``/``.
|
|
306
|
+
"""
|
|
307
|
+
parts: list[str] = []
|
|
308
|
+
for piece in (context, path):
|
|
309
|
+
stripped = piece.strip()
|
|
310
|
+
if not stripped:
|
|
311
|
+
continue
|
|
312
|
+
parts.extend(seg for seg in stripped.split("/") if seg)
|
|
313
|
+
if not parts:
|
|
314
|
+
return "/"
|
|
315
|
+
return "/" + "/".join(parts)
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def _match_route(
|
|
319
|
+
method: str,
|
|
320
|
+
url: str,
|
|
321
|
+
server_routes: list[tuple[str, str]],
|
|
322
|
+
) -> tuple[str, str] | None:
|
|
323
|
+
"""Return the first ``(method, path)`` from ``server_routes`` that matches.
|
|
324
|
+
|
|
325
|
+
Path-variable segments (``{name}``) on the server side match any
|
|
326
|
+
single non-empty client segment. Query strings on the client URL are
|
|
327
|
+
stripped before comparison. Trailing slashes are normalised.
|
|
328
|
+
"""
|
|
329
|
+
client_path = url.split("?", 1)[0].split("#", 1)[0]
|
|
330
|
+
c_segments = [s for s in client_path.split("/") if s]
|
|
331
|
+
for s_method, s_path in server_routes:
|
|
332
|
+
if s_method != method:
|
|
333
|
+
continue
|
|
334
|
+
s_segments = [s for s in s_path.split("/") if s]
|
|
335
|
+
if len(s_segments) != len(c_segments):
|
|
336
|
+
continue
|
|
337
|
+
ok = True
|
|
338
|
+
for s, c in zip(s_segments, c_segments, strict=True):
|
|
339
|
+
if s.startswith("{") and s.endswith("}") and len(s) >= 2:
|
|
340
|
+
if not c:
|
|
341
|
+
ok = False
|
|
342
|
+
break
|
|
343
|
+
continue
|
|
344
|
+
if s != c:
|
|
345
|
+
ok = False
|
|
346
|
+
break
|
|
347
|
+
if ok:
|
|
348
|
+
return (s_method, s_path)
|
|
349
|
+
return None
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
# ---------------------------------------------------------------------------
|
|
353
|
+
# Intermediate symbol minting
|
|
354
|
+
# ---------------------------------------------------------------------------
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def _route_symbol_id(method: str, path: str) -> SymbolID:
    """Mint the ``scip-route`` intermediate symbol for ``(method, path)``.

    Shape: ``scip-route . . . api/<METHOD>#`<path>`.``. The descriptor
    chain is a fixed ``api`` namespace, the HTTP method as a TYPE, and the
    path as a TERM. Per the original note, the path is held in a TERM so it
    can contain slashes, with the parser auto-escaping via backticks.
    """
    namespace = Descriptor(name="api", kind=DescriptorKind.NAMESPACE)
    verb = Descriptor(name=method, kind=DescriptorKind.TYPE)
    route = Descriptor(name=path, kind=DescriptorKind.TERM)
    return SymbolID(scheme=SCHEME, descriptors=(namespace, verb, route))
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
__all__ = ["SCHEME", "HttpRouteBridge"]
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""Cross-module Python call resolver.
|
|
2
|
+
|
|
3
|
+
The Python indexer emits ``calls`` edges whose target is a synthetic
|
|
4
|
+
``scip-python . . . <module-dotted-ns>/<leaf>.`` symbol whenever a name
|
|
5
|
+
came in through an ``import`` statement. That synthetic target rarely
|
|
6
|
+
matches a real on-disk symbol — the same function defined in
|
|
7
|
+
``foo/bar.py`` lives at ``scip-python . . . foo/bar.py/<leaf>().`` (note
|
|
8
|
+
the ``.py`` extension and the method/term suffix).
|
|
9
|
+
|
|
10
|
+
This bridge reconciles the two. After all indexers have run, it scans the
|
|
11
|
+
edge table, finds calls pointing at unresolved scip-python targets, and
|
|
12
|
+
maps each one to a concrete local symbol whose **leaf name matches** and
|
|
13
|
+
whose **file stem matches the last namespace segment of the synthetic
|
|
14
|
+
target**. Matches are emitted as :class:`Alias` entries so query code
|
|
15
|
+
(``callers`` / ``callees`` / ``trace``) can transparently expand them.
|
|
16
|
+
|
|
17
|
+
The matcher is intentionally simple: it ships as a single-language
|
|
18
|
+
heuristic, not a type system. It accepts only one of three confidence
|
|
19
|
+
levels:
|
|
20
|
+
|
|
21
|
+
* ``high`` — exactly one candidate whose file stem equals the last
|
|
22
|
+
namespace segment of the synthetic target.
|
|
23
|
+
* ``medium`` — exactly one candidate by leaf name alone, when the
|
|
24
|
+
synthetic target has no namespace segments to disambiguate.
|
|
25
|
+
* otherwise — drop the edge (no alias produced).
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
from collections import defaultdict
|
|
31
|
+
from pathlib import PurePosixPath
|
|
32
|
+
from typing import ClassVar
|
|
33
|
+
|
|
34
|
+
from codemap.core.models import Alias, BridgeResult, Confidence, Symbol
|
|
35
|
+
from codemap.core.store import ReadOnlyStore
|
|
36
|
+
from codemap.core.symbol import DescriptorKind, SymbolID
|
|
37
|
+
|
|
38
|
+
_PYTHON_SCHEME = "scip-python"
|
|
39
|
+
_INDEXABLE_KINDS = frozenset({"function", "method", "class", "field", "variable"})
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class PythonCrossModuleBridge:
    """Alias unresolved ``scip-python`` call targets to concrete local symbols."""

    name: ClassVar[str] = "python_cross_module"
    version: ClassVar[str] = "0.1.0"
    requires: ClassVar[list[str]] = []

    def resolve(self, store: ReadOnlyStore) -> BridgeResult:
        """Emit one :class:`Alias` per resolvable synthetic call target.

        Python symbols of an indexable kind are first grouped by the name of
        their last descriptor. Then each distinct ``scip-python`` target of a
        ``calls`` edge that is not itself present in the store is passed to
        ``_resolve_one``. A successful resolution becomes an alias from the
        synthetic target to the local symbol.

        Each distinct target is examined once, whatever the outcome, so a
        frequently-called unresolvable target costs one lookup instead of
        one per edge.

        Args:
            store: Read-only view of the symbols and edges produced so far.

        Returns:
            A :class:`BridgeResult` carrying only aliases.
        """
        leaf_to_symbols: dict[str, list[Symbol]] = defaultdict(list)
        for sym in store.iter_symbols():
            if sym.language != "python":
                continue
            if sym.kind not in _INDEXABLE_KINDS:
                continue
            if not sym.id.descriptors:
                continue
            leaf = sym.id.descriptors[-1].name
            leaf_to_symbols[leaf].append(sym)

        aliases: list[Alias] = []
        # String forms of every target already examined, resolved or not.
        visited: set[str] = set()
        for edge in store.iter_edges():
            if edge.kind != "calls":
                continue
            target = edge.target
            if target.scheme != _PYTHON_SCHEME:
                continue
            target_key = str(target)
            if target_key in visited:
                continue
            visited.add(target_key)
            if store.get(target) is not None:
                continue  # already a real local symbol
            resolved = _resolve_one(target, leaf_to_symbols)
            if resolved is None:
                continue
            local, confidence = resolved
            aliases.append(
                Alias(
                    source=target,
                    targets=[local.id],
                    producer=self.name,
                    confidence=confidence,
                )
            )
        return BridgeResult(aliases=aliases)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _resolve_one(
    target: SymbolID,
    leaf_to_symbols: dict[str, list[Symbol]],
) -> tuple[Symbol, Confidence] | None:
    """Return ``(local Symbol, confidence)`` or ``None``.

    Only targets whose last descriptor is a TERM, METHOD or META are
    considered. Candidates are the local symbols sharing that descriptor's
    name:

    * If the target has NAMESPACE descriptors before the leaf, candidates
      are narrowed to those whose file stem equals the last namespace name;
      exactly one survivor yields ``"high"``.
    * If it has none, exactly one candidate yields ``"medium"``.
    * Anything else (no candidate, or an ambiguous choice) yields ``None``.
    """
    descriptors = target.descriptors
    if not descriptors:
        return None

    *scope, leaf_descriptor = descriptors
    # These are the leaf shapes the indexer's synthetic-symbol helpers
    # produce; anything else is not worth trying to resolve.
    resolvable = (DescriptorKind.TERM, DescriptorKind.METHOD, DescriptorKind.META)
    if leaf_descriptor.kind not in resolvable:
        return None

    pool = leaf_to_symbols.get(leaf_descriptor.name, [])
    if not pool:
        return None

    namespaces = [d.name for d in scope if d.kind is DescriptorKind.NAMESPACE]
    grade: Confidence
    if namespaces:
        module_stem = namespaces[-1]
        pool = [c for c in pool if PurePosixPath(c.file).stem == module_stem]
        grade = "high"
    else:
        grade = "medium"

    # Never pick among several equally plausible symbols — the index would lie.
    if len(pool) != 1:
        return None
    return pool[0], grade
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
__all__ = ["PythonCrossModuleBridge"]
|