codemap-core 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. codemap/__init__.py +7 -0
  2. codemap/cli/__init__.py +3 -0
  3. codemap/cli/_common.py +90 -0
  4. codemap/cli/commands/__init__.py +3 -0
  5. codemap/cli/commands/callees.py +102 -0
  6. codemap/cli/commands/callers.py +107 -0
  7. codemap/cli/commands/config.py +78 -0
  8. codemap/cli/commands/diagnostics.py +142 -0
  9. codemap/cli/commands/doctor.py +158 -0
  10. codemap/cli/commands/get.py +93 -0
  11. codemap/cli/commands/index.py +725 -0
  12. codemap/cli/commands/routes.py +104 -0
  13. codemap/cli/commands/search.py +78 -0
  14. codemap/cli/commands/trace.py +179 -0
  15. codemap/cli/main.py +140 -0
  16. codemap/cli/renderers/__init__.py +3 -0
  17. codemap/cli/renderers/json.py +32 -0
  18. codemap/cli/renderers/text.py +24 -0
  19. codemap/config/__init__.py +31 -0
  20. codemap/config/loader.py +96 -0
  21. codemap/config/schema.py +122 -0
  22. codemap/core/__init__.py +7 -0
  23. codemap/core/bridge/__init__.py +8 -0
  24. codemap/core/bridge/base.py +38 -0
  25. codemap/core/bridge/http_route.py +374 -0
  26. codemap/core/bridge/python_cross_module.py +120 -0
  27. codemap/core/bridge/registry.py +117 -0
  28. codemap/core/graph.py +183 -0
  29. codemap/core/models.py +299 -0
  30. codemap/core/store.py +78 -0
  31. codemap/core/symbol.py +314 -0
  32. codemap/diagnostics/__init__.py +3 -0
  33. codemap/diagnostics/exit_codes.py +30 -0
  34. codemap/diagnostics/logging.py +65 -0
  35. codemap/diagnostics/progress.py +68 -0
  36. codemap/indexers/__init__.py +9 -0
  37. codemap/indexers/_example_lang.py +135 -0
  38. codemap/indexers/base.py +77 -0
  39. codemap/indexers/python.py +577 -0
  40. codemap/indexers/registry.py +104 -0
  41. codemap/io/__init__.py +8 -0
  42. codemap/io/atomic.py +97 -0
  43. codemap/io/base.py +12 -0
  44. codemap/io/json_store.py +433 -0
  45. codemap/io/lock.py +87 -0
  46. codemap/io/manifest.py +90 -0
  47. codemap/mcp/__init__.py +3 -0
  48. codemap_core-0.1.0.dist-info/METADATA +480 -0
  49. codemap_core-0.1.0.dist-info/RECORD +52 -0
  50. codemap_core-0.1.0.dist-info/WHEEL +4 -0
  51. codemap_core-0.1.0.dist-info/entry_points.txt +10 -0
  52. codemap_core-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,122 @@
1
+ """Configuration schema for `.codemap/config.yaml`.
2
+
3
+ Pydantic models double as the single source of truth: the YAML loader
4
+ validates against them, the CLI renders them, and the in-memory ``Config``
5
+ object is all that callers ever touch. ``extra="forbid"`` ensures typos and
6
+ deprecated keys surface as errors rather than silently being ignored.
7
+
8
+ Layered loading (cf. ``codemap.config.loader.load_config``):
9
+
10
+ 1. Built-in defaults — every field has a sensible value.
11
+ 2. User-level file at ``~/.config/codemap/config.yaml`` (overrides defaults).
12
+ 3. Project-level file at ``<project>/.codemap/config.yaml`` (overrides user).
13
+ 4. CLI flags (overrides everything; not handled here).
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from typing import Literal
19
+
20
+ from pydantic import BaseModel, ConfigDict, Field
21
+
22
# Default per-file size ceiling: 10 MB. Matches the previous hard-coded
# value in `codemap.cli.commands.index`.
DEFAULT_MAX_FILE_BYTES = 10 * 1024**2

# Directory names that are always pruned, whether or not the config lists
# them: they are universally noise and would otherwise dominate the index.
DEFAULT_PRUNE_DIRS: tuple[str, ...] = (
    # version-control metadata
    ".git",
    ".hg",
    ".svn",
    # CodeMap's own directory
    ".codemap",
    # environments and installed dependencies
    ".venv",
    "venv",
    "node_modules",
    # tool caches
    "__pycache__",
    ".mypy_cache",
    ".pytest_cache",
    ".ruff_cache",
    # build output
    "dist",
    "build",
)
42
+
43
+
44
class _Base(BaseModel):
    """Shared pydantic settings inherited by every configuration section.

    Unknown keys are rejected (``extra="forbid"``), instances remain mutable
    (``frozen=False``), and values assigned after construction are validated
    again (``validate_assignment=True``).
    """

    model_config = ConfigDict(extra="forbid", frozen=False, validate_assignment=True)
52
+
53
+
54
class StorageConfig(_Base):
    """Settings for the persistence backend.

    ``backend`` is either ``"json"`` (the default) or ``"sqlite"``.
    """

    backend: Literal["json", "sqlite"] = "json"
58
+
59
+
60
class IndexConfig(_Base):
    """Limits and exclusions applied while discovering and parsing files."""

    ignore: list[str] = Field(default_factory=list)
    """Additional glob patterns excluded from indexing (empty by default).

    Each pattern is tested with ``fnmatch.fnmatch`` against a candidate
    file's project-relative POSIX path and also against every directory name
    met during ``os.walk``. Write ``**/dist/**`` to exclude by path and
    ``*.bak`` to exclude by filename.
    """

    max_file_bytes: int = Field(default=DEFAULT_MAX_FILE_BYTES, ge=1)
    """Size ceiling in bytes (minimum 1); larger files are skipped with a diagnostic."""

    follow_symlinks: bool = False
    """Whether ``os.walk`` follows symlinks (off by default).

    Inode tracking guards against cycles, but following links can still
    slow indexing down considerably.
    """
78
+
79
+
80
class IndexersConfig(_Base):
    """Selection of the indexer plugins that should run."""

    enabled: list[str] | Literal["all"] = "all"
    """The literal ``"all"`` (default) or an explicit list of indexer names.

    Names that match no registered indexer are silently ignored: third-party
    plugins may come and go between workstations, and a stale config should
    not break indexing.
    """

    disabled: list[str] = Field(default_factory=list)
    """Indexer names to skip, even when ``enabled = "all"``.

    Once real indexers are in play, the ``_example_lang`` reference indexer
    is a typical entry here.
    """
96
+
97
+
98
class BridgesConfig(_Base):
    """Selection of the bridge plugins that should run."""

    enabled: list[str] | Literal["all"] = "all"
    """The literal ``"all"`` (default) or an explicit list of bridge names."""

    disabled: list[str] = Field(default_factory=list)
    """Bridge names to skip (empty by default)."""
103
+
104
+
105
class Config(_Base):
    """Root of the CodeMap configuration tree.

    Each section is built from its own defaults when it is not supplied, so
    ``Config()`` with no arguments is a complete configuration.
    """

    storage: StorageConfig = Field(default_factory=StorageConfig)
    index: IndexConfig = Field(default_factory=IndexConfig)
    indexers: IndexersConfig = Field(default_factory=IndexersConfig)
    bridges: BridgesConfig = Field(default_factory=BridgesConfig)
112
+
113
+
114
+ __all__ = [
115
+ "DEFAULT_MAX_FILE_BYTES",
116
+ "DEFAULT_PRUNE_DIRS",
117
+ "BridgesConfig",
118
+ "Config",
119
+ "IndexConfig",
120
+ "IndexersConfig",
121
+ "StorageConfig",
122
+ ]
@@ -0,0 +1,7 @@
1
+ """Pure-business core: data models, SymbolID, graph algorithms, query semantics.
2
+
3
+ Strict dependency rule (ADR-003): `core` MUST NOT import from `cli`, `io`,
4
+ `indexers`, or `mcp`. Protocols defined here are implemented in outer layers.
5
+ """
6
+
7
+ from __future__ import annotations
@@ -0,0 +1,8 @@
1
+ """Cross-scheme bridge abstractions.
2
+
3
+ Bridges resolve relationships between symbols of different schemes after all
4
+ indexers have finished. They are language-neutral by construction (ADR-L001):
5
+ bridges operate on Symbol/Edge data, not on language-specific AST.
6
+ """
7
+
8
+ from __future__ import annotations
@@ -0,0 +1,38 @@
1
+ """Bridge Protocol — cross-scheme resolvers (HTTP routes, asset-interface
2
+ aliases, dependency injection wiring, etc.).
3
+
4
+ A bridge runs after every indexer has finished. It inspects the populated
5
+ :class:`codemap.io.base.ReadOnlyStore`, derives new edges/aliases/routes, and
6
+ returns them in a :class:`BridgeResult`. Bridges are pure: the orchestrator,
7
+ not the bridge, decides when and how to persist the output.
8
+
9
+ All bridges — built-in or external — register via the ``codemap.bridges``
10
+ entry-point group on equal footing (ADR-004, ADR-L001).
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from typing import ClassVar, Protocol, runtime_checkable
16
+
17
+ from codemap.core.models import BridgeResult
18
+ from codemap.core.store import ReadOnlyStore
19
+
20
+ __all__ = ["Bridge", "BridgeResult"]
21
+
22
+
23
@runtime_checkable
class Bridge(Protocol):
    """Structural interface every bridge implementation has to satisfy.

    The protocol is runtime-checkable, so ``isinstance(obj, Bridge)`` verifies
    that the listed members are present on ``obj``.
    """

    name: ClassVar[str]
    """Short unique identifier: lowercase ASCII without spaces."""

    version: ClassVar[str]
    """Semantic version of the bridge implementation."""

    requires: ClassVar[list[str]]
    """Bridge names that have to run before this one (topological order)."""

    def resolve(self, store: ReadOnlyStore) -> BridgeResult:
        """Examine ``store`` and return the derived edges, aliases, routes and diagnostics."""
        ...
@@ -0,0 +1,374 @@
1
+ """HTTP route bridge — language-neutral client ↔ server linker.
2
+
3
+ The bridge knows nothing about specific server frameworks or HTTP client
4
+ libraries. Instead, it relies on a small **metadata convention** that any
5
+ indexer can populate. Whether the source language is Python, TypeScript,
6
+ Go, Rust, or anything else does not matter; the contract is the same.
7
+
8
+ ## Metadata convention
9
+
10
+ A server-side handler advertises itself by setting the ``http_route`` key
11
+ on its ``Symbol.extra`` dict::
12
+
13
+ "http_route": {
14
+ "method": "GET", # required, case-insensitive
15
+ "path": "/api/user/{id}", # required, with optional {var}s
16
+ "context_path": "/api/v1", # optional, prepended to ``path``
17
+ }
18
+
19
+ A client-side caller advertises every HTTP call it issues::
20
+
21
+ "http_calls": [
22
+ {
23
+ "method": "GET",
24
+ "url": "/api/v1/user/42",
25
+ "confidence": "high", # optional; defaults to "medium"
26
+ },
27
+ ...
28
+ ]
29
+
30
+ ## Resolution output
31
+
32
+ For each unique ``(method, full_path)`` advertised by the server side, the
33
+ bridge mints a synthetic intermediate symbol with the ``scip-route`` scheme
34
+ (e.g. ``scip-route . . . api/GET#`/api/user/{id}`.``). Server handlers and
35
+ client calls both relate to this intermediate via :class:`Alias` and
36
+ :class:`Edge` entries — that way the symbol-store doesn't have to carry
37
+ a direct ``client → server`` edge for every (method, path) combination.
38
+
39
+ The bridge produces three kinds of outputs:
40
+
41
+ * :class:`Route` entries (one per server-advertised route) for
42
+ ``routes.json``.
43
+ * :class:`Alias` entries linking the intermediate to its server handlers
44
+ (and, when an exact client URL matches, the client callers).
45
+ * :class:`Edge` entries with ``kind="routes_to"`` (``server_handler →
46
+ route_intermediate``) and ``kind="calls"`` (``client_caller →
47
+ route_intermediate``) so the existing call-graph queries keep working.
48
+
49
+ ## Path matching
50
+
51
+ A client ``url`` matches a server ``path`` if they have the same number of
52
+ ``/``-separated segments and every static server segment equals the
53
+ corresponding client segment; ``{placeholder}`` segments in the server
54
+ path match any non-empty client segment. Trailing slashes and query
55
+ strings are ignored on the client side.
56
+ """
57
+
58
+ from __future__ import annotations
59
+
60
+ from collections import defaultdict
61
+ from collections.abc import Iterable
62
+ from pathlib import PurePosixPath
63
+ from typing import ClassVar
64
+
65
+ from codemap.core.models import (
66
+ Alias,
67
+ BridgeResult,
68
+ Confidence,
69
+ Diagnostic,
70
+ Edge,
71
+ Route,
72
+ )
73
+ from codemap.core.store import ReadOnlyStore
74
+ from codemap.core.symbol import Descriptor, DescriptorKind, SymbolID
75
+
76
+ SCHEME = "scip-route"
77
+
78
+
79
class HttpRouteBridge:
    """Bridge that connects HTTP client calls to server handlers.

    Both sides are linked through one synthetic route symbol per unique
    ``(method, full_path)`` pair advertised by a server handler.
    """

    name: ClassVar[str] = "http_route"
    version: ClassVar[str] = "0.1.0"
    requires: ClassVar[list[str]] = []

    def resolve(self, store: ReadOnlyStore) -> BridgeResult:
        """Derive routes, aliases, edges and diagnostics from symbol metadata.

        Reads the ``http_route`` / ``http_calls`` metadata of every symbol in
        ``store`` and returns a :class:`BridgeResult` holding:

        * one ``Route`` and one ``Alias`` per unique server route;
        * a ``routes_to`` edge from each handler to its route symbol;
        * a ``calls`` edge from each client call to the route it matches;
        * ``ROUTE001`` warnings for routes with several handlers and
          ``ROUTE002`` warnings for unmatched high-confidence client calls.
        """
        # Pass 1: harvest server routes and client calls from every symbol.
        advertised: list[_ServerRoute] = []
        outgoing: list[_ClientCall] = []
        for symbol in store.iter_symbols():
            metadata = symbol.extra
            declared = _read_server_route(metadata)
            if declared is not None:
                verb, raw_path, prefix = declared
                advertised.append(
                    _ServerRoute(
                        method=verb,
                        full_path=_join_path(prefix, raw_path),
                        symbol_id=symbol.id,
                        file=symbol.file,
                    )
                )
            for call_verb, call_url, call_confidence in _read_client_calls(metadata):
                outgoing.append(
                    _ClientCall(
                        method=call_verb,
                        url=call_url,
                        symbol_id=symbol.id,
                        confidence=call_confidence,
                        file=symbol.file,
                    )
                )

        # Pass 2: bucket handlers per (method, full_path) in first-seen order
        # and mint exactly one intermediate symbol for each bucket.
        by_route: dict[tuple[str, str], list[_ServerRoute]] = {}
        for entry in advertised:
            by_route.setdefault((entry.method, entry.full_path), []).append(entry)
        route_ids: dict[tuple[str, str], SymbolID] = {
            key: _route_symbol_id(*key) for key in by_route
        }

        routes: list[Route] = []
        aliases: list[Alias] = []
        edges: list[Edge] = []
        diagnostics: list[Diagnostic] = []

        # Pass 3: emit the server-side artefacts for every route.
        for route_key, group in by_route.items():
            method, full_path = route_key
            route_sid = route_ids[route_key]
            routes.append(
                Route(
                    method=method,
                    path=full_path,
                    symbol_id=route_sid,
                    context_path=None,
                )
            )
            aliases.append(
                Alias(
                    source=route_sid,
                    targets=[handler.symbol_id for handler in group],
                    producer=self.name,
                    confidence="high",
                )
            )
            for handler in group:
                edges.append(
                    Edge(
                        source=handler.symbol_id,
                        target=route_sid,
                        kind="routes_to",
                        confidence="high",
                    )
                )
            if len(group) <= 1:
                continue
            # Several handlers claim the same route: warn, anchored at the first.
            diagnostics.append(
                Diagnostic(
                    severity="warning",
                    file=group[0].file,
                    code="ROUTE001",
                    message=(
                        f"multiple handlers registered for {method} {full_path}: "
                        f"{len(group)} symbols"
                    ),
                    producer=self.name,
                )
            )

        # Pass 4: attach each client call to the route it matches, if any.
        known_routes = list(route_ids)
        for call in outgoing:
            hit = _match_route(call.method, call.url, known_routes)
            if hit is not None:
                edges.append(
                    Edge(
                        source=call.symbol_id,
                        target=route_ids[hit],
                        kind="calls",
                        confidence=call.confidence,
                    )
                )
            elif call.confidence == "high":
                # Only high-confidence calls are reported when nothing matches.
                diagnostics.append(
                    Diagnostic(
                        severity="warning",
                        file=call.file,
                        code="ROUTE002",
                        message=(
                            f"client {call.method} {call.url} has no matching "
                            "server route in this index"
                        ),
                        producer=self.name,
                    )
                )

        return BridgeResult(
            edges=edges,
            aliases=aliases,
            routes=routes,
            diagnostics=diagnostics,
        )
207
+
208
+
209
+ # ---------------------------------------------------------------------------
210
+ # Metadata adapters
211
+ # ---------------------------------------------------------------------------
212
+
213
+
214
+ class _ServerRoute:
215
+ __slots__ = ("file", "full_path", "method", "symbol_id")
216
+
217
+ def __init__(
218
+ self,
219
+ *,
220
+ method: str,
221
+ full_path: str,
222
+ symbol_id: SymbolID,
223
+ file: PurePosixPath,
224
+ ) -> None:
225
+ self.method = method
226
+ self.full_path = full_path
227
+ self.symbol_id = symbol_id
228
+ self.file = file
229
+
230
+
231
+ class _ClientCall:
232
+ __slots__ = ("confidence", "file", "method", "symbol_id", "url")
233
+
234
+ def __init__(
235
+ self,
236
+ *,
237
+ method: str,
238
+ url: str,
239
+ symbol_id: SymbolID,
240
+ confidence: Confidence,
241
+ file: PurePosixPath,
242
+ ) -> None:
243
+ self.method = method
244
+ self.url = url
245
+ self.symbol_id = symbol_id
246
+ self.confidence: Confidence = confidence
247
+ self.file = file
248
+
249
+
250
+ def _read_server_route(extra: dict[str, object]) -> tuple[str, str, str] | None:
251
+ raw = extra.get("http_route")
252
+ if not isinstance(raw, dict):
253
+ return None
254
+ method_raw = raw.get("method")
255
+ path_raw = raw.get("path")
256
+ if not isinstance(method_raw, str) or not isinstance(path_raw, str):
257
+ return None
258
+ method = method_raw.strip().upper()
259
+ path = path_raw.strip()
260
+ if not method or not path:
261
+ return None
262
+ ctx_raw = raw.get("context_path")
263
+ context = ctx_raw.strip() if isinstance(ctx_raw, str) else ""
264
+ return method, path, context
265
+
266
+
267
+ _VALID_CONFIDENCE = frozenset({"high", "medium", "low"})
268
+
269
+
270
+ def _read_client_calls(
271
+ extra: dict[str, object],
272
+ ) -> Iterable[tuple[str, str, Confidence]]:
273
+ raw = extra.get("http_calls")
274
+ if not isinstance(raw, list):
275
+ return
276
+ for item in raw:
277
+ if not isinstance(item, dict):
278
+ continue
279
+ method_raw = item.get("method")
280
+ url_raw = item.get("url")
281
+ if not isinstance(method_raw, str) or not isinstance(url_raw, str):
282
+ continue
283
+ method = method_raw.strip().upper()
284
+ url = url_raw.strip()
285
+ if not method or not url:
286
+ continue
287
+ conf_raw = item.get("confidence")
288
+ if isinstance(conf_raw, str) and conf_raw in _VALID_CONFIDENCE:
289
+ confidence: Confidence = conf_raw # type: ignore[assignment]
290
+ else:
291
+ confidence = "medium"
292
+ yield method, url, confidence
293
+
294
+
295
+ # ---------------------------------------------------------------------------
296
+ # Path manipulation
297
+ # ---------------------------------------------------------------------------
298
+
299
+
300
+ def _join_path(context: str, path: str) -> str:
301
+ """Concatenate ``context_path`` (optional) with ``path``.
302
+
303
+ Both arguments may or may not include leading / trailing slashes; the
304
+ result is normalised to a single leading ``/`` and no trailing ``/``
305
+ unless the entire result is ``/``.
306
+ """
307
+ parts: list[str] = []
308
+ for piece in (context, path):
309
+ stripped = piece.strip()
310
+ if not stripped:
311
+ continue
312
+ parts.extend(seg for seg in stripped.split("/") if seg)
313
+ if not parts:
314
+ return "/"
315
+ return "/" + "/".join(parts)
316
+
317
+
318
+ def _match_route(
319
+ method: str,
320
+ url: str,
321
+ server_routes: list[tuple[str, str]],
322
+ ) -> tuple[str, str] | None:
323
+ """Return the first ``(method, path)`` from ``server_routes`` that matches.
324
+
325
+ Path-variable segments (``{name}``) on the server side match any
326
+ single non-empty client segment. Query strings on the client URL are
327
+ stripped before comparison. Trailing slashes are normalised.
328
+ """
329
+ client_path = url.split("?", 1)[0].split("#", 1)[0]
330
+ c_segments = [s for s in client_path.split("/") if s]
331
+ for s_method, s_path in server_routes:
332
+ if s_method != method:
333
+ continue
334
+ s_segments = [s for s in s_path.split("/") if s]
335
+ if len(s_segments) != len(c_segments):
336
+ continue
337
+ ok = True
338
+ for s, c in zip(s_segments, c_segments, strict=True):
339
+ if s.startswith("{") and s.endswith("}") and len(s) >= 2:
340
+ if not c:
341
+ ok = False
342
+ break
343
+ continue
344
+ if s != c:
345
+ ok = False
346
+ break
347
+ if ok:
348
+ return (s_method, s_path)
349
+ return None
350
+
351
+
352
+ # ---------------------------------------------------------------------------
353
+ # Intermediate symbol minting
354
+ # ---------------------------------------------------------------------------
355
+
356
+
357
def _route_symbol_id(method: str, path: str) -> SymbolID:
    """Mint the ``scip-route`` intermediate symbol for ``method`` and ``path``.

    The id carries three descriptors in order: the fixed namespace ``api``,
    the HTTP method as a TYPE, and the route path as a TERM. Intended shape:
    ``scip-route . . . api/<METHOD>#`<path>`.``. The path may contain slashes
    because it is held in a TERM descriptor, which the parser escapes with
    backticks.
    """
    namespace = Descriptor(name="api", kind=DescriptorKind.NAMESPACE)
    verb = Descriptor(name=method, kind=DescriptorKind.TYPE)
    route = Descriptor(name=path, kind=DescriptorKind.TERM)
    return SymbolID(scheme=SCHEME, descriptors=(namespace, verb, route))
372
+
373
+
374
+ __all__ = ["SCHEME", "HttpRouteBridge"]
@@ -0,0 +1,120 @@
1
+ """Cross-module Python call resolver.
2
+
3
+ The Python indexer emits ``calls`` edges whose target is a synthetic
4
+ ``scip-python . . . <module-dotted-ns>/<leaf>.`` symbol whenever a name
5
+ came in through an ``import`` statement. That synthetic target rarely
6
+ matches a real on-disk symbol — the same function defined in
7
+ ``foo/bar.py`` lives at ``scip-python . . . foo/bar.py/<leaf>().`` (note
8
+ the ``.py`` extension and the method/term suffix).
9
+
10
+ This bridge reconciles the two. After all indexers have run, it scans the
11
+ edge table, finds calls pointing at unresolved scip-python targets, and
12
+ maps each one to a concrete local symbol whose **leaf name matches** and
13
+ whose **file stem matches the last namespace segment of the synthetic
14
+ target**. Matches are emitted as :class:`Alias` entries so query code
15
+ (``callers`` / ``callees`` / ``trace``) can transparently expand them.
16
+
17
+ The matcher is intentionally simple: it ships as a single-language
18
+ heuristic, not a type system. Each unresolved target ends in exactly one
19
+ of three outcomes:
20
+
21
+ * ``high`` — exactly one candidate whose file stem equals the last
22
+ namespace segment of the synthetic target.
23
+ * ``medium`` — exactly one candidate by leaf name alone, when the
24
+ synthetic target has no namespace segments to disambiguate.
25
+ * otherwise — drop the edge (no alias produced).
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ from collections import defaultdict
31
+ from pathlib import PurePosixPath
32
+ from typing import ClassVar
33
+
34
+ from codemap.core.models import Alias, BridgeResult, Confidence, Symbol
35
+ from codemap.core.store import ReadOnlyStore
36
+ from codemap.core.symbol import DescriptorKind, SymbolID
37
+
38
+ _PYTHON_SCHEME = "scip-python"
39
+ _INDEXABLE_KINDS = frozenset({"function", "method", "class", "field", "variable"})
40
+
41
+
42
class PythonCrossModuleBridge:
    """Bridge that aliases unresolved ``scip-python`` call targets to local symbols."""

    name: ClassVar[str] = "python_cross_module"
    version: ClassVar[str] = "0.1.0"
    requires: ClassVar[list[str]] = []

    def resolve(self, store: ReadOnlyStore) -> BridgeResult:
        """Emit one :class:`Alias` per unresolved call target that can be matched.

        Python symbols of an indexable kind are grouped by leaf name. Every
        ``calls`` edge whose target uses the ``scip-python`` scheme and is
        not itself present in ``store`` is then handed to ``_resolve_one``.
        Each distinct target (keyed by ``str(target)``) is examined only
        once, whether or not it could be resolved, so repeated edges to the
        same target cost no further lookups.

        Returns a :class:`BridgeResult` that carries only ``aliases``.
        """
        leaf_to_symbols: dict[str, list[Symbol]] = defaultdict(list)
        for sym in store.iter_symbols():
            if sym.language != "python" or sym.kind not in _INDEXABLE_KINDS:
                continue
            if not sym.id.descriptors:
                continue
            leaf_to_symbols[sym.id.descriptors[-1].name].append(sym)

        aliases: list[Alias] = []
        examined: set[str] = set()
        for edge in store.iter_edges():
            if edge.kind != "calls":
                continue
            target = edge.target
            if target.scheme != _PYTHON_SCHEME:
                continue
            target_key = str(target)
            if target_key in examined:
                continue
            # Record the target before trying it: a target that is already
            # local or cannot be resolved gives the same answer on every
            # later edge, so there is no point asking again.
            examined.add(target_key)
            if store.get(target) is not None:
                continue  # already a real local symbol
            resolved = _resolve_one(target, leaf_to_symbols)
            if resolved is None:
                continue
            local, confidence = resolved
            aliases.append(
                Alias(
                    source=target,
                    targets=[local.id],
                    producer=self.name,
                    confidence=confidence,
                )
            )
        return BridgeResult(aliases=aliases)
86
+
87
+
88
def _resolve_one(
    target: SymbolID,
    leaf_to_symbols: dict[str, list[Symbol]],
) -> tuple[Symbol, Confidence] | None:
    """Pick the local symbol that ``target`` refers to, or ``None``.

    Returns ``(symbol, "high")`` when ``target`` has namespace descriptors
    and exactly one same-leaf candidate lives in a file whose stem equals
    the last namespace name. Returns ``(symbol, "medium")`` when ``target``
    has no namespace descriptors and exactly one candidate shares its leaf
    name. Every other situation returns ``None``.
    """
    descriptors = target.descriptors
    if not descriptors:
        return None

    *qualifiers, tail = descriptors
    # Only TERM, METHOD and META leaves are attempted; any other kind is
    # left unresolved.
    resolvable_kinds = (DescriptorKind.TERM, DescriptorKind.METHOD, DescriptorKind.META)
    if tail.kind not in resolvable_kinds:
        return None

    pool = leaf_to_symbols.get(tail.name, [])
    if not pool:
        return None

    namespaces = [d.name for d in qualifiers if d.kind is DescriptorKind.NAMESPACE]
    if not namespaces:
        # Nothing to disambiguate with: accept only a unique leaf-name match.
        return (pool[0], "medium") if len(pool) == 1 else None

    module_name = namespaces[-1]
    same_stem = [cand for cand in pool if PurePosixPath(cand.file).stem == module_name]
    # Zero or several candidates sharing the stem: refuse to guess, because
    # picking one would make the index lie.
    return (same_stem[0], "high") if len(same_stem) == 1 else None
118
+
119
+
120
+ __all__ = ["PythonCrossModuleBridge"]