codemap-core 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. codemap/__init__.py +7 -0
  2. codemap/cli/__init__.py +3 -0
  3. codemap/cli/_common.py +90 -0
  4. codemap/cli/commands/__init__.py +3 -0
  5. codemap/cli/commands/callees.py +102 -0
  6. codemap/cli/commands/callers.py +107 -0
  7. codemap/cli/commands/config.py +78 -0
  8. codemap/cli/commands/diagnostics.py +142 -0
  9. codemap/cli/commands/doctor.py +158 -0
  10. codemap/cli/commands/get.py +93 -0
  11. codemap/cli/commands/index.py +725 -0
  12. codemap/cli/commands/routes.py +104 -0
  13. codemap/cli/commands/search.py +78 -0
  14. codemap/cli/commands/trace.py +179 -0
  15. codemap/cli/main.py +140 -0
  16. codemap/cli/renderers/__init__.py +3 -0
  17. codemap/cli/renderers/json.py +32 -0
  18. codemap/cli/renderers/text.py +24 -0
  19. codemap/config/__init__.py +31 -0
  20. codemap/config/loader.py +96 -0
  21. codemap/config/schema.py +122 -0
  22. codemap/core/__init__.py +7 -0
  23. codemap/core/bridge/__init__.py +8 -0
  24. codemap/core/bridge/base.py +38 -0
  25. codemap/core/bridge/http_route.py +374 -0
  26. codemap/core/bridge/python_cross_module.py +120 -0
  27. codemap/core/bridge/registry.py +117 -0
  28. codemap/core/graph.py +183 -0
  29. codemap/core/models.py +299 -0
  30. codemap/core/store.py +78 -0
  31. codemap/core/symbol.py +314 -0
  32. codemap/diagnostics/__init__.py +3 -0
  33. codemap/diagnostics/exit_codes.py +30 -0
  34. codemap/diagnostics/logging.py +65 -0
  35. codemap/diagnostics/progress.py +68 -0
  36. codemap/indexers/__init__.py +9 -0
  37. codemap/indexers/_example_lang.py +135 -0
  38. codemap/indexers/base.py +77 -0
  39. codemap/indexers/python.py +577 -0
  40. codemap/indexers/registry.py +104 -0
  41. codemap/io/__init__.py +8 -0
  42. codemap/io/atomic.py +97 -0
  43. codemap/io/base.py +12 -0
  44. codemap/io/json_store.py +433 -0
  45. codemap/io/lock.py +87 -0
  46. codemap/io/manifest.py +90 -0
  47. codemap/mcp/__init__.py +3 -0
  48. codemap_core-0.1.0.dist-info/METADATA +480 -0
  49. codemap_core-0.1.0.dist-info/RECORD +52 -0
  50. codemap_core-0.1.0.dist-info/WHEEL +4 -0
  51. codemap_core-0.1.0.dist-info/entry_points.txt +10 -0
  52. codemap_core-0.1.0.dist-info/licenses/LICENSE +21 -0
codemap/core/symbol.py ADDED
@@ -0,0 +1,314 @@
1
+ """SymbolID — SCIP-compatible cross-language symbol identifier.
2
+
3
+ This is the foundational data type for the entire system (ADR-001). A SymbolID
4
+ is a string-encoded handle that uniquely identifies a symbol across languages,
5
+ file types, and assets. The encoding follows the SCIP Symbol grammar so that
6
+ CodeMap can interoperate with the Sourcegraph SCIP ecosystem.
7
+
8
+ Grammar (informal, see SCIP `scip.proto` for the canonical reference)::
9
+
10
+ <symbol> ::= <scheme> ' ' <manager> ' ' <package_name>
11
+ ' ' <package_version> ' ' <descriptor>+
12
+ <scheme> ::= 'local' | <identifier> ; e.g. 'scip-python'
13
+ <descriptor> ::= <namespace> | <type> | <term> | <method>
14
+ | <type_parameter> | <parameter> | <meta>
15
+ <namespace> ::= <name> '/'
16
+ <type> ::= <name> '#'
17
+ <term> ::= <name> '.'
18
+ <method> ::= <name> '(' <disambiguator>? ').'
19
+ <type_parameter> ::= '[' <name> ']'
20
+ <parameter> ::= '(' <name> ')'
21
+ <meta> ::= <name> ':'
22
+ <name> ::= <simple-identifier> | <escaped-identifier>
23
+ <simple-identifier> ::= one or more of [A-Za-z0-9_+$.-]
24
+ <escaped-identifier> ::= '`' (any char, '``' escapes a literal backtick) '`'
25
+
26
+ Invariants enforced here:
27
+
28
+ * ``SymbolID.parse(s).to_string() == s`` (round-trip).
29
+ * The header (scheme/manager/package/version) is exactly four space-separated
30
+ tokens followed by a single space and the descriptor stream.
31
+ * Empty header fields use the placeholder ``'.'`` (matching SCIP convention).
32
+
33
+ The module is intentionally dependency-free except for ``pydantic-core`` for
34
+ the Pydantic v2 integration (see ``__get_pydantic_core_schema__``).
35
+ """
36
+
37
+ from __future__ import annotations
38
+
39
+ from dataclasses import dataclass, field
40
+ from enum import StrEnum
41
+ from typing import TYPE_CHECKING, Any
42
+
43
+ if TYPE_CHECKING:
44
+ from pydantic import GetCoreSchemaHandler
45
+ from pydantic_core import CoreSchema
46
+
47
+
48
+ # ---------------------------------------------------------------------------
49
+ # Public types
50
+ # ---------------------------------------------------------------------------
51
+
52
+
53
+ class DescriptorKind(StrEnum):
54
+ """SCIP descriptor kinds, identified by their trailing suffix syntax."""
55
+
56
+ NAMESPACE = "namespace" # ``name/``
57
+ TYPE = "type" # ``name#``
58
+ TERM = "term" # ``name.``
59
+ METHOD = "method" # ``name(disambig?).``
60
+ TYPE_PARAMETER = "type_parameter" # ``[name]``
61
+ PARAMETER = "parameter" # ``(name)``
62
+ META = "meta" # ``name:``
63
+
64
+
65
@dataclass(frozen=True, slots=True)
class Descriptor:
    """A single ``<descriptor>`` element of a SymbolID.

    ``disambiguator`` is only meaningful for ``METHOD`` descriptors; supplying a
    non-empty one with any other kind raises :class:`ValueError` at
    construction time.
    """

    name: str
    kind: DescriptorKind
    disambiguator: str = ""

    def __post_init__(self) -> None:
        # Nothing to validate unless a disambiguator was actually supplied.
        if not self.disambiguator or self.kind is DescriptorKind.METHOD:
            return
        raise ValueError("disambiguator is only valid for METHOD descriptors")

    def to_string(self) -> str:
        """Return the serialized form of this descriptor.

        The name (and, for methods, a non-empty disambiguator) is passed through
        ``_encode_name`` before the kind-specific punctuation is added.
        """
        encoded = _encode_name(self.name)
        kind = self.kind

        if kind is DescriptorKind.METHOD:
            tag = _encode_name(self.disambiguator) if self.disambiguator else ""
            return encoded + "(" + tag + ")."
        if kind is DescriptorKind.TYPE_PARAMETER:
            return "[" + encoded + "]"
        if kind is DescriptorKind.PARAMETER:
            return "(" + encoded + ")"

        # Remaining kinds are simply ``<name><suffix>``. Identity comparison is
        # used so that only genuine enum members are accepted.
        for candidate, suffix in (
            (DescriptorKind.NAMESPACE, "/"),
            (DescriptorKind.TYPE, "#"),
            (DescriptorKind.TERM, "."),
            (DescriptorKind.META, ":"),
        ):
            if kind is candidate:
                return encoded + suffix
        raise AssertionError(f"unhandled descriptor kind: {self.kind}")  # pragma: no cover
95
+
96
+
97
+ @dataclass(frozen=True, slots=True)
98
+ class SymbolID:
99
+ """A SCIP-encoded symbol identifier.
100
+
101
+ Construct via :meth:`parse` from a serialized string, or by composing
102
+ descriptors directly. Instances are hashable and value-equal — they may
103
+ safely live in sets and dict keys.
104
+ """
105
+
106
+ scheme: str
107
+ manager: str = "."
108
+ package_name: str = "."
109
+ package_version: str = "."
110
+ descriptors: tuple[Descriptor, ...] = field(default_factory=tuple)
111
+
112
+ # ------------------------------------------------------------------ ctors
113
+ @classmethod
114
+ def parse(cls, s: str) -> SymbolID:
115
+ """Parse a serialized SCIP symbol. Raises :class:`SymbolParseError`."""
116
+ return _parse_symbol(s)
117
+
118
+ # ----------------------------------------------------------- serialization
119
+ def to_string(self) -> str:
120
+ header = f"{self.scheme} {self.manager} {self.package_name} {self.package_version} "
121
+ body = "".join(d.to_string() for d in self.descriptors)
122
+ return header + body
123
+
124
+ def __str__(self) -> str:
125
+ return self.to_string()
126
+
127
+ # --------------------------------------------------- pydantic v2 support
128
+ @classmethod
129
+ def __get_pydantic_core_schema__(
130
+ cls,
131
+ _source_type: Any,
132
+ _handler: GetCoreSchemaHandler,
133
+ ) -> CoreSchema:
134
+ from pydantic_core import core_schema
135
+
136
+ def _validate(v: Any) -> SymbolID:
137
+ if isinstance(v, cls):
138
+ return v
139
+ if isinstance(v, str):
140
+ return cls.parse(v)
141
+ raise TypeError(f"cannot convert {type(v).__name__} to SymbolID")
142
+
143
+ return core_schema.no_info_plain_validator_function(
144
+ _validate,
145
+ serialization=core_schema.plain_serializer_function_ser_schema(
146
+ str,
147
+ when_used="always",
148
+ return_schema=core_schema.str_schema(),
149
+ ),
150
+ )
151
+
152
+
153
class SymbolParseError(ValueError):
    """Signals that a string is not a well-formed SCIP symbol.

    Derives from :class:`ValueError`, so handlers written for that built-in
    exception catch it as well.
    """
155
+
156
+
157
+ # ---------------------------------------------------------------------------
158
+ # Parser internals
159
+ # ---------------------------------------------------------------------------
160
+
161
+
162
# Punctuation allowed in an unescaped <simple-identifier>, in addition to
# ASCII letters and digits.
_IDENT_EXTRA = frozenset("_$+-.")


def _is_ident_char(c: str) -> bool:
    """Return whether *c* may appear in an unescaped ``<simple-identifier>``.

    The accepted set is ``[A-Za-z0-9_+$.-]``: ASCII alphanumerics plus the
    characters in ``_IDENT_EXTRA``. ``str.isalnum()`` alone would also accept
    non-ASCII letters and digits, so the ASCII check is required to keep such
    names backtick-escaped.
    """
    return (c.isascii() and c.isalnum()) or c in _IDENT_EXTRA


def _needs_escape(name: str) -> bool:
    """Return whether *name* must be written in backtick-escaped form.

    That is the case when it is empty or contains any character rejected by
    ``_is_ident_char``.
    """
    if not name:
        return True
    return any(not _is_ident_char(c) for c in name)


def _encode_name(name: str) -> str:
    """Serialize *name* as a ``<name>`` token.

    Simple identifiers are returned unchanged; anything else is wrapped in
    backticks with each literal backtick doubled.
    """
    if not _needs_escape(name):
        return name
    return "`" + name.replace("`", "``") + "`"
181
+
182
+
183
def _parse_symbol(s: str) -> SymbolID:
    """Turn a serialized symbol string into a :class:`SymbolID`.

    The string is cut at its first four spaces: the first four pieces are the
    header (scheme, manager, package name, package version) and the remainder
    is the descriptor stream, which may itself contain spaces inside
    backtick-escaped names. Empty manager/package/version fields are replaced
    by the ``'.'`` placeholder.

    Raises:
        SymbolParseError: if *s* is not a ``str``, has fewer than five fields,
            has an empty scheme, or yields no descriptors.
    """
    if not isinstance(s, str):
        raise SymbolParseError(f"symbol must be str, got {type(s).__name__}")

    fields = s.split(" ", 4)
    field_count = len(fields)
    if field_count < 5:
        raise SymbolParseError(
            f"invalid SCIP symbol: need at least 5 space-separated fields "
            f"(scheme manager package version descriptors...), got {field_count} in {s!r}"
        )

    scheme = fields[0]
    if not scheme:
        raise SymbolParseError("scheme must be non-empty")

    parsed = tuple(_parse_descriptors(fields[4]))
    if not parsed:
        raise SymbolParseError(f"symbol must have at least one descriptor: {s!r}")

    # Blank header fields fall back to the '.' placeholder.
    manager, package_name, package_version = (item or "." for item in fields[1:4])
    return SymbolID(
        scheme=scheme,
        manager=manager,
        package_name=package_name,
        package_version=package_version,
        descriptors=parsed,
    )
208
+
209
+
210
def _parse_descriptors(s: str) -> list[Descriptor]:
    """Parse the descriptor stream *s* into descriptors, in source order.

    A leading ``[`` or ``(`` introduces a bracketed TYPE_PARAMETER or PARAMETER
    descriptor. Otherwise a name is read and the character after it selects the
    kind: ``/`` namespace, ``#`` type, ``:`` meta, ``.`` term, and ``(`` a
    method of the form ``name(disambiguator?).``.

    A name must be non-empty unless it is written in backtick-escaped form
    (`` `` ``); a bare suffix such as ``/`` or an empty ``[]`` / ``()`` is
    rejected, because it could not be re-serialized to the same text. Method
    disambiguators may be empty.

    Raises:
        SymbolParseError: on an unterminated bracket or method form, a missing
            or unknown suffix, a method ``)`` not followed by ``.``, or an
            empty unescaped name. ``_read_name`` may also raise it for an
            unterminated escaped name.
    """
    out: list[Descriptor] = []
    i = 0
    n = len(s)
    while i < n:
        c = s[i]
        if c == "[":
            name, j = _read_name(s, i + 1)
            if j >= n or s[j] != "]":
                raise SymbolParseError(f"unterminated [type_parameter] at offset {i} in {s!r}")
            # j == i + 1 means no characters were consumed for the name; an
            # escaped empty name (``) would have advanced j by two.
            if j == i + 1:
                raise SymbolParseError(f"empty [type_parameter] name at offset {i} in {s!r}")
            out.append(Descriptor(name=name, kind=DescriptorKind.TYPE_PARAMETER))
            i = j + 1
            continue
        if c == "(":
            # Bare '(name)' is a PARAMETER descriptor.
            name, j = _read_name(s, i + 1)
            if j >= n or s[j] != ")":
                raise SymbolParseError(f"unterminated (parameter) at offset {i} in {s!r}")
            if j == i + 1:
                raise SymbolParseError(f"empty (parameter) name at offset {i} in {s!r}")
            out.append(Descriptor(name=name, kind=DescriptorKind.PARAMETER))
            i = j + 1
            continue

        name, j = _read_name(s, i)
        # Trailing-dot recovery: a greedy identifier may have absorbed a final
        # '.' which was actually meant to be the term suffix. If we ran off the
        # end of the input with no remaining suffix character, give back the
        # final '.' to serve as the suffix.
        if j == n and name.endswith("."):
            name = name[:-1]
            j -= 1
        if j >= n:
            raise SymbolParseError(f"missing descriptor suffix after name at offset {i} in {s!r}")
        suffix = s[j]
        # j == i means the name consumed no input. Only complain when the suffix
        # itself is valid so that stray characters still get the more precise
        # "unknown descriptor suffix" error below.
        if j == i and suffix in ("/", "#", ":", "."):
            raise SymbolParseError(
                f"empty descriptor name before {suffix!r} at offset {i} in {s!r}"
            )
        if suffix == "/":
            out.append(Descriptor(name=name, kind=DescriptorKind.NAMESPACE))
            i = j + 1
        elif suffix == "#":
            out.append(Descriptor(name=name, kind=DescriptorKind.TYPE))
            i = j + 1
        elif suffix == ":":
            out.append(Descriptor(name=name, kind=DescriptorKind.META))
            i = j + 1
        elif suffix == ".":
            out.append(Descriptor(name=name, kind=DescriptorKind.TERM))
            i = j + 1
        elif suffix == "(":
            disambig, k = _read_name(s, j + 1)
            if k >= n or s[k] != ")":
                raise SymbolParseError(f"unterminated method '(' at offset {j} in {s!r}")
            if k + 1 >= n or s[k + 1] != ".":
                raise SymbolParseError(
                    f"method ')' must be followed by '.' at offset {k + 1} in {s!r}"
                )
            out.append(
                Descriptor(
                    name=name,
                    kind=DescriptorKind.METHOD,
                    disambiguator=disambig,
                )
            )
            # Skip past both the ')' and the trailing '.'.
            i = k + 2
        else:
            raise SymbolParseError(f"unknown descriptor suffix {suffix!r} at offset {j} in {s!r}")
    return out
274
+
275
+
276
def _read_name(s: str, i: int) -> tuple[str, int]:
    """Scan one ``<name>`` token from *s* beginning at index *i*.

    Returns ``(name, next_index)`` where ``next_index`` is the position of the
    first character after the token. A token that opens with a backtick is
    handed to ``_read_escaped``; inside such a token two consecutive backticks
    stand for one literal backtick. Otherwise the longest run of identifier
    characters is taken, which may be empty. At or past the end of *s* the
    result is ``("", i)``.
    """
    limit = len(s)
    if i >= limit:
        return "", i
    if s[i] == "`":
        return _read_escaped(s, i + 1)
    # Index of the first non-identifier character, or the end of the string.
    end = next((k for k in range(i, limit) if not _is_ident_char(s[k])), limit)
    return s[i:end], end
291
+
292
+
293
def _read_escaped(s: str, i: int) -> tuple[str, int]:
    """Read the remainder of a backtick-escaped name.

    *i* is the index just after the opening backtick. Returns the decoded name
    and the index just after the closing backtick. A doubled backtick inside
    the name decodes to a single literal backtick.

    Raises:
        SymbolParseError: if no closing backtick is found.
    """
    pieces: list[str] = []
    pos = i
    while (tick := s.find("`", pos)) != -1:
        # Everything up to the backtick is literal text.
        pieces.append(s[pos:tick])
        if s.startswith("`", tick + 1):
            # Doubled backtick: an escaped literal, keep scanning after it.
            pieces.append("`")
            pos = tick + 2
            continue
        return "".join(pieces), tick + 1
    raise SymbolParseError(f"unterminated escaped name starting at offset {i - 1}")
307
+
308
+
309
+ __all__ = [
310
+ "Descriptor",
311
+ "DescriptorKind",
312
+ "SymbolID",
313
+ "SymbolParseError",
314
+ ]
@@ -0,0 +1,3 @@
1
+ """Cross-cutting diagnostics: exit codes, logging, progress."""
2
+
3
+ from __future__ import annotations
@@ -0,0 +1,30 @@
1
+ """sysexits.h-compatible exit codes (ADR-005).
2
+
3
+ A subset of BSD sysexits used consistently across CLI entry points so callers
4
+ (scripts, CI, MCP) can branch on specific failure modes without parsing stderr.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from enum import IntEnum
10
+
11
+
12
class ExitCode(IntEnum):
    """Process exit statuses used by the CodeMap CLI.

    Apart from ``OK`` and ``GENERIC_ERROR`` the values follow BSD
    ``sysexits.h``; the matching ``EX_*`` name is given beside each member.
    """

    # Conventional success / unspecified failure.
    OK = 0
    GENERIC_ERROR = 1

    # sysexits.h range.
    USAGE_ERROR = 64  # EX_USAGE
    DATA_ERROR = 65  # EX_DATAERR: corrupt index, incompatible schema
    NO_INPUT = 66  # EX_NOINPUT: `.codemap/` missing or no source files
    NO_USER = 67  # EX_NOUSER (reserved)
    UNAVAILABLE = 69  # EX_UNAVAILABLE (reserved)
    INTERNAL_BUG = 70  # EX_SOFTWARE
    OS_ERROR = 71  # EX_OSERR (reserved)
    CANT_CREATE = 73  # EX_CANTCREAT: write failed
    IO_ERROR = 74  # EX_IOERR (reserved)
    TEMP_FAIL = 75  # EX_TEMPFAIL: lock contention timeout
    CONFIG_ERROR = 78  # EX_CONFIG
28
+
29
+
30
+ __all__ = ["ExitCode"]
@@ -0,0 +1,65 @@
1
+ """Logging configuration for the CLI.
2
+
3
+ Defaults to WARNING. ``-v`` raises to INFO; ``-vv`` raises to DEBUG. Output
4
+ goes to stderr through ``rich.logging.RichHandler`` for readability, or to a
5
+ JSON line per record when ``--log-format json`` is set.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import logging
12
+ import sys
13
+ import traceback
14
+ from typing import Literal
15
+
16
+ from rich.console import Console
17
+ from rich.logging import RichHandler
18
+
19
+ LogFormat = Literal["text", "json"]
20
+
21
+
22
class _JsonHandler(logging.Handler):
    """Emit one JSON object per log record on stderr.

    Each line carries ``ts`` (record creation time in integer milliseconds),
    ``level``, ``logger`` and ``msg``, plus ``exc_info`` (the formatted
    traceback) when the record has exception information attached.
    """

    def emit(self, record: logging.LogRecord) -> None:
        """Write *record* to ``sys.stderr`` as a single JSON line.

        Any failure while formatting or writing (for example a log call whose
        arguments do not match its format string) is routed to
        :meth:`logging.Handler.handleError` instead of propagating into the
        code that issued the log call, as the standard-library handlers do.
        """
        try:
            payload: dict[str, object] = {
                "ts": int(record.created * 1000),
                "level": record.levelname,
                "logger": record.name,
                "msg": record.getMessage(),
            }
            if record.exc_info:
                payload["exc_info"] = "".join(traceback.format_exception(*record.exc_info))
            # Look up sys.stderr at call time so later redirection is honoured.
            stream = sys.stderr
            stream.write(json.dumps(payload, ensure_ascii=False) + "\n")
            stream.flush()
        except RecursionError:
            # Same carve-out as logging.StreamHandler: never mask this one.
            raise
        except Exception:
            self.handleError(record)
36
+
37
+
38
def configure_logging(verbosity: int = 0, *, log_format: LogFormat = "text") -> None:
    """Reset the root logger to a single CLI handler.

    Every handler currently attached to the root logger is removed and exactly
    one new handler is installed, so repeated calls do not stack handlers.

    Args:
        verbosity: ``<= 0`` selects WARNING, ``1`` selects INFO, anything else
            selects DEBUG.
        log_format: ``"json"`` installs the JSON-lines handler; any other value
            installs a ``RichHandler`` writing to stderr.
    """
    level = (
        logging.WARNING
        if verbosity <= 0
        else logging.INFO
        if verbosity == 1
        else logging.DEBUG
    )

    root = logging.getLogger()
    # Iterate over a snapshot: removeHandler mutates root.handlers.
    for stale in tuple(root.handlers):
        root.removeHandler(stale)

    handler: logging.Handler
    if log_format == "json":
        handler = _JsonHandler()
    else:
        handler = RichHandler(
            console=Console(stderr=True),
            show_time=False,
            show_path=False,
            markup=False,
            rich_tracebacks=True,
        )
    root.addHandler(handler)
    root.setLevel(level)
63
+
64
+
65
+ __all__ = ["LogFormat", "configure_logging"]
@@ -0,0 +1,68 @@
1
+ """Thin wrapper over ``rich.progress`` used by the indexing pipeline."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Iterator
6
+ from contextlib import contextmanager
7
+ from typing import TypeVar
8
+
9
+ from rich.console import Console
10
+ from rich.progress import (
11
+ BarColumn,
12
+ Progress,
13
+ SpinnerColumn,
14
+ TextColumn,
15
+ TimeElapsedColumn,
16
+ )
17
+
18
+ T = TypeVar("T")
19
+
20
+
21
@contextmanager
def progress_bar(
    description: str,
    total: int | None = None,
    *,
    enabled: bool = True,
    console: Console | None = None,
) -> Iterator[Progress]:
    """Context manager yielding a progress display for one task.

    When *enabled* is true, a transient ``rich`` ``Progress`` is started with a
    single task already added for *description* / *total*, and is yielded to
    the caller. It renders on *console*, or on a new stderr console when none
    is given. When *enabled* is false a ``_SilentProgress`` stand-in is yielded
    instead.
    """
    if enabled:
        columns = (
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(bar_width=30),
            TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
            TimeElapsedColumn(),
        )
        target = console or Console(stderr=True)
        with Progress(*columns, console=target, transient=True) as bar:
            bar.add_task(description, total=total)
            yield bar
    else:
        yield _SilentProgress()  # type: ignore[misc]
44
+
45
+
46
class _SilentProgress:
    """No-op replacement for a progress display (``--no-progress``).

    Every method accepts arbitrary arguments where applicable and does
    nothing; ``add_task`` always reports task id ``0``.
    """

    def __enter__(self) -> _SilentProgress:
        return self

    def __exit__(self, *exc: object) -> None:  # pragma: no cover
        """Leave the context without suppressing any exception."""

    def add_task(self, *_args: object, **_kwargs: object) -> int:  # pragma: no cover
        """Pretend to register a task and return a fixed id."""
        return 0

    def update(self, *_args: object, **_kwargs: object) -> None:  # pragma: no cover
        """Ignore a task update."""

    def advance(self, *_args: object, **_kwargs: object) -> None:  # pragma: no cover
        """Ignore a task advance."""

    def stop(self) -> None:  # pragma: no cover
        """Ignore a stop request."""
66
+
67
+
68
+ __all__ = ["progress_bar"]
@@ -0,0 +1,9 @@
1
+ """Pluggable indexer registry.
2
+
3
+ Built-in indexers are registered through `pyproject.toml`'s `codemap.indexers`
4
+ entry-point group on equal footing with third-party plugins (ADR-004, ADR-L001).
5
+ No language is privileged; the main repository ships only a reference
6
+ `_example_lang` indexer used to validate the end-to-end pipeline.
7
+ """
8
+
9
+ from __future__ import annotations
@@ -0,0 +1,135 @@
1
+ """Reference indexer used to validate the end-to-end pipeline.
2
+
3
+ Files ending in ``.example`` are treated as a tiny pseudo-language:
4
+
5
+ * Each line starting with ``def NAME`` declares a function symbol named
6
+ ``NAME`` at that line.
7
+ * Each line containing ``call NAME`` records a ``calls`` edge from the most
8
+ recent ``def`` to ``NAME``.
9
+
10
+ The indexer is intentionally minimal — it exists so Sprint 0 can prove that
11
+ the storage, registry, and CLI layers compose end-to-end without depending on
12
+ any real-language parser. It is published through the ``codemap.indexers``
13
+ entry-point and registers on equal footing with third-party indexers.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from pathlib import Path, PurePosixPath
19
+ from typing import ClassVar
20
+
21
+ from codemap.core.models import Diagnostic, Edge, IndexResult, Range, Symbol
22
+ from codemap.core.symbol import Descriptor, DescriptorKind, SymbolID
23
+ from codemap.indexers.base import IndexContext
24
+
25
LANG = "example"
SCHEME = "scip-example"


class ExampleLangIndexer:
    """Indexer for the ``.example`` reference pseudo-language.

    ``def NAME`` lines declare function symbols; lines containing ``call NAME``
    record a ``calls`` edge from the most recent ``def`` to ``NAME``. A
    parenthesised argument list after either name (``def f(a, b)``,
    ``call f()``) is ignored, so definitions and calls written in either style
    refer to the same symbol.
    """

    name: ClassVar[str] = "_example_lang"
    version: ClassVar[str] = "0.1.0"
    file_patterns: ClassVar[list[str]] = ["*.example"]
    languages: ClassVar[list[str]] = [LANG]

    def supports(self, path: Path) -> bool:
        """Return whether *path* has the ``.example`` suffix."""
        return path.suffix == ".example"

    @staticmethod
    def _leading_name(text: str) -> str:
        """Return the identifier at the start of *text*, without any call syntax.

        The first whitespace-delimited token is cut at its first ``(`` and any
        stray trailing ``)`` is dropped. Returns ``""`` when nothing is left.
        """
        tokens = text.split()
        if not tokens:
            return ""
        return tokens[0].partition("(")[0].rstrip(")")

    def index_file(
        self,
        path: Path,
        source: bytes,
        ctx: IndexContext,
    ) -> IndexResult:
        """Index one ``.example`` file.

        Args:
            path: Location of the file; not consulted here, ``ctx.relative_path``
                is what gets recorded.
            source: Raw file contents, expected to be UTF-8.
            ctx: Indexing context supplying ``relative_path``.

        Returns:
            An ``IndexResult`` with the declared symbols, the ``calls`` edges and
            any diagnostics: ``EXAMPLE001`` (error, not UTF-8; nothing else is
            produced), ``EXAMPLE002`` (warning, ``def`` without a name) and
            ``EXAMPLE003`` (warning, ``call`` before any ``def``).
        """
        try:
            text = source.decode("utf-8")
        except UnicodeDecodeError as exc:
            return IndexResult(
                diagnostics=[
                    Diagnostic(
                        severity="error",
                        file=ctx.relative_path,
                        code="EXAMPLE001",
                        message=f"not valid UTF-8: {exc}",
                        producer=self.name,
                    )
                ]
            )

        symbols: list[Symbol] = []
        edges: list[Edge] = []
        diagnostics: list[Diagnostic] = []

        # Symbol of the most recent successfully parsed ``def``; call edges are
        # attributed to it.
        current_function: SymbolID | None = None

        for line_no, raw in enumerate(text.splitlines(), start=1):
            stripped = raw.strip()
            # Blank lines and ``#`` comments carry no information.
            if not stripped or stripped.startswith("#"):
                continue
            if stripped.startswith("def "):
                name = self._leading_name(stripped[4:])
                if not name:
                    diagnostics.append(
                        Diagnostic(
                            severity="warning",
                            file=ctx.relative_path,
                            range=Range(start_line=line_no, end_line=line_no),
                            code="EXAMPLE002",
                            message="empty def name",
                            producer=self.name,
                        )
                    )
                    continue
                sid = _make_symbol_id(ctx.relative_path, name)
                symbols.append(
                    Symbol(
                        id=sid,
                        kind="function",
                        language=LANG,
                        file=ctx.relative_path,
                        range=Range(start_line=line_no, end_line=line_no),
                        signature=f"def {name}()",
                    )
                )
                current_function = sid
                continue
            if "call " in stripped:
                rest = stripped.split("call ", 1)[1]
                # Normalise like ``def`` names so ``call f()`` resolves to
                # ``def f``; keep the raw token if nothing but punctuation
                # would remain.
                target_name = self._leading_name(rest) or rest.split()[0]
                if current_function is None:
                    diagnostics.append(
                        Diagnostic(
                            severity="warning",
                            file=ctx.relative_path,
                            range=Range(start_line=line_no, end_line=line_no),
                            code="EXAMPLE003",
                            message=f"'call {target_name}' outside any def",
                            producer=self.name,
                        )
                    )
                    continue
                target_id = _make_symbol_id(ctx.relative_path, target_name)
                edges.append(
                    Edge(
                        source=current_function,
                        target=target_id,
                        kind="calls",
                        location=Range(start_line=line_no, end_line=line_no),
                    )
                )

        return IndexResult(symbols=symbols, edges=edges, diagnostics=diagnostics)
123
+
124
+
125
def _make_symbol_id(_file: PurePosixPath, function_name: str) -> SymbolID:
    """Build the SymbolID for a function of the reference language.

    *_file* is deliberately unused: the reference language has one global
    namespace, so a ``call`` in one file resolves to the same SymbolID as the
    ``def`` in another. Real-world indexers generally encode the file/module
    into the namespace.
    """
    method = Descriptor(name=function_name, kind=DescriptorKind.METHOD)
    return SymbolID(scheme=SCHEME, descriptors=(method,))
133
+
134
+
135
+ __all__ = ["LANG", "SCHEME", "ExampleLangIndexer"]