PyPI - codemap-core - Versions diffs - 0.1.0__py3-none-any.whl - Mend

codemap-core 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

codemap/__init__.py +7 -0
codemap/cli/__init__.py +3 -0
codemap/cli/_common.py +90 -0
codemap/cli/commands/__init__.py +3 -0
codemap/cli/commands/callees.py +102 -0
codemap/cli/commands/callers.py +107 -0
codemap/cli/commands/config.py +78 -0
codemap/cli/commands/diagnostics.py +142 -0
codemap/cli/commands/doctor.py +158 -0
codemap/cli/commands/get.py +93 -0
codemap/cli/commands/index.py +725 -0
codemap/cli/commands/routes.py +104 -0
codemap/cli/commands/search.py +78 -0
codemap/cli/commands/trace.py +179 -0
codemap/cli/main.py +140 -0
codemap/cli/renderers/__init__.py +3 -0
codemap/cli/renderers/json.py +32 -0
codemap/cli/renderers/text.py +24 -0
codemap/config/__init__.py +31 -0
codemap/config/loader.py +96 -0
codemap/config/schema.py +122 -0
codemap/core/__init__.py +7 -0
codemap/core/bridge/__init__.py +8 -0
codemap/core/bridge/base.py +38 -0
codemap/core/bridge/http_route.py +374 -0
codemap/core/bridge/python_cross_module.py +120 -0
codemap/core/bridge/registry.py +117 -0
codemap/core/graph.py +183 -0
codemap/core/models.py +299 -0
codemap/core/store.py +78 -0
codemap/core/symbol.py +314 -0
codemap/diagnostics/__init__.py +3 -0
codemap/diagnostics/exit_codes.py +30 -0
codemap/diagnostics/logging.py +65 -0
codemap/diagnostics/progress.py +68 -0
codemap/indexers/__init__.py +9 -0
codemap/indexers/_example_lang.py +135 -0
codemap/indexers/base.py +77 -0
codemap/indexers/python.py +577 -0
codemap/indexers/registry.py +104 -0
codemap/io/__init__.py +8 -0
codemap/io/atomic.py +97 -0
codemap/io/base.py +12 -0
codemap/io/json_store.py +433 -0
codemap/io/lock.py +87 -0
codemap/io/manifest.py +90 -0
codemap/mcp/__init__.py +3 -0
codemap_core-0.1.0.dist-info/METADATA +480 -0
codemap_core-0.1.0.dist-info/RECORD +52 -0
codemap_core-0.1.0.dist-info/WHEEL +4 -0
codemap_core-0.1.0.dist-info/entry_points.txt +10 -0
codemap_core-0.1.0.dist-info/licenses/LICENSE +21 -0

codemap/core/symbol.py ADDED Viewed

@@ -0,0 +1,314 @@
+"""SymbolID — SCIP-compatible cross-language symbol identifier.
+This is the foundational data type for the entire system (ADR-001). A SymbolID
+is a string-encoded handle that uniquely identifies a symbol across languages,
+file types, and assets. The encoding follows the SCIP Symbol grammar so that
+CodeMap can interoperate with the Sourcegraph SCIP ecosystem.
+Grammar (informal, see SCIP `scip.proto` for the canonical reference)::
+    <symbol>          ::= <scheme> ' ' <manager> ' ' <package_name>
+                          ' ' <package_version> ' ' <descriptor>+
+    <scheme>          ::= 'local' | <identifier>     ; e.g. 'scip-python'
+    <descriptor>      ::= <namespace> | <type> | <term> | <method>
+                          | <type_parameter> | <parameter> | <meta>
+    <namespace>       ::= <name> '/'
+    <type>            ::= <name> '#'
+    <term>            ::= <name> '.'
+    <method>          ::= <name> '(' <disambiguator>? ').'
+    <type_parameter>  ::= '[' <name> ']'
+    <parameter>       ::= '(' <name> ')'
+    <meta>            ::= <name> ':'
+    <name>            ::= <simple-identifier> | <escaped-identifier>
+    <simple-identifier> ::= one or more of [A-Za-z0-9_+$.-]
+    <escaped-identifier> ::= '`' (any char, '``' escapes a literal backtick) '`'
+Invariants enforced here:
+* ``SymbolID.parse(s).to_string() == s`` (round-trip).
+* The header (scheme/manager/package/version) is exactly four space-separated
+  tokens followed by a single space and the descriptor stream.
+* Empty header fields use the placeholder ``'.'`` (matching SCIP convention).
+The module is intentionally dependency-free except for ``pydantic-core`` for
+the Pydantic v2 integration (see ``__get_pydantic_core_schema__``).
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from enum import StrEnum
+from typing import TYPE_CHECKING, Any
+if TYPE_CHECKING:
+    from pydantic import GetCoreSchemaHandler
+    from pydantic_core import CoreSchema
+# ---------------------------------------------------------------------------
+# Public types
+# ---------------------------------------------------------------------------
+class DescriptorKind(StrEnum):
+    """Closed set of SCIP descriptor kinds.
+    Each member's value is its lowercase name. The comment above a member shows
+    the surface syntax that kind takes inside a serialized symbol.
+    """
+    # ``name/``
+    NAMESPACE = "namespace"
+    # ``name#``
+    TYPE = "type"
+    # ``name.``
+    TERM = "term"
+    # ``name(disambig?).``
+    METHOD = "method"
+    # ``[name]``
+    TYPE_PARAMETER = "type_parameter"
+    # ``(name)``
+    PARAMETER = "parameter"
+    # ``name:``
+    META = "meta"
+@dataclass(frozen=True, slots=True)
+class Descriptor:
+    """A single ``<descriptor>`` segment of a symbol: a name plus its kind.
+    ``disambiguator`` is only meaningful for METHOD descriptors; supplying a
+    non-empty one together with any other kind raises :class:`ValueError`.
+    """
+    name: str
+    kind: DescriptorKind
+    disambiguator: str = ""
+    def __post_init__(self) -> None:
+        is_method = self.kind is DescriptorKind.METHOD
+        if self.disambiguator and not is_method:
+            raise ValueError("disambiguator is only valid for METHOD descriptors")
+    def to_string(self) -> str:
+        """Serialize this descriptor using the suffix/bracket syntax of its kind."""
+        kind = self.kind
+        encoded = _encode_name(self.name)
+        # Kinds that are written as ``<name><single-character suffix>``.
+        for simple_kind, suffix in (
+            (DescriptorKind.NAMESPACE, "/"),
+            (DescriptorKind.TYPE, "#"),
+            (DescriptorKind.TERM, "."),
+            (DescriptorKind.META, ":"),
+        ):
+            if kind is simple_kind:
+                return encoded + suffix
+        if kind is DescriptorKind.METHOD:
+            # An empty disambiguator is emitted as nothing at all rather than
+            # as an escaped empty name.
+            inner = _encode_name(self.disambiguator) if self.disambiguator else ""
+            return f"{encoded}({inner})."
+        if kind is DescriptorKind.TYPE_PARAMETER:
+            return f"[{encoded}]"
+        if kind is DescriptorKind.PARAMETER:
+            return f"({encoded})"
+        raise AssertionError(f"unhandled descriptor kind: {self.kind}")  # pragma: no cover
+@dataclass(frozen=True, slots=True)
+class SymbolID:
+    """Immutable value object for one SCIP-encoded symbol.
+    Build one with :meth:`parse` from its serialized form, or pass the header
+    fields and descriptors directly. The dataclass is frozen, so instances
+    compare by value and are hashable (usable in sets and as dict keys).
+    """
+    scheme: str
+    manager: str = "."
+    package_name: str = "."
+    package_version: str = "."
+    descriptors: tuple[Descriptor, ...] = field(default_factory=tuple)
+    # ------------------------------------------------------------------ ctors
+    @classmethod
+    def parse(cls, s: str) -> SymbolID:
+        """Build a SymbolID from its serialized form.
+        Raises :class:`SymbolParseError` when ``s`` cannot be parsed.
+        """
+        return _parse_symbol(s)
+    # ----------------------------------------------------------- serialization
+    def to_string(self) -> str:
+        """Return the four header fields and the concatenated descriptors as one string."""
+        descriptor_stream = "".join(part.to_string() for part in self.descriptors)
+        return (
+            f"{self.scheme} {self.manager} {self.package_name} "
+            f"{self.package_version} {descriptor_stream}"
+        )
+    def __str__(self) -> str:
+        return self.to_string()
+    # --------------------------------------------------- pydantic v2 support
+    @classmethod
+    def __get_pydantic_core_schema__(
+        cls,
+        _source_type: Any,
+        _handler: GetCoreSchemaHandler,
+    ) -> CoreSchema:
+        """Validate from an existing instance or a string; always serialize via ``str``."""
+        # Imported lazily so the module itself does not import pydantic_core.
+        from pydantic_core import core_schema
+        def _coerce(value: Any) -> SymbolID:
+            # NOTE(review): pydantic documents ValueError/AssertionError as the
+            # exceptions it turns into a ValidationError; confirm that a bare
+            # TypeError is the intended failure mode here.
+            if not isinstance(value, (cls, str)):
+                raise TypeError(f"cannot convert {type(value).__name__} to SymbolID")
+            return value if isinstance(value, cls) else cls.parse(value)
+        as_string = core_schema.plain_serializer_function_ser_schema(
+            str,
+            when_used="always",
+            return_schema=core_schema.str_schema(),
+        )
+        return core_schema.no_info_plain_validator_function(_coerce, serialization=as_string)
+class SymbolParseError(ValueError):
+    """Signals that a string is not a well-formed serialized SCIP symbol."""
+# ---------------------------------------------------------------------------
+# Parser internals
+# ---------------------------------------------------------------------------
+_IDENT_EXTRA = frozenset("_$+-.")
+def _is_ident_char(c: str) -> bool:
+    """Return True when ``c`` may appear in an unescaped (simple) identifier.
+    That is anything ``str.isalnum`` accepts, plus the punctuation characters
+    collected in ``_IDENT_EXTRA``.
+    """
+    return c in _IDENT_EXTRA or c.isalnum()
+def _needs_escape(name: str) -> bool:
+    """Tell whether ``name`` has to be written in backtick-escaped form.
+    True for the empty name and for any name containing a character that is
+    not a simple-identifier character.
+    """
+    if name:
+        return not all(_is_ident_char(ch) for ch in name)
+    return True
+def _encode_name(name: str) -> str:
+    """Return ``name`` as it appears inside a serialized symbol.
+    Names that need no escaping pass through unchanged. Everything else
+    (including the empty name) is wrapped in backticks, with each embedded
+    backtick doubled.
+    """
+    if _needs_escape(name):
+        doubled = name.replace("`", "``")
+        return f"`{doubled}`"
+    return name
+def _parse_symbol(s: str) -> SymbolID:
+    """Parse the serialized form ``scheme manager package version descriptors``.
+    Raises :class:`SymbolParseError` when ``s`` is not a ``str``, has fewer than
+    five space-separated fields, has an empty scheme, or yields no descriptor.
+    Empty manager/package/version fields are normalized to ``'.'``.
+    """
+    if not isinstance(s, str):
+        raise SymbolParseError(f"symbol must be str, got {type(s).__name__}")
+    # Only the first four spaces delimit header fields. Later spaces belong to
+    # the descriptor stream, where they can occur inside backtick-escaped names.
+    fields = s.split(" ", 4)
+    field_count = len(fields)
+    if field_count < 5:
+        raise SymbolParseError(
+            f"invalid SCIP symbol: need at least 5 space-separated fields "
+            f"(scheme manager package version descriptors...), got {field_count} in {s!r}"
+        )
+    scheme, *package_info, stream = fields
+    if not scheme:
+        raise SymbolParseError("scheme must be non-empty")
+    parsed = tuple(_parse_descriptors(stream))
+    if not parsed:
+        raise SymbolParseError(f"symbol must have at least one descriptor: {s!r}")
+    manager, package_name, package_version = (value or "." for value in package_info)
+    return SymbolID(
+        scheme=scheme,
+        manager=manager,
+        package_name=package_name,
+        package_version=package_version,
+        descriptors=parsed,
+    )
+def _parse_descriptors(s: str) -> list[Descriptor]:
+    """Parse the descriptor stream of a symbol into :class:`Descriptor` objects.
+    The stream is scanned left to right. ``[name]`` and ``(name)`` produce
+    TYPE_PARAMETER and PARAMETER descriptors. Any other descriptor is a name
+    followed by ``/``, ``#``, ``:``, ``.`` or ``(disambiguator?).``.
+    An empty name is accepted only in its escaped spelling (two backticks).
+    That is the only form serialization produces for an empty name, so
+    rejecting the bare spelling keeps ``parse(s).to_string() == s``.
+    Args:
+        s: The descriptor part of a serialized symbol (everything after the
+            four header fields).
+    Returns:
+        The descriptors in source order; an empty list for an empty stream.
+    Raises:
+        SymbolParseError: On an unterminated bracket, a missing or unknown
+            suffix, a malformed method descriptor, or an unescaped empty name.
+    """
+    simple_suffixes = {
+        "/": DescriptorKind.NAMESPACE,
+        "#": DescriptorKind.TYPE,
+        ":": DescriptorKind.META,
+        ".": DescriptorKind.TERM,
+    }
+    # opener -> (closer, kind, label used in the "unterminated" message)
+    bracketed = {
+        "[": ("]", DescriptorKind.TYPE_PARAMETER, "[type_parameter]"),
+        "(": (")", DescriptorKind.PARAMETER, "(parameter)"),
+    }
+    out: list[Descriptor] = []
+    i = 0
+    n = len(s)
+    while i < n:
+        c = s[i]
+        if c in bracketed:
+            closer, kind, label = bracketed[c]
+            name, j = _read_name(s, i + 1)
+            if j >= n or s[j] != closer:
+                raise SymbolParseError(f"unterminated {label} at offset {i} in {s!r}")
+            # Reaching this point means j < n, so s[i + 1] exists.
+            if not name and s[i + 1] != "`":
+                raise SymbolParseError(
+                    f"empty name must be backtick-escaped at offset {i + 1} in {s!r}"
+                )
+            out.append(Descriptor(name=name, kind=kind))
+            i = j + 1
+            continue
+        start = i
+        escaped = c == "`"
+        name, j = _read_name(s, i)
+        # Trailing-dot recovery: '.' is an identifier character, so a greedy
+        # read that runs to the end of the input has swallowed the '.' that was
+        # meant as the term suffix. Give it back. An escaped name is delimited
+        # by its closing backtick, so it can never have swallowed a suffix.
+        if not escaped and j == n and name.endswith("."):
+            name = name[:-1]
+            j -= 1
+        if j >= n:
+            raise SymbolParseError(f"missing descriptor suffix after name at offset {i} in {s!r}")
+        suffix = s[j]
+        if suffix in simple_suffixes:
+            descriptor = Descriptor(name=name, kind=simple_suffixes[suffix])
+            i = j + 1
+        elif suffix == "(":
+            disambig, k = _read_name(s, j + 1)
+            if k >= n or s[k] != ")":
+                raise SymbolParseError(f"unterminated method '(' at offset {j} in {s!r}")
+            if k + 1 >= n or s[k + 1] != ".":
+                raise SymbolParseError(
+                    f"method ')' must be followed by '.' at offset {k + 1} in {s!r}"
+                )
+            descriptor = Descriptor(
+                name=name,
+                kind=DescriptorKind.METHOD,
+                disambiguator=disambig,
+            )
+            i = k + 2
+        else:
+            raise SymbolParseError(f"unknown descriptor suffix {suffix!r} at offset {j} in {s!r}")
+        # Checked last so that input which was already rejected keeps its
+        # original, more specific error message.
+        if not name and not escaped:
+            raise SymbolParseError(
+                f"empty name must be backtick-escaped at offset {start} in {s!r}"
+            )
+        out.append(descriptor)
+    return out
+def _read_name(s: str, i: int) -> tuple[str, int]:
+    """Read one ``<name>`` token starting at index ``i``.
+    Returns ``(name, next_index)``. A leading backtick selects the escaped
+    form, inside which two consecutive backticks stand for one literal
+    backtick. Otherwise the longest run of simple-identifier characters is
+    taken, which may be empty. At or past the end of ``s`` the result is
+    ``("", i)``.
+    """
+    length = len(s)
+    if i >= length:
+        return "", i
+    if s[i] == "`":
+        return _read_escaped(s, i + 1)
+    # Index of the first character that cannot continue a simple identifier.
+    end = next((k for k in range(i, length) if not _is_ident_char(s[k])), length)
+    return s[i:end], end
+def _read_escaped(s: str, i: int) -> tuple[str, int]:
+    """Read the remainder of a backtick-escaped name.
+    ``i`` is the index just after the opening backtick. Returns the decoded
+    name and the index just after the closing backtick. A doubled backtick
+    decodes to one literal backtick. Raises :class:`SymbolParseError` when no
+    closing backtick is found.
+    """
+    pieces: list[str] = []
+    pos = i
+    while True:
+        tick = s.find("`", pos)
+        if tick == -1:
+            raise SymbolParseError(f"unterminated escaped name starting at offset {i - 1}")
+        pieces.append(s[pos:tick])
+        if not s.startswith("`", tick + 1):
+            # A lone backtick closes the name.
+            return "".join(pieces), tick + 1
+        # A doubled backtick is an escaped literal; keep scanning after it.
+        pieces.append("`")
+        pos = tick + 2
+__all__ = [
+    "Descriptor",
+    "DescriptorKind",
+    "SymbolID",
+    "SymbolParseError",
+]

codemap/diagnostics/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""Cross-cutting diagnostics: exit codes, logging, progress."""
+from __future__ import annotations

codemap/diagnostics/exit_codes.py ADDED Viewed

@@ -0,0 +1,30 @@
+"""sysexits.h-compatible exit codes (ADR-005).
+A subset of BSD sysexits used consistently across CLI entry points so callers
+(scripts, CI, MCP) can branch on specific failure modes without parsing stderr.
+"""
+from __future__ import annotations
+from enum import IntEnum
+class ExitCode(IntEnum):
+    """Process exit statuses used by the CodeMap CLI.
+    Apart from ``OK`` and ``GENERIC_ERROR``, each value is the BSD
+    ``sysexits.h`` number named in the comment above the member.
+    """
+    OK = 0
+    GENERIC_ERROR = 1
+    # EX_USAGE
+    USAGE_ERROR = 64
+    # EX_DATAERR — corrupt index, incompatible schema
+    DATA_ERROR = 65
+    # EX_NOINPUT — `.codemap/` missing or no source files
+    NO_INPUT = 66
+    # EX_NOUSER (reserved)
+    NO_USER = 67
+    # EX_UNAVAILABLE (reserved)
+    UNAVAILABLE = 69
+    # EX_SOFTWARE
+    INTERNAL_BUG = 70
+    # EX_OSERR (reserved)
+    OS_ERROR = 71
+    # EX_CANTCREAT — write failed
+    CANT_CREATE = 73
+    # EX_IOERR (reserved)
+    IO_ERROR = 74
+    # EX_TEMPFAIL — lock contention timeout
+    TEMP_FAIL = 75
+    # EX_CONFIG
+    CONFIG_ERROR = 78
+__all__ = ["ExitCode"]

codemap/diagnostics/logging.py ADDED Viewed

@@ -0,0 +1,65 @@
+"""Logging configuration for the CLI.
+Defaults to WARNING. ``-v`` raises to INFO; ``-vv`` raises to DEBUG. Output
+goes to stderr through ``rich.logging.RichHandler`` for readability, or to a
+JSON line per record when ``--log-format json`` is set.
+"""
+from __future__ import annotations
+import json
+import logging
+import sys
+import traceback
+from typing import Literal
+from rich.console import Console
+from rich.logging import RichHandler
+LogFormat = Literal["text", "json"]
+class _JsonHandler(logging.Handler):
+    """Logging handler that writes each record to stderr as one JSON line.
+    Every object has ``ts`` (record creation time in integer milliseconds),
+    ``level``, ``logger`` and ``msg``. ``exc_info`` holds the formatted
+    traceback when the record carries exception information.
+    """
+    def emit(self, record: logging.LogRecord) -> None:
+        """Serialize ``record`` and write it to the current ``sys.stderr``.
+        A failure while formatting or writing (for example a log call whose
+        arguments do not match its format string) is passed to
+        :meth:`logging.Handler.handleError` instead of propagating into the
+        code that made the logging call.
+        """
+        try:
+            payload: dict[str, object] = {
+                "ts": int(record.created * 1000),
+                "level": record.levelname,
+                "logger": record.name,
+                "msg": record.getMessage(),
+            }
+            if record.exc_info:
+                payload["exc_info"] = "".join(traceback.format_exception(*record.exc_info))
+            # Looked up on every call so a later redirection of stderr is honoured.
+            stream = sys.stderr
+            stream.write(json.dumps(payload, ensure_ascii=False) + "\n")
+            stream.flush()
+        except RecursionError:
+            # Same carve-out as the standard library's StreamHandler.
+            raise
+        except Exception:
+            self.handleError(record)
+def configure_logging(verbosity: int = 0, *, log_format: LogFormat = "text") -> None:
+    """Replace the root logger's handlers and set its level.
+    ``verbosity`` of 0 or less selects WARNING, exactly 1 selects INFO, and
+    anything else selects DEBUG. Every handler currently attached to the root
+    logger is removed first, so repeated calls leave a single handler: the
+    JSON-lines handler for ``log_format="json"``, otherwise a ``RichHandler``
+    bound to a stderr console.
+    """
+    level = logging.DEBUG
+    if verbosity <= 0:
+        level = logging.WARNING
+    elif verbosity == 1:
+        level = logging.INFO
+    root = logging.getLogger()
+    # Iterate over a copy: removeHandler mutates ``root.handlers``.
+    for existing in root.handlers[:]:
+        root.removeHandler(existing)
+    handler: logging.Handler
+    if log_format == "json":
+        handler = _JsonHandler()
+    else:
+        handler = RichHandler(
+            console=Console(stderr=True),
+            show_time=False,
+            show_path=False,
+            markup=False,
+            rich_tracebacks=True,
+        )
+    root.addHandler(handler)
+    root.setLevel(level)
+__all__ = ["LogFormat", "configure_logging"]

codemap/diagnostics/progress.py ADDED Viewed

@@ -0,0 +1,68 @@
+"""Thin wrapper over ``rich.progress`` used by the indexing pipeline."""
+from __future__ import annotations
+from collections.abc import Iterator
+from contextlib import contextmanager
+from typing import TypeVar
+from rich.console import Console
+from rich.progress import (
+    BarColumn,
+    Progress,
+    SpinnerColumn,
+    TextColumn,
+    TimeElapsedColumn,
+)
+T = TypeVar("T")
+@contextmanager
+def progress_bar(
+    description: str,
+    total: int | None = None,
+    *,
+    enabled: bool = True,
+    console: Console | None = None,
+) -> Iterator[Progress]:
+    """Context manager yielding a progress display for one task.
+    When ``enabled`` is true, a transient ``rich`` ``Progress`` is started
+    (spinner, description, 30-wide bar, percentage, elapsed time) on
+    ``console``, or on a new stderr console when none is given. One task with
+    ``description`` and ``total`` is registered before the ``Progress`` is
+    yielded. When ``enabled`` is false, a ``_SilentProgress`` stand-in is
+    yielded instead and nothing is drawn.
+    """
+    if enabled:
+        columns = (
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            BarColumn(bar_width=30),
+            TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
+            TimeElapsedColumn(),
+        )
+        with Progress(
+            *columns,
+            console=console or Console(stderr=True),
+            transient=True,
+        ) as live:
+            live.add_task(description, total=total)
+            yield live
+    else:
+        yield _SilentProgress()  # type: ignore[misc]
+class _SilentProgress:
+    """No-op replacement for ``Progress``, used when ``--no-progress`` is set.
+    It provides ``add_task``, ``update``, ``advance``, ``stop`` and the
+    context-manager protocol. Every call is accepted and ignored.
+    """
+    def __enter__(self) -> _SilentProgress:
+        return self
+    def __exit__(self, *exc: object) -> None:  # pragma: no cover
+        """Do nothing on exit; returning None lets exceptions propagate."""
+    def add_task(self, *_a: object, **_kw: object) -> int:  # pragma: no cover
+        """Ignore the task definition and return task id 0."""
+        return 0
+    def update(self, *_a: object, **_kw: object) -> None:  # pragma: no cover
+        """Accept and ignore a progress update."""
+    def advance(self, *_a: object, **_kw: object) -> None:  # pragma: no cover
+        """Accept and ignore a progress advance."""
+    def stop(self) -> None:  # pragma: no cover
+        """Nothing to stop."""
+__all__ = ["progress_bar"]

codemap/indexers/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""Pluggable indexer registry.
+Built-in indexers are registered through `pyproject.toml`'s `codemap.indexers`
+entry-point group on equal footing with third-party plugins (ADR-004, ADR-L001).
+No language is privileged; the main repository ships only a reference
+`_example_lang` indexer used to validate the end-to-end pipeline.
+"""
+from __future__ import annotations

codemap/indexers/_example_lang.py ADDED Viewed

@@ -0,0 +1,135 @@
+"""Reference indexer used to validate the end-to-end pipeline.
+Files ending in ``.example`` are treated as a tiny pseudo-language:
+* Each line starting with ``def NAME`` declares a function symbol named
+  ``NAME`` at that line.
+* Each line containing ``call NAME`` records a ``calls`` edge from the most
+  recent ``def`` to ``NAME``.
+The indexer is intentionally minimal — it exists so Sprint 0 can prove that
+the storage, registry, and CLI layers compose end-to-end without depending on
+any real-language parser. It is published through the ``codemap.indexers``
+entry-point and registers on equal footing with third-party indexers.
+"""
+from __future__ import annotations
+from pathlib import Path, PurePosixPath
+from typing import ClassVar
+from codemap.core.models import Diagnostic, Edge, IndexResult, Range, Symbol
+from codemap.core.symbol import Descriptor, DescriptorKind, SymbolID
+from codemap.indexers.base import IndexContext
+LANG = "example"
+SCHEME = "scip-example"
+class ExampleLangIndexer:
+    """Indexer for the ``.example`` reference pseudo-language.
+    ``def NAME`` lines become function symbols, and ``call NAME`` lines become
+    ``calls`` edges from the most recent ``def`` to ``NAME``.
+    """
+    name: ClassVar[str] = "_example_lang"
+    version: ClassVar[str] = "0.1.0"
+    file_patterns: ClassVar[list[str]] = ["*.example"]
+    languages: ClassVar[list[str]] = [LANG]
+    def supports(self, path: Path) -> bool:
+        """Return True for paths whose suffix is exactly ``.example``."""
+        return path.suffix == ".example"
+    def index_file(
+        self,
+        path: Path,
+        source: bytes,
+        ctx: IndexContext,
+    ) -> IndexResult:
+        """Index one file's bytes into symbols, edges and diagnostics.
+        Source that is not valid UTF-8 yields a single ``EXAMPLE001`` error
+        diagnostic. Blank lines and lines starting with ``#`` are skipped. A
+        ``def`` with an empty name yields ``EXAMPLE002``, and a ``call`` before
+        any ``def`` yields ``EXAMPLE003``; both are warnings.
+        """
+        def warn(line_no: int, code: str, message: str) -> Diagnostic:
+            # Every warning from this indexer is anchored to a single line.
+            return Diagnostic(
+                severity="warning",
+                file=ctx.relative_path,
+                range=Range(start_line=line_no, end_line=line_no),
+                code=code,
+                message=message,
+                producer=self.name,
+            )
+        try:
+            text = source.decode("utf-8")
+        except UnicodeDecodeError as exc:
+            undecodable = Diagnostic(
+                severity="error",
+                file=ctx.relative_path,
+                code="EXAMPLE001",
+                message=f"not valid UTF-8: {exc}",
+                producer=self.name,
+            )
+            return IndexResult(diagnostics=[undecodable])
+        symbols: list[Symbol] = []
+        edges: list[Edge] = []
+        diagnostics: list[Diagnostic] = []
+        # Symbol of the most recent ``def``; ``call`` lines attach to it.
+        enclosing: SymbolID | None = None
+        for line_no, raw in enumerate(text.splitlines(), start=1):
+            line = raw.strip()
+            if line == "" or line[0] == "#":
+                continue
+            if line.startswith("def "):
+                name = line.removeprefix("def ").split()[0].rstrip("()")
+                if name:
+                    sid = _make_symbol_id(ctx.relative_path, name)
+                    symbols.append(
+                        Symbol(
+                            id=sid,
+                            kind="function",
+                            language=LANG,
+                            file=ctx.relative_path,
+                            range=Range(start_line=line_no, end_line=line_no),
+                            signature=f"def {name}()",
+                        )
+                    )
+                    enclosing = sid
+                else:
+                    diagnostics.append(warn(line_no, "EXAMPLE002", "empty def name"))
+                continue
+            if "call " not in line:
+                continue
+            # The first whitespace-delimited word after the first "call ".
+            target_name = line.partition("call ")[2].split()[0]
+            if enclosing is None:
+                diagnostics.append(
+                    warn(line_no, "EXAMPLE003", f"'call {target_name}' outside any def")
+                )
+                continue
+            edges.append(
+                Edge(
+                    source=enclosing,
+                    target=_make_symbol_id(ctx.relative_path, target_name),
+                    kind="calls",
+                    location=Range(start_line=line_no, end_line=line_no),
+                )
+            )
+        return IndexResult(symbols=symbols, edges=edges, diagnostics=diagnostics)
+def _make_symbol_id(_file: PurePosixPath, function_name: str) -> SymbolID:
+    """Build the SymbolID for ``function_name``; ``_file`` is accepted but unused.
+    The result has scheme ``SCHEME``, default header fields and a single
+    METHOD descriptor.
+    """
+    # The file is deliberately left out of the identifier: the reference
+    # language has one global namespace, so a ``call`` in one file resolves to
+    # the same SymbolID as a ``def`` in another. Real-world indexers generally
+    # encode the file/module into the namespace.
+    method = Descriptor(name=function_name, kind=DescriptorKind.METHOD)
+    return SymbolID(scheme=SCHEME, descriptors=(method,))
+__all__ = ["LANG", "SCHEME", "ExampleLangIndexer"]