codebase-mcp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,462 @@
1
+ """
2
+ Lightweight parsers for non-code files:
3
+ - Markdown (.md) → headings as symbols
4
+ - JSON (.json) → top-level keys as symbols
5
+ - TOML (.toml) → sections + keys as symbols
6
+ - YAML (.yaml, .yml) → top-level keys as symbols
7
+ - Makefile → targets as symbols
8
+ - Dockerfile → stages and major instructions as symbols
9
+ - .env → key names ONLY (never values) as symbols
10
+ - .proto (Protocol Buffers) → messages, enums, services as symbols
11
+ - .graphql / .gql → types, queries, mutations as symbols
12
+ - .sql → CREATE TABLE/FUNCTION/PROCEDURE/VIEW as symbols
13
+
14
+ These use stdlib + regex, NOT tree-sitter, so they work without any extra packages.
15
+ """
16
+
17
+ from __future__ import annotations
18
+ import json
19
+ import re
20
+ from ..models import ParseResult, ParsedSymbol, ParsedImport
21
+ from .base import BaseParser, register
22
+
23
+
24
+ # ─── Markdown ─────────────────────────────────────────────────────────────────
25
+
26
+ @register("markdown")
27
+ class MarkdownParser(BaseParser):
28
+ language_name = "markdown"
29
+
30
+ def parse(self, path: str, content: str) -> ParseResult:
31
+ result = ParseResult(language="markdown", line_count=content.count("\n") + 1)
32
+ lines = content.splitlines()
33
+ for i, line in enumerate(lines, 1):
34
+ m = re.match(r'^(#{1,6})\s+(.+)', line)
35
+ if m:
36
+ level = len(m.group(1))
37
+ heading = m.group(2).strip()
38
+ result.symbols.append(ParsedSymbol(
39
+ kind="heading",
40
+ name=heading[:120],
41
+ line_start=i,
42
+ line_end=i,
43
+ signature=line.strip()[:120],
44
+ is_exported=True,
45
+ ))
46
+ # Also extract code fence language hints as import-like refs
47
+ m2 = re.match(r'^```(\w+)', line)
48
+ if m2:
49
+ result.imports.append(ParsedImport(
50
+ imported_name=m2.group(1),
51
+ line_number=i,
52
+ import_kind="from",
53
+ ))
54
+ return result
55
+
56
+
57
+ # ─── JSON ─────────────────────────────────────────────────────────────────────
58
+
59
+ @register("json")
60
+ class JSONParser(BaseParser):
61
+ language_name = "json"
62
+
63
+ @staticmethod
64
+ def _strip_comments(text: str) -> str:
65
+ """Strip // and /* */ comments for JSONC/tsconfig/launch.json files."""
66
+ import re
67
+ # Remove block comments /* ... */
68
+ text = re.sub(r'/\*.*?\*/', '', text, flags=re.DOTALL)
69
+ # Remove line comments // ... (not inside strings — best-effort)
70
+ text = re.sub(r'(?<!:)//[^\n]*', '', text)
71
+ # Remove trailing commas before } or ] (common in JSONC)
72
+ text = re.sub(r',(\s*[}\]])', r'\1', text)
73
+ return text
74
+
75
+ def parse(self, path: str, content: str) -> ParseResult:
76
+ result = ParseResult(language="json", line_count=content.count("\n") + 1)
77
+ try:
78
+ data = json.loads(content)
79
+ except (json.JSONDecodeError, ValueError):
80
+ # Retry as JSONC (JSON with comments) — tsconfig.json, launch.json, etc.
81
+ try:
82
+ data = json.loads(self._strip_comments(content))
83
+ except (json.JSONDecodeError, ValueError):
84
+ result.parse_error = "Invalid JSON"
85
+ return result
86
+
87
+ if not isinstance(data, dict):
88
+ return result
89
+
90
+ # package.json special handling
91
+ is_package_json = path.endswith("package.json")
92
+ for key, value in data.items():
93
+ kind = "variable"
94
+ if key == "scripts" and isinstance(value, dict):
95
+ for script_name in value:
96
+ result.symbols.append(ParsedSymbol(
97
+ kind="function",
98
+ name=f"script:{script_name}",
99
+ line_start=1,
100
+ line_end=1,
101
+ signature=f"npm run {script_name}",
102
+ ))
103
+ continue
104
+ if key in ("dependencies", "devDependencies", "peerDependencies") and isinstance(value, dict):
105
+ for dep in value:
106
+ result.imports.append(ParsedImport(
107
+ imported_name=dep,
108
+ line_number=1,
109
+ import_kind="module",
110
+ ))
111
+ continue
112
+ val_preview = json.dumps(value)[:60] if value is not None else "null"
113
+ result.symbols.append(ParsedSymbol(
114
+ kind="variable",
115
+ name=key,
116
+ line_start=1,
117
+ line_end=1,
118
+ signature=f"{key}: {val_preview}",
119
+ ))
120
+ return result
121
+
122
+
123
+ # ─── TOML ─────────────────────────────────────────────────────────────────────
124
+
125
+ @register("toml")
126
+ class TOMLParser(BaseParser):
127
+ language_name = "toml"
128
+
129
+ def parse(self, path: str, content: str) -> ParseResult:
130
+ result = ParseResult(language="toml", line_count=content.count("\n") + 1)
131
+ lines = content.splitlines()
132
+ current_section = ""
133
+
134
+ for i, line in enumerate(lines, 1):
135
+ stripped = line.strip()
136
+ if not stripped or stripped.startswith("#"):
137
+ continue
138
+
139
+ # [section] or [[array]]
140
+ m = re.match(r'^\[{1,2}([^\]]+)\]{1,2}', stripped)
141
+ if m:
142
+ current_section = m.group(1).strip()
143
+ result.symbols.append(ParsedSymbol(
144
+ kind="class",
145
+ name=current_section,
146
+ line_start=i,
147
+ line_end=i,
148
+ signature=stripped,
149
+ ))
150
+ continue
151
+
152
+ # key = value
153
+ m2 = re.match(r'^([a-zA-Z_][a-zA-Z0-9_\-\.]*)\s*=', stripped)
154
+ if m2:
155
+ key = m2.group(1)
156
+ full_key = f"{current_section}.{key}" if current_section else key
157
+ result.symbols.append(ParsedSymbol(
158
+ kind="variable",
159
+ name=key,
160
+ line_start=i,
161
+ line_end=i,
162
+ signature=stripped[:80],
163
+ parent_name=current_section if current_section else None,
164
+ ))
165
+
166
+ return result
167
+
168
+
169
+ # ─── YAML ─────────────────────────────────────────────────────────────────────
170
+
171
+ @register("yaml")
172
+ class YAMLParser(BaseParser):
173
+ language_name = "yaml"
174
+
175
+ def parse(self, path: str, content: str) -> ParseResult:
176
+ result = ParseResult(language="yaml", line_count=content.count("\n") + 1)
177
+
178
+ # Try PyYAML if available
179
+ try:
180
+ import yaml
181
+ try:
182
+ data = yaml.safe_load(content)
183
+ if isinstance(data, dict):
184
+ self._extract_yaml_keys(data, result, depth=0, max_depth=2)
185
+ return result
186
+ except yaml.YAMLError:
187
+ pass
188
+ except ImportError:
189
+ pass
190
+
191
+ # Fallback: regex-based top-level key extraction
192
+ lines = content.splitlines()
193
+ for i, line in enumerate(lines, 1):
194
+ m = re.match(r'^([a-zA-Z_][a-zA-Z0-9_\-]*):', line)
195
+ if m:
196
+ result.symbols.append(ParsedSymbol(
197
+ kind="variable",
198
+ name=m.group(1),
199
+ line_start=i,
200
+ line_end=i,
201
+ signature=line.strip()[:80],
202
+ ))
203
+ return result
204
+
205
+ def _extract_yaml_keys(self, data: dict, result: ParseResult,
206
+ depth: int, max_depth: int, parent: str = "") -> None:
207
+ for key, value in data.items():
208
+ name = str(key)
209
+ result.symbols.append(ParsedSymbol(
210
+ kind="class" if isinstance(value, dict) else "variable",
211
+ name=name,
212
+ line_start=1,
213
+ line_end=1,
214
+ signature=f"{name}: {str(value)[:60]}" if not isinstance(value, dict) else f"{name}:",
215
+ parent_name=parent or None,
216
+ ))
217
+ if isinstance(value, dict) and depth < max_depth:
218
+ self._extract_yaml_keys(value, result, depth + 1, max_depth, name)
219
+
220
+
221
+ # ─── Makefile ─────────────────────────────────────────────────────────────────
222
+
223
+ @register("makefile")
224
+ class MakefileParser(BaseParser):
225
+ language_name = "makefile"
226
+
227
+ def parse(self, path: str, content: str) -> ParseResult:
228
+ result = ParseResult(language="makefile", line_count=content.count("\n") + 1)
229
+ lines = content.splitlines()
230
+ for i, line in enumerate(lines, 1):
231
+ # Target: target_name: [dependencies]
232
+ m = re.match(r'^([a-zA-Z_][a-zA-Z0-9_\-\./]*)(?:\s*:(?!=))', line)
233
+ if m and not line.startswith("\t"):
234
+ target = m.group(1)
235
+ if not target.startswith(".") or target in (".PHONY", ".DEFAULT", ".SILENT"):
236
+ result.symbols.append(ParsedSymbol(
237
+ kind="function",
238
+ name=target,
239
+ line_start=i,
240
+ line_end=i,
241
+ signature=line.strip()[:120],
242
+ is_exported=not target.startswith("_"),
243
+ ))
244
+ # Variable definitions
245
+ m2 = re.match(r'^([A-Z_][A-Z0-9_]*)\s*[:?!]?=', line)
246
+ if m2:
247
+ result.symbols.append(ParsedSymbol(
248
+ kind="variable",
249
+ name=m2.group(1),
250
+ line_start=i,
251
+ line_end=i,
252
+ signature=line.strip()[:80],
253
+ ))
254
+ return result
255
+
256
+
257
+ # ─── Dockerfile ───────────────────────────────────────────────────────────────
258
+
259
+ @register("dockerfile")
260
+ class DockerfileParser(BaseParser):
261
+ language_name = "dockerfile"
262
+
263
+ def parse(self, path: str, content: str) -> ParseResult:
264
+ result = ParseResult(language="dockerfile", line_count=content.count("\n") + 1)
265
+ lines = content.splitlines()
266
+ stage_count = 0
267
+ for i, line in enumerate(lines, 1):
268
+ stripped = line.strip()
269
+ if not stripped or stripped.startswith("#"):
270
+ continue
271
+
272
+ upper = stripped.upper()
273
+ # FROM image AS stage_name
274
+ m = re.match(r'FROM\s+\S+(?:\s+AS\s+(\S+))?', stripped, re.IGNORECASE)
275
+ if m:
276
+ stage_name = m.group(1) or f"stage_{stage_count}"
277
+ stage_count += 1
278
+ result.symbols.append(ParsedSymbol(
279
+ kind="class",
280
+ name=stage_name,
281
+ line_start=i,
282
+ line_end=i,
283
+ signature=stripped[:120],
284
+ ))
285
+ continue
286
+
287
+ # ARG / ENV variable declarations
288
+ for keyword in ("ARG", "ENV"):
289
+ if upper.startswith(keyword + " "):
290
+ m2 = re.match(rf'{keyword}\s+([A-Z_][A-Z0-9_]*)', stripped, re.IGNORECASE)
291
+ if m2:
292
+ result.symbols.append(ParsedSymbol(
293
+ kind="variable",
294
+ name=m2.group(1),
295
+ line_start=i,
296
+ line_end=i,
297
+ signature=stripped[:80],
298
+ ))
299
+
300
+ return result
301
+
302
+
303
+ # ─── .env files ───────────────────────────────────────────────────────────────
304
+
305
+ @register("env")
306
+ class EnvParser(BaseParser):
307
+ language_name = "env"
308
+
309
+ def parse(self, path: str, content: str) -> ParseResult:
310
+ result = ParseResult(language="env", line_count=content.count("\n") + 1)
311
+ lines = content.splitlines()
312
+ for i, line in enumerate(lines, 1):
313
+ stripped = line.strip()
314
+ if not stripped or stripped.startswith("#"):
315
+ continue
316
+ m = re.match(r'^export\s+([A-Za-z_][A-Za-z0-9_]*)=?', stripped)
317
+ if not m:
318
+ m = re.match(r'^([A-Za-z_][A-Za-z0-9_]*)=', stripped)
319
+ if m:
320
+ key_name = m.group(1)
321
+ result.symbols.append(ParsedSymbol(
322
+ kind="variable",
323
+ name=key_name,
324
+ line_start=i,
325
+ line_end=i,
326
+ # NEVER store the value — only the key name
327
+ signature=f"{key_name}=<value>",
328
+ is_exported=True,
329
+ ))
330
+ return result
331
+
332
+
333
+ # ─── Protocol Buffers (.proto) ────────────────────────────────────────────────
334
+
335
+ @register("proto")
336
+ class ProtoParser(BaseParser):
337
+ language_name = "proto"
338
+
339
+ def parse(self, path: str, content: str) -> ParseResult:
340
+ result = ParseResult(language="proto", line_count=content.count("\n") + 1)
341
+ lines = content.splitlines()
342
+ for i, line in enumerate(lines, 1):
343
+ stripped = line.strip()
344
+ for keyword, kind in [("message", "class"), ("enum", "enum"),
345
+ ("service", "interface"), ("rpc", "function")]:
346
+ m = re.match(rf'^{keyword}\s+(\w+)', stripped)
347
+ if m:
348
+ result.symbols.append(ParsedSymbol(
349
+ kind=kind,
350
+ name=m.group(1),
351
+ line_start=i,
352
+ line_end=i,
353
+ signature=stripped[:120],
354
+ ))
355
+ # import "foo.proto"
356
+ m2 = re.match(r'^import\s+"([^"]+)"', stripped)
357
+ if m2:
358
+ result.imports.append(ParsedImport(imported_name=m2.group(1), line_number=i))
359
+ return result
360
+
361
+
362
+ # ─── GraphQL (.graphql / .gql) ───────────────────────────────────────────────
363
+
364
+ @register("graphql")
365
+ class GraphQLParser(BaseParser):
366
+ language_name = "graphql"
367
+
368
+ _KEYWORDS = {
369
+ "type": "class", "interface": "interface", "enum": "enum",
370
+ "union": "type_alias", "input": "struct", "scalar": "type_alias",
371
+ "query": "function", "mutation": "function", "subscription": "function",
372
+ "fragment": "function", "directive": "function", "schema": "class",
373
+ "extend": "class",
374
+ }
375
+
376
+ def parse(self, path: str, content: str) -> ParseResult:
377
+ result = ParseResult(language="graphql", line_count=content.count("\n") + 1)
378
+ lines = content.splitlines()
379
+ for i, line in enumerate(lines, 1):
380
+ stripped = line.strip()
381
+ for kw, kind in self._KEYWORDS.items():
382
+ m = re.match(rf'^{kw}\s+(\w+)', stripped)
383
+ if m:
384
+ result.symbols.append(ParsedSymbol(
385
+ kind=kind,
386
+ name=m.group(1),
387
+ line_start=i,
388
+ line_end=i,
389
+ signature=stripped[:120],
390
+ ))
391
+ break
392
+ return result
393
+
394
+
395
+ # ─── SQL ──────────────────────────────────────────────────────────────────────
396
+
397
+ @register("sql")
398
+ class SQLParser(BaseParser):
399
+ language_name = "sql"
400
+
401
+ def parse(self, path: str, content: str) -> ParseResult:
402
+ result = ParseResult(language="sql", line_count=content.count("\n") + 1)
403
+ # Multi-line so we join and scan
404
+ combined = " ".join(l.strip() for l in content.splitlines() if not l.strip().startswith("--"))
405
+ patterns = [
406
+ (r'CREATE\s+(?:OR\s+REPLACE\s+)?TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?([`"\[\w\.]+)', "class"),
407
+ (r'CREATE\s+(?:OR\s+REPLACE\s+)?(?:DEFINER[^F]*)?FUNCTION\s+([`"\[\w\.]+)', "function"),
408
+ (r'CREATE\s+(?:OR\s+REPLACE\s+)?PROCEDURE\s+([`"\[\w\.]+)', "function"),
409
+ (r'CREATE\s+(?:OR\s+REPLACE\s+)?VIEW\s+([`"\[\w\.]+)', "class"),
410
+ (r'CREATE\s+(?:UNIQUE\s+)?INDEX\s+(\w+)', "variable"),
411
+ (r'CREATE\s+(?:OR\s+REPLACE\s+)?TRIGGER\s+(\w+)', "function"),
412
+ ]
413
+ for pattern, kind in patterns:
414
+ for m in re.finditer(pattern, combined, re.IGNORECASE):
415
+ name = m.group(1).strip('`"[]')
416
+ result.symbols.append(ParsedSymbol(
417
+ kind=kind,
418
+ name=name,
419
+ line_start=1,
420
+ line_end=1,
421
+ signature=combined[m.start():m.start() + 80].strip(),
422
+ ))
423
+ return result
424
+
425
+
426
+ # ─── HCL / Terraform (.tf) ───────────────────────────────────────────────────
427
+
428
+ @register("hcl")
429
+ class HCLParser(BaseParser):
430
+ language_name = "hcl"
431
+
432
+ def parse(self, path: str, content: str) -> ParseResult:
433
+ result = ParseResult(language="hcl", line_count=content.count("\n") + 1)
434
+ lines = content.splitlines()
435
+ for i, line in enumerate(lines, 1):
436
+ stripped = line.strip()
437
+ # resource "type" "name" {
438
+ m = re.match(r'^(\w+)\s+"([^"]+)"\s+"([^"]+)"\s*\{?', stripped)
439
+ if m:
440
+ block_type, res_type, res_name = m.groups()
441
+ result.symbols.append(ParsedSymbol(
442
+ kind="class" if block_type == "resource" else "function",
443
+ name=f"{res_type}.{res_name}",
444
+ line_start=i,
445
+ line_end=i,
446
+ signature=stripped[:120],
447
+ ))
448
+ continue
449
+ # variable "name" {, output "name" {, module "name" {
450
+ m2 = re.match(r'^(\w+)\s+"([^"]+)"\s*\{?', stripped)
451
+ if m2:
452
+ block_type, name = m2.groups()
453
+ if block_type in ("variable", "output", "module", "data", "locals",
454
+ "provider", "terraform"):
455
+ result.symbols.append(ParsedSymbol(
456
+ kind="variable" if block_type in ("variable", "output") else "class",
457
+ name=f"{block_type}.{name}" if block_type not in ("variable", "output") else name,
458
+ line_start=i,
459
+ line_end=i,
460
+ signature=stripped[:120],
461
+ ))
462
+ return result
@@ -0,0 +1,95 @@
1
+ """
2
+ Generic regex-based fallback parser.
3
+ Used for languages without a tree-sitter grammar, or when tree-sitter is unavailable.
4
+ Extracts function/class-like patterns with simple regex — better than nothing.
5
+ """
6
+
7
from __future__ import annotations

import bisect
import re

from ..models import ParseResult, ParsedSymbol, ParsedImport
from .base import BaseParser, register
11
+
12
+
13
+ # Patterns that work across most C-style and Python-style languages
14
+ _FUNC_PATTERNS = [
15
+ # Python
16
+ re.compile(r"^\s*(?:async\s+)?def\s+(\w+)\s*\(", re.M),
17
+ # JS/TS
18
+ re.compile(r"^\s*(?:async\s+)?function\s+(\w+)\s*\(", re.M),
19
+ re.compile(r"^\s*(?:export\s+)?(?:async\s+)?(?:function\s+)?(\w+)\s*=\s*(?:async\s*)?\(", re.M),
20
+ # Go
21
+ re.compile(r"^func\s+(?:\(\w+\s+\*?\w+\)\s+)?(\w+)\s*\(", re.M),
22
+ # Rust
23
+ re.compile(r"^\s*(?:pub\s+)?(?:async\s+)?fn\s+(\w+)\s*[(<]", re.M),
24
+ # Java/C#
25
+ re.compile(r"^\s*(?:public|private|protected|static|async)[\s\w]*\s+(\w+)\s*\(", re.M),
26
+ ]
27
+
28
+ _CLASS_PATTERNS = [
29
+ re.compile(r"^\s*class\s+(\w+)", re.M),
30
+ re.compile(r"^\s*(?:pub\s+)?struct\s+(\w+)", re.M),
31
+ re.compile(r"^\s*(?:pub\s+)?enum\s+(\w+)", re.M),
32
+ re.compile(r"^\s*(?:pub\s+)?trait\s+(\w+)", re.M),
33
+ re.compile(r"^\s*interface\s+(\w+)", re.M),
34
+ ]
35
+
36
+ _IMPORT_PATTERNS = [
37
+ re.compile(r"^(?:import|from)\s+([\w.]+)", re.M),
38
+ re.compile(r'^import\s+"([\w./]+)"', re.M),
39
+ re.compile(r'^use\s+([\w:]+)', re.M),
40
+ re.compile(r'^require\s*\(\s*["\']([^"\']+)', re.M),
41
+ re.compile(r'^import\s+.*\s+from\s+["\']([^"\']+)', re.M),
42
+ ]
43
+
44
+
45
class GenericParser(BaseParser):
    """Regex-based fallback: best-effort extraction of functions, classes,
    and imports for files with no dedicated parser."""
    language_name = "generic"

    def parse(self, path: str, content: str) -> ParseResult:
        result = ParseResult(language="generic")
        lines = content.splitlines()
        result.line_count = len(lines)

        # Offsets of every newline, computed once, so a match offset maps to
        # its line number via an O(log n) bisect.  The original recounted
        # the entire prefix (`content[:m.start()].count("\n")`) for every
        # match — accidentally O(n^2) on large files.
        nl_offsets = [idx for idx, ch in enumerate(content) if ch == "\n"]

        def line_of(offset: int) -> int:
            # Number of newlines strictly before `offset`, plus one.
            return bisect.bisect_left(nl_offsets, offset) + 1

        def signature_at(line_no: int) -> str:
            return lines[line_no - 1].strip() if line_no <= len(lines) else ""

        seen_names: set[str] = set()

        # Functions: public names only (leading '_' treated as private).
        for pat in _FUNC_PATTERNS:
            for m in pat.finditer(content):
                name = m.group(1)
                if name and name not in seen_names and not name.startswith("_"):
                    seen_names.add(name)
                    line_no = line_of(m.start())
                    result.symbols.append(ParsedSymbol(
                        kind="function",
                        name=name,
                        line_start=line_no,
                        line_end=line_no,
                        signature=signature_at(line_no),
                    ))

        # Class-like declarations; `seen_names` is shared so a name already
        # reported as a function is not duplicated as a class.
        for pat in _CLASS_PATTERNS:
            for m in pat.finditer(content):
                name = m.group(1)
                if name and name not in seen_names:
                    seen_names.add(name)
                    line_no = line_of(m.start())
                    result.symbols.append(ParsedSymbol(
                        kind="class",
                        name=name,
                        line_start=line_no,
                        line_end=line_no,
                        signature=signature_at(line_no),
                    ))

        # Imports, deduplicated by imported name.
        seen_imports: set[str] = set()
        for pat in _IMPORT_PATTERNS:
            for m in pat.finditer(content):
                name = m.group(1)
                if name and name not in seen_imports:
                    seen_imports.add(name)
                    result.imports.append(ParsedImport(
                        imported_name=name,
                        line_number=line_of(m.start()),
                    ))

        return result