optulus 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contextforge/__init__.py +4 -0
- contextforge/cli.py +185 -0
- contextforge/graph/__init__.py +0 -0
- contextforge/graph/schema.py +50 -0
- contextforge/graph/store.py +224 -0
- contextforge/indexer/__init__.py +0 -0
- contextforge/indexer/embedder.py +64 -0
- contextforge/indexer/parser.py +166 -0
- contextforge/indexer/run.py +93 -0
- contextforge/indexer/tree_sitter_extract.py +273 -0
- contextforge/indexer/walker.py +61 -0
- contextforge/indexer/watch.py +49 -0
- contextforge/indexer/writer.py +18 -0
- contextforge/mcp/__init__.py +0 -0
- contextforge/mcp/__main__.py +31 -0
- contextforge/mcp/indexing.py +65 -0
- contextforge/mcp/server.py +127 -0
- contextforge/query/__init__.py +0 -0
- contextforge/query/context.py +65 -0
- contextforge/query/expand.py +17 -0
- contextforge/query/rank.py +71 -0
- contextforge/query/search.py +76 -0
- contextforge/utils/__init__.py +0 -0
- contextforge/utils/config.py +69 -0
- contextforge/utils/repo.py +41 -0
- contextforge/utils/tokens.py +11 -0
- optulus-0.1.0.dist-info/METADATA +138 -0
- optulus-0.1.0.dist-info/RECORD +32 -0
- optulus-0.1.0.dist-info/WHEEL +5 -0
- optulus-0.1.0.dist-info/entry_points.txt +2 -0
- optulus-0.1.0.dist-info/licenses/LICENSE +201 -0
- optulus-0.1.0.dist-info/top_level.txt +1 -0
contextforge/__init__.py
ADDED
contextforge/cli.py
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""Typer CLI entrypoints."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import time
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
import typer
|
|
10
|
+
from rich.console import Console
|
|
11
|
+
from rich.table import Table
|
|
12
|
+
|
|
13
|
+
from contextforge.graph.store import GraphStore
|
|
14
|
+
from contextforge.indexer.run import run_full_index
|
|
15
|
+
from contextforge.query.context import build_context
|
|
16
|
+
from contextforge.utils.config import load_repo_config
|
|
17
|
+
|
|
18
|
+
app = typer.Typer(help="ContextForge CLI")
|
|
19
|
+
console = Console(stderr=True)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _err(msg: str) -> None:
    """Emit *msg* as a Rich-formatted error line and abort the CLI with exit code 1."""
    text = f"[red]Error:[/red] {msg}"
    console.print(text)
    raise typer.Exit(1)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _repo_root(path: str) -> Path:
|
|
28
|
+
return Path(path).resolve()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _require_index(repo_root: Path) -> Path:
    """Return the path to ``<repo_root>/.cf/index.db``, aborting if it is missing.

    Prints an extra "not initialized" hint when the ``.cf`` directory itself
    does not exist, then exits via ``_err`` (which raises ``typer.Exit``)
    in either missing case, so the function only returns on success.
    """
    cf_dir = repo_root / ".cf"
    db_path = cf_dir / "index.db"
    # The same remediation hint applies to both failure modes; it was
    # previously duplicated as two identical string literals.
    hint = f"No index at {db_path}. Run: cf index <repo> (creates <repo>/.cf/) then cd into that repo for cf context."
    if not cf_dir.exists():
        console.print("[yellow]ContextForge is not initialized in this repo yet.[/yellow]")
        _err(hint)
    if not db_path.exists():
        _err(hint)
    return db_path
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@app.command()
def index(
    path: str,
    watch: bool = typer.Option(False, "--watch"),
    langs: str | None = typer.Option(None, "--langs"),
    reset: bool = typer.Option(False, "--reset"),
) -> None:
    """Build (or rebuild, with --reset) the index for the repo at *path*.

    With --watch, stays in the foreground and re-indexes on file changes.
    """
    repo_root = _repo_root(path)
    if not repo_root.exists():
        _err(f"Path does not exist: {repo_root}")
    config = load_repo_config(repo_root)
    lang_override: set[str] | None = None
    if langs:
        # --langs is a comma-separated list; blank entries are dropped.
        lang_override = {x.strip() for x in langs.split(",") if x.strip()}
    # selected_langs resolves the override against the repo config but is only
    # consumed by the watcher below; run_full_index receives the raw override
    # (None meaning "use config defaults") — presumably it performs the same
    # resolution internally. TODO confirm against run_full_index.
    selected_langs = lang_override if lang_override is not None else set(config.index_languages)
    run_full_index(repo_root, reset=reset, langs=lang_override, console=console)

    if watch:
        # Imported lazily so the watchdog dependency is only needed for --watch.
        from contextforge.indexer.watch import watch_and_reindex

        watch_and_reindex(repo_root, selected_langs, set(config.index_exclude), config.batch_size)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@app.command()
def context(
    task: str,
    max_tokens: int = typer.Option(8000, "--max-tokens"),
    top: int = typer.Option(20, "--top"),
    json_out: bool = typer.Option(False, "--json"),
) -> None:
    """Retrieve a ranked context pack for *task* from the current repo's index.

    Requires an existing index (``cf index <repo>``) in the working directory.
    Prints a Rich table plus the raw context block, or a single JSON document
    with --json.
    """
    repo_root = Path.cwd()
    db_path = _require_index(repo_root)
    store = GraphStore(db_path)
    result = build_context(task=task, store=store, repo_root=repo_root, top_n=top, max_tokens=max_tokens)
    if json_out:
        # Machine-readable output goes to stdout via typer.echo; all human
        # output elsewhere in this file goes to the stderr-bound console.
        typer.echo(
            json.dumps(
                {
                    "session_id": result.session_id,
                    "task": result.task,
                    "token_count": result.token_count,
                    "baseline_tokens": result.baseline_tokens,
                    "savings": result.baseline_tokens - result.token_count,
                    "nodes": [
                        {"id": s.node.id, "kind": s.node.kind, "name": s.node.name, "path": s.node.path, "score": s.score}
                        for s in result.nodes
                    ],
                    # context_block is itself JSON text; decode so the final
                    # document nests it instead of double-encoding.
                    "context_block": json.loads(result.context_block),
                }
            )
        )
        return
    table = Table(title=f'Context for: "{task}"')
    table.add_column("#")
    table.add_column("Kind")
    table.add_column("Name")
    table.add_column("Path")
    table.add_column("Score")
    table.add_column("Tokens")
    for i, sn in enumerate(result.nodes, start=1):
        # "Tokens" column is a rough whitespace word count of the snippet,
        # not a real tokenizer count.
        table.add_row(str(i), sn.node.kind, sn.node.name, sn.node.path, f"{sn.score:.2f}", str(len((sn.node.snippet or "").split())))
    console.print(table)
    console.print(f"Token estimate: {result.token_count}")
    console.print(f"Baseline (est): {result.baseline_tokens}")
    console.print(f"Savings: {result.baseline_tokens - result.token_count}")
    console.print("\n--- CONTEXT BLOCK ---")
    console.print(result.context_block)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
@app.command()
def stats(since: str | None = typer.Option(None, "--since")) -> None:
    """Print usage statistics for recorded context sessions.

    --since currently understands only the "<N>d" (days) suffix; any other
    format is silently ignored and all sessions are reported.
    """
    repo_root = Path.cwd()
    db_path = _require_index(repo_root)
    store = GraphStore(db_path)
    sessions = store.list_sessions()
    if since:
        now = time.time()
        if since.endswith("d"):
            # "7d" -> last 7 days; bare "d" degrades to 0 days.
            days = int(since[:-1] or "0")
            cutoff = now - days * 86400
            sessions = [s for s in sessions if (s.created_at or 0) >= cutoff]
    # NOTE(review): 12000 is an assumed per-task baseline token cost and 15 is
    # an assumed dollar price per million tokens — both are estimates baked in
    # here, not measured values; confirm/centralize if accuracy matters.
    baseline = 12000
    total_tokens = sum(s.token_count or 0 for s in sessions)
    total_baseline = len(sessions) * baseline
    savings = total_baseline - total_tokens
    dollars = (savings / 1_000_000) * 15
    table = Table(title="ContextForge Stats")
    table.add_column("Metric")
    table.add_column("Value")
    table.add_row("Total sessions", str(len(sessions)))
    table.add_row("Avg tokens returned", f"{(total_tokens / len(sessions)):.1f}" if sessions else "0")
    table.add_row("Cumulative token savings", str(savings))
    table.add_row("Estimated dollar savings", f"${dollars:.2f}")
    console.print(table)
    # Frequency count of node ids across all sessions' returned sets.
    counts: dict[str, int] = {}
    for sess in sessions:
        for node_id in sess.nodes_returned or []:
            counts[node_id] = counts.get(node_id, 0) + 1
    top = sorted(counts.items(), key=lambda kv: kv[1], reverse=True)[:10]
    top_table = Table(title="Top Retrieved Nodes")
    top_table.add_column("Node")
    top_table.add_column("Path")
    top_table.add_column("Count")
    for node_id, count in top:
        node = store.get_node(node_id)
        if node is None:
            # Node may have been re-indexed away since the session was recorded.
            continue
        top_table.add_row(node.name, node.path, str(count))
    console.print(top_table)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
@app.command()
def serve(
    transport: str = typer.Option(
        "stdio",
        "--transport",
        help="stdio for Claude Code / Cursor MCP; sse or streamable-http for HTTP clients",
    ),
    host: str = typer.Option("127.0.0.1", "--host", help="Bind address for sse/streamable-http"),
    port: int = typer.Option(8765, "--port", help="Port for sse/streamable-http (ignored for stdio)"),
    auto_index: bool | None = typer.Option(
        None,
        "--auto-index/--no-auto-index",
        help="Create or repair .cf/index.db on server start; default follows CONTEXTFORGE_AUTO_INDEX",
    ),
) -> None:
    """Run the ContextForge MCP server over the chosen transport.

    Validates the transport name up front and delegates to
    contextforge.mcp.server.run_server.
    """
    # Imported lazily so MCP server dependencies are only required for `serve`.
    from contextforge.mcp.server import run_server

    if transport not in ("stdio", "sse", "streamable-http"):
        _err(f"Unknown transport {transport!r}; use stdio, sse, or streamable-http")
    run_server(
        transport=transport,  # type: ignore[arg-type]
        host=host,
        port=port,
        auto_index=auto_index,
    )
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
# Script entry point: delegate to the Typer application when run directly.
if __name__ == "__main__":
    app()
|
|
File without changes
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""SQLite schema and migrations for ContextForge."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sqlite3
|
|
6
|
+
|
|
7
|
+
# Full DDL applied to every connection. Every statement is idempotent
# (IF NOT EXISTS), so running migrate() against an existing database is safe.
SCHEMA_SQL = """
CREATE TABLE IF NOT EXISTS nodes (
    id TEXT PRIMARY KEY,
    kind TEXT NOT NULL,
    name TEXT NOT NULL,
    path TEXT NOT NULL,
    language TEXT,
    start_line INTEGER,
    end_line INTEGER,
    snippet TEXT,
    file_hash TEXT,
    embedding BLOB,
    created_at REAL
);

CREATE TABLE IF NOT EXISTS edges (
    src_id TEXT NOT NULL,
    dst_id TEXT NOT NULL,
    kind TEXT NOT NULL,
    weight REAL DEFAULT 1.0,
    PRIMARY KEY (src_id, dst_id, kind)
);

CREATE TABLE IF NOT EXISTS sessions (
    id TEXT PRIMARY KEY,
    task_text TEXT NOT NULL,
    nodes_returned TEXT,
    nodes_used TEXT,
    outcome_signal INTEGER,
    token_count INTEGER,
    created_at REAL
);

CREATE INDEX IF NOT EXISTS idx_nodes_path ON nodes(path);
CREATE INDEX IF NOT EXISTS idx_nodes_kind ON nodes(kind);
CREATE INDEX IF NOT EXISTS idx_edges_src ON edges(src_id);
CREATE INDEX IF NOT EXISTS idx_edges_dst ON edges(dst_id);
"""


def migrate(conn: sqlite3.Connection) -> None:
    """Create required tables and indexes.

    Idempotent: all statements in SCHEMA_SQL use IF NOT EXISTS, so this is
    safe to call on both fresh and already-migrated databases.
    """
    conn.executescript(SCHEMA_SQL)
    conn.commit()
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
"""Graph store abstraction over SQLite."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import sqlite3
|
|
7
|
+
import time
|
|
8
|
+
import uuid
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
|
|
14
|
+
from contextforge.graph.schema import migrate
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _pack_embedding(vector: np.ndarray | None) -> bytes | None:
|
|
18
|
+
if vector is None:
|
|
19
|
+
return None
|
|
20
|
+
return np.asarray(vector, dtype=np.float32).tobytes()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _unpack_embedding(blob: bytes | None) -> np.ndarray | None:
|
|
24
|
+
if blob is None:
|
|
25
|
+
return None
|
|
26
|
+
return np.frombuffer(blob, dtype=np.float32)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass(slots=True)
class Node:
    """One row of the ``nodes`` table: a file, symbol, or module in the code graph."""

    id: str  # primary key of the nodes table
    kind: str  # node category, e.g. "file" (see file_hash() query) or "symbol"
    name: str  # display name (file name or symbol name)
    path: str  # path of the containing file
    language: str | None = None  # source language, when known
    start_line: int | None = None  # span within the file, when known
    end_line: int | None = None
    snippet: str | None = None  # short source excerpt
    file_hash: str | None = None  # content hash of the owning file
    embedding: np.ndarray | None = None  # float32 vector; stored as BLOB via _pack_embedding
    created_at: float | None = None  # unix timestamp (time.time())
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass(slots=True)
class Edge:
    """A directed, typed relation between two nodes."""

    src_id: str  # Node.id of the source endpoint
    dst_id: str  # Node.id of the target endpoint
    kind: str  # relation type; part of the (src_id, dst_id, kind) primary key
    weight: float = 1.0  # relative strength; 1.0 matches the schema default
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass(slots=True)
class Session:
    """A recorded context-retrieval session and its (optional) outcome."""

    id: str  # uuid4 string assigned in GraphStore.create_session
    task_text: str  # the task that drove retrieval
    nodes_returned: list[str] | None = None  # Node.ids returned to the caller
    nodes_used: list[str] | None = None  # Node.ids reported as used (set later)
    outcome_signal: int | None = None  # 1 = success, 0 = failure (see update_session_outcome)
    token_count: int | None = None  # token estimate of the returned context
    created_at: float | None = None  # unix timestamp (time.time())
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class GraphStore:
    """Read/write helper for nodes, edges, and sessions.

    Owns a single SQLite connection. The schema is created on construction
    via migrate(); all write methods commit immediately.
    """

    def __init__(self, db_path: Path) -> None:
        """Open (creating parent directories and schema as needed) the DB at *db_path*."""
        self.db_path = db_path
        # Create the .cf/ directory on first use so callers need no setup step.
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self.conn = sqlite3.connect(self.db_path)
        self.conn.row_factory = sqlite3.Row
        migrate(self.conn)

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self.conn.close()

    @staticmethod
    def _node_from_row(row: sqlite3.Row) -> Node:
        """Build a Node from a nodes-table row (shared by get_node and list_nodes)."""
        return Node(
            id=row["id"],
            kind=row["kind"],
            name=row["name"],
            path=row["path"],
            language=row["language"],
            start_line=row["start_line"],
            end_line=row["end_line"],
            snippet=row["snippet"],
            file_hash=row["file_hash"],
            embedding=_unpack_embedding(row["embedding"]),
            created_at=row["created_at"],
        )

    @staticmethod
    def _session_from_row(row: sqlite3.Row) -> Session:
        """Build a Session from a sessions-table row, decoding the JSON list columns."""
        return Session(
            id=row["id"],
            task_text=row["task_text"],
            nodes_returned=json.loads(row["nodes_returned"]) if row["nodes_returned"] else None,
            nodes_used=json.loads(row["nodes_used"]) if row["nodes_used"] else None,
            outcome_signal=row["outcome_signal"],
            token_count=row["token_count"],
            created_at=row["created_at"],
        )

    def upsert_nodes(self, nodes: list[Node]) -> None:
        """Insert or update *nodes* by id.

        On conflict every column is refreshed except created_at, which keeps
        its original value (the update list deliberately omits it).
        """
        self.conn.executemany(
            """
            INSERT INTO nodes (id, kind, name, path, language, start_line, end_line, snippet, file_hash, embedding, created_at)
            VALUES (:id, :kind, :name, :path, :language, :start_line, :end_line, :snippet, :file_hash, :embedding, :created_at)
            ON CONFLICT(id) DO UPDATE SET
                kind = excluded.kind,
                name = excluded.name,
                path = excluded.path,
                language = excluded.language,
                start_line = excluded.start_line,
                end_line = excluded.end_line,
                snippet = excluded.snippet,
                file_hash = excluded.file_hash,
                embedding = excluded.embedding
            """,
            [
                {
                    "id": n.id,
                    "kind": n.kind,
                    "name": n.name,
                    "path": n.path,
                    "language": n.language,
                    "start_line": n.start_line,
                    "end_line": n.end_line,
                    "snippet": n.snippet,
                    "file_hash": _pack_embedding(n.embedding) if False else n.file_hash,  # noqa: SIM108 -- see below
                    "embedding": _pack_embedding(n.embedding),
                    "created_at": n.created_at if n.created_at is not None else time.time(),
                }
                for n in nodes
            ],
        )
        self.conn.commit()

    def upsert_edges(self, edges: list[Edge]) -> None:
        """Insert or update *edges*; on conflict only the weight is refreshed."""
        self.conn.executemany(
            """
            INSERT INTO edges (src_id, dst_id, kind, weight)
            VALUES (?, ?, ?, ?)
            ON CONFLICT(src_id, dst_id, kind) DO UPDATE SET
                weight = excluded.weight
            """,
            [(e.src_id, e.dst_id, e.kind, e.weight) for e in edges],
        )
        self.conn.commit()

    def get_node(self, node_id: str) -> Node | None:
        """Return the node with *node_id*, or None if absent."""
        row = self.conn.execute("SELECT * FROM nodes WHERE id = ?", (node_id,)).fetchone()
        if row is None:
            return None
        return self._node_from_row(row)

    def list_nodes(self) -> list[Node]:
        """Return every node in the store."""
        rows = self.conn.execute("SELECT * FROM nodes").fetchall()
        return [self._node_from_row(row) for row in rows]

    def get_edges_for_node(self, node_id: str) -> list[Edge]:
        """Return all edges touching *node_id* as either endpoint."""
        rows = self.conn.execute(
            "SELECT * FROM edges WHERE src_id = ? OR dst_id = ?",
            (node_id, node_id),
        ).fetchall()
        return [Edge(src_id=r["src_id"], dst_id=r["dst_id"], kind=r["kind"], weight=r["weight"]) for r in rows]

    def file_hash(self, path: str) -> str | None:
        """Return the stored content hash for the file node at *path*, or None."""
        row = self.conn.execute(
            "SELECT file_hash FROM nodes WHERE kind='file' AND path = ? LIMIT 1",
            (path,),
        ).fetchone()
        return None if row is None else row["file_hash"]

    def create_session(self, task_text: str, nodes_returned: list[str], token_count: int) -> Session:
        """Record a new retrieval session and return it (outcome fields start as NULL)."""
        session = Session(
            id=str(uuid.uuid4()),
            task_text=task_text,
            nodes_returned=nodes_returned,
            token_count=token_count,
            created_at=time.time(),
        )
        self.conn.execute(
            """
            INSERT INTO sessions (id, task_text, nodes_returned, nodes_used, outcome_signal, token_count, created_at)
            VALUES (?, ?, ?, ?, ?, ?, ?)
            """,
            (
                session.id,
                session.task_text,
                json.dumps(nodes_returned),
                None,
                None,
                token_count,
                session.created_at,
            ),
        )
        self.conn.commit()
        return session

    def update_session_outcome(self, session_id: str, used_node_ids: list[str], success: bool) -> None:
        """Attach the used-node list and a 1/0 success flag to an existing session."""
        self.conn.execute(
            """
            UPDATE sessions
            SET nodes_used = ?, outcome_signal = ?
            WHERE id = ?
            """,
            (json.dumps(used_node_ids), 1 if success else 0, session_id),
        )
        self.conn.commit()

    def list_sessions(self) -> list[Session]:
        """Return all sessions, newest first."""
        rows = self.conn.execute("SELECT * FROM sessions ORDER BY created_at DESC").fetchall()
        return [self._session_from_row(r) for r in rows]
|
|
File without changes
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""Node embedding utilities."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
from rich.progress import BarColumn, Progress, TaskProgressColumn, TextColumn, TimeRemainingColumn
|
|
11
|
+
|
|
12
|
+
from contextforge.graph.store import Node
|
|
13
|
+
|
|
14
|
+
MODEL = "BAAI/bge-small-en-v1.5"
|
|
15
|
+
|
|
16
|
+
console = Console(stderr=True)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _fallback_embedding(text: str, size: int = 384) -> np.ndarray:
|
|
20
|
+
digest = hashlib.sha256(text.encode("utf-8")).digest()
|
|
21
|
+
seed = int.from_bytes(digest[:8], "little")
|
|
22
|
+
rng = np.random.default_rng(seed)
|
|
23
|
+
vec = rng.random(size, dtype=np.float32)
|
|
24
|
+
norm = np.linalg.norm(vec)
|
|
25
|
+
if norm == 0:
|
|
26
|
+
return vec
|
|
27
|
+
return vec / norm
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def embed_nodes(nodes: list[Node], batch_size: int = 64, cache_dir: Path | None = None) -> list[Node]:
    """Attach a float32 embedding to every node in *nodes*, in place, and return the list.

    Attempts to load the sentence-transformers model first; if that fails for
    any reason (missing dependency, no network, ...), warns once and falls
    back to deterministic hash-seeded vectors so indexing still completes.
    """
    if not nodes:
        return nodes

    model = None
    try:
        from sentence_transformers import SentenceTransformer

        cache = cache_dir or (Path.home() / ".cache" / "contextforge")
        cache.mkdir(parents=True, exist_ok=True)
        model = SentenceTransformer(MODEL, cache_folder=str(cache))
    except Exception:
        console.print("[yellow]Warning:[/yellow] embedding model unavailable, using fallback vectors")

    # One text per node: "<kind>: <name>. <snippet>".
    texts = [f"{n.kind}: {n.name}. {n.snippet or ''}" for n in nodes]
    columns = (
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TaskProgressColumn(),
        TimeRemainingColumn(),
    )
    with Progress(*columns, console=console, transient=True) as progress:
        task = progress.add_task("Embedding nodes", total=len(nodes))
        for start in range(0, len(nodes), batch_size):
            chunk = nodes[start : start + batch_size]
            chunk_texts = texts[start : start + batch_size]
            if model is None:
                vectors = np.stack([_fallback_embedding(t) for t in chunk_texts], axis=0)
            else:
                vectors = model.encode(chunk_texts, convert_to_numpy=True, normalize_embeddings=True)
            for node, vec in zip(chunk, vectors, strict=True):
                node.embedding = np.asarray(vec, dtype=np.float32)
            progress.advance(task, len(chunk))
    return nodes
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
"""Parse source files into graph nodes and edges."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import ast
|
|
6
|
+
import hashlib
|
|
7
|
+
import re
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from contextforge.graph.store import Edge, Node
|
|
12
|
+
from contextforge.indexer.tree_sitter_extract import try_extract_symbols
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _node_id(kind: str, path: str, name: str) -> str:
|
|
16
|
+
return hashlib.sha1(f"{kind}:{path}:{name}".encode("utf-8")).hexdigest()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _file_hash(content: str) -> str:
|
|
20
|
+
return hashlib.sha1(content.encode("utf-8")).hexdigest()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass(slots=True)
class ParseResult:
    """Everything extracted from a single source file."""

    file_node: Node  # node representing the file itself
    symbol_nodes: list[Node]  # symbols (functions/classes/etc.) found in the file
    edges: list[Edge]  # relations discovered (e.g. contains/imports/calls)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _parse_python_with_ast(content: str, rel_path: str) -> tuple[list[Node], list[Edge]]:
    """Parse Python *content* with the stdlib ast module.

    Returns (symbol_nodes, edges) where edges contains:
    - file -> symbol "contains" edges for each function/async function/class,
    - file -> module "imports" edges for import / from-import statements,
    - caller-symbol -> callee-symbol "calls" edges for calls, by name, to
      symbols defined in this same file.

    Raises whatever ast.parse raises (SyntaxError) on unparsable source.

    BUG FIX: the original emitted "calls" edges with src == dst (both set to
    the callee), producing useless self-loops; calls are now attributed to
    the enclosing definition that makes them.
    """
    tree = ast.parse(content)
    symbols: list[Node] = []
    edges: list[Edge] = []
    file_id = _node_id("file", rel_path, rel_path)
    # Hoisted: the original recomputed the hash once per symbol.
    fh = _file_hash(content)
    known_symbol_names: set[str] = set()

    # Pass 1: collect symbol definitions and import edges.
    for item in ast.walk(tree):
        if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            name = item.name
            symbol_id = _node_id("symbol", rel_path, name)
            known_symbol_names.add(name)
            snippet = ast.get_source_segment(content, item) or ""
            symbols.append(
                Node(
                    id=symbol_id,
                    kind="symbol",
                    name=name,
                    path=rel_path,
                    language="python",
                    start_line=getattr(item, "lineno", None),
                    end_line=getattr(item, "end_lineno", None),
                    snippet=snippet[:300],
                    file_hash=fh,
                )
            )
            edges.append(Edge(src_id=file_id, dst_id=symbol_id, kind="contains"))
        if isinstance(item, ast.Import):
            for alias in item.names:
                mod_id = _node_id("module", rel_path, alias.name)
                edges.append(Edge(src_id=file_id, dst_id=mod_id, kind="imports"))
        if isinstance(item, ast.ImportFrom):
            module = item.module or ""
            if module:
                mod_id = _node_id("module", rel_path, module)
                edges.append(Edge(src_id=file_id, dst_id=mod_id, kind="imports"))

    # Pass 2: "calls" edges from each definition to the known symbols it
    # calls by plain name or attribute name. Matching is name-based only, so
    # same-named methods on other objects can produce false positives.
    for definition in ast.walk(tree):
        if not isinstance(definition, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            continue
        caller_id = _node_id("symbol", rel_path, definition.name)
        for call in ast.walk(definition):
            if not isinstance(call, ast.Call):
                continue
            callee: str | None = None
            if isinstance(call.func, ast.Name):
                callee = call.func.id
            elif isinstance(call.func, ast.Attribute):
                callee = call.func.attr
            if callee and callee in known_symbol_names:
                edges.append(
                    Edge(
                        src_id=caller_id,
                        dst_id=_node_id("symbol", rel_path, callee),
                        kind="calls",
                    )
                )

    return symbols, edges
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _parse_ts_js_regex(content: str, rel_path: str, language: str, file_id: str, file_hash: str) -> tuple[list[Node], list[Edge]]:
    """Regex fallback for TS/JS: collect function, class, and const-arrow names."""
    declaration_patterns = (
        r"function\s+([A-Za-z_][A-Za-z0-9_]*)",
        r"class\s+([A-Za-z_][A-Za-z0-9_]*)",
        r"const\s+([A-Za-z_][A-Za-z0-9_]*)\s*=\s*\(",
    )
    symbol_nodes: list[Node] = []
    edges: list[Edge] = []
    for pattern in declaration_patterns:
        for match in re.finditer(pattern, content):
            symbol = match.group(1)
            sid = _node_id("symbol", rel_path, symbol)
            symbol_nodes.append(
                Node(
                    id=sid,
                    kind="symbol",
                    name=symbol,
                    path=rel_path,
                    language=language,
                    snippet=symbol,
                    file_hash=file_hash,
                )
            )
            edges.append(Edge(src_id=file_id, dst_id=sid, kind="contains"))
    return symbol_nodes, edges
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _parse_go_regex(content: str, rel_path: str, language: str, file_id: str, file_hash: str) -> tuple[list[Node], list[Edge]]:
    """Regex fallback for Go: one symbol node per `func <name>` match."""
    symbol_nodes: list[Node] = []
    edges: list[Edge] = []
    for match in re.finditer(r"func\s+([A-Za-z_][A-Za-z0-9_]*)", content):
        func_name = match.group(1)
        sid = _node_id("symbol", rel_path, func_name)
        node = Node(id=sid, kind="symbol", name=func_name, path=rel_path, language=language, snippet=func_name, file_hash=file_hash)
        symbol_nodes.append(node)
        edges.append(Edge(src_id=file_id, dst_id=sid, kind="contains"))
    return symbol_nodes, edges
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _parse_rust_regex(content: str, rel_path: str, language: str, file_id: str, file_hash: str) -> tuple[list[Node], list[Edge]]:
    """Regex fallback for Rust: one symbol node per fn/struct/enum declaration."""
    symbol_nodes: list[Node] = []
    edges: list[Edge] = []
    for match in re.finditer(r"(fn|struct|enum)\s+([A-Za-z_][A-Za-z0-9_]*)", content):
        ident = match.group(2)
        sid = _node_id("symbol", rel_path, ident)
        node = Node(id=sid, kind="symbol", name=ident, path=rel_path, language=language, snippet=ident, file_hash=file_hash)
        symbol_nodes.append(node)
        edges.append(Edge(src_id=file_id, dst_id=sid, kind="contains"))
    return symbol_nodes, edges
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def parse_file(path: Path, rel_path: str, language: str) -> ParseResult:
    """Parse one source file into a file node, symbol nodes, and edges.

    Python uses the stdlib ast parser; the other supported languages try
    tree-sitter first (try_extract_symbols) and fall back to per-language
    regex scanners when it returns None. Unknown languages yield only the
    file node.

    NOTE(review): _parse_python_with_ast can raise SyntaxError for invalid
    Python; callers are presumably expected to handle that — confirm.
    """
    # errors="ignore" so undecodable bytes never abort indexing.
    content = path.read_text(encoding="utf-8", errors="ignore")
    file_id = _node_id("file", rel_path, rel_path)
    fh = _file_hash(content)
    file_node = Node(
        id=file_id,
        kind="file",
        name=path.name,
        path=rel_path,
        language=language,
        start_line=1,
        # Line count is at least 1 even for an empty file.
        end_line=max(content.count("\n") + 1, 1),
        snippet=content[:300],
        file_hash=fh,
    )
    symbol_nodes: list[Node] = []
    edges: list[Edge] = []

    if language == "python":
        symbol_nodes, edges = _parse_python_with_ast(content, rel_path)
    elif language in {"typescript", "javascript", "go", "rust"}:
        # Prefer tree-sitter extraction; None signals it was unavailable/failed.
        extracted = try_extract_symbols(path, rel_path, content, language, file_id, fh)
        if extracted is not None:
            symbol_nodes, edges = extracted
        elif language in {"typescript", "javascript"}:
            symbol_nodes, edges = _parse_ts_js_regex(content, rel_path, language, file_id, fh)
        elif language == "go":
            symbol_nodes, edges = _parse_go_regex(content, rel_path, language, file_id, fh)
        elif language == "rust":
            symbol_nodes, edges = _parse_rust_regex(content, rel_path, language, file_id, fh)

    return ParseResult(file_node=file_node, symbol_nodes=symbol_nodes, edges=edges)
|