PyPI - ctxgraph-code - Versions diffs - 0.1.0__py3-none-any.whl - Mend

ctxgraph-code 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

ctxgraph_code/__init__.py +0 -0
ctxgraph_code/__main__.py +3 -0
ctxgraph_code/analyzers/__init__.py +0 -0
ctxgraph_code/analyzers/python/__init__.py +0 -0
ctxgraph_code/analyzers/python/importer.py +140 -0
ctxgraph_code/analyzers/python/semantic.py +75 -0
ctxgraph_code/analyzers/python/symbols.py +221 -0
ctxgraph_code/cli.py +337 -0
ctxgraph_code/config/__init__.py +0 -0
ctxgraph_code/config/init.py +14 -0
ctxgraph_code/config/settings.py +121 -0
ctxgraph_code/exclude/__init__.py +0 -0
ctxgraph_code/exclude/patterns.py +75 -0
ctxgraph_code/graph/__init__.py +0 -0
ctxgraph_code/graph/builder.py +76 -0
ctxgraph_code/graph/models.py +83 -0
ctxgraph_code/graph/query.py +115 -0
ctxgraph_code/graph/storage.py +224 -0
ctxgraph_code/render.py +244 -0
ctxgraph_code-0.1.0.dist-info/METADATA +279 -0
ctxgraph_code-0.1.0.dist-info/RECORD +24 -0
ctxgraph_code-0.1.0.dist-info/WHEEL +5 -0
ctxgraph_code-0.1.0.dist-info/entry_points.txt +2 -0
ctxgraph_code-0.1.0.dist-info/top_level.txt +1 -0

ctxgraph_code/graph/models.py ADDED Viewed

@@ -0,0 +1,83 @@
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Optional
+@dataclass
+class Node:
+    """Vertex record held by a ``Graph``.
+
+    Two nodes compare equal, and hash identically, whenever their ``id``
+    values match; no other field takes part in identity.
+    """
+    # Required fields.
+    id: str
+    type: str
+    name: str
+    # Optional text fields, absent by default.
+    path: Optional[str] = None
+    parent_id: Optional[str] = None
+    summary: Optional[str] = None
+    # Numeric fields and their defaults.
+    importance: float = 0.5
+    size_bytes: int = 0
+    lineno: int = 0
+    def __eq__(self, other):
+        """Return True only for another ``Node`` carrying the same ``id``."""
+        if not isinstance(other, Node):
+            return False
+        return self.id == other.id
+    def __hash__(self):
+        """Hash on ``id`` alone so hashing agrees with ``__eq__``."""
+        return hash(self.id)
+@dataclass
+class Edge:
+    """Directed, typed link between two node ids.
+
+    Identity is the ``(source_id, target_id, relation)`` triple; ``weight``
+    is ignored by both equality and hashing.
+    """
+    source_id: str
+    target_id: str
+    relation: str
+    weight: float = 1.0
+    def __eq__(self, other):
+        """Return True when ``other`` is an ``Edge`` with the same triple."""
+        if not isinstance(other, Edge):
+            return False
+        same_endpoints = (
+            self.source_id == other.source_id
+            and self.target_id == other.target_id
+        )
+        return same_endpoints and self.relation == other.relation
+    def __hash__(self):
+        """Hash the identifying triple, mirroring ``__eq__``."""
+        identity = (self.source_id, self.target_id, self.relation)
+        return hash(identity)
+@dataclass
+class Graph:
+    """In-memory container pairing an id-keyed node table with an edge list."""
+    nodes: dict[str, Node] = field(default_factory=dict)
+    edges: list[Edge] = field(default_factory=list)
+    def add_node(self, node: Node):
+        """Store ``node`` under its id, overwriting any earlier entry."""
+        self.nodes[node.id] = node
+    def add_edge(self, edge: Edge):
+        """Append ``edge`` to the edge list; no duplicate check is made."""
+        self.edges.append(edge)
+    def get_node(self, node_id: str) -> Optional[Node]:
+        """Look up a node by id, returning ``None`` when it is absent."""
+        return self.nodes.get(node_id)
+    def get_edges_from(self, source_id: str) -> list[Edge]:
+        """Return, in list order, the edges whose source is ``source_id``."""
+        return list(filter(lambda edge: edge.source_id == source_id, self.edges))
+    def get_edges_to(self, target_id: str) -> list[Edge]:
+        """Return, in list order, the edges whose target is ``target_id``."""
+        return list(filter(lambda edge: edge.target_id == target_id, self.edges))
+    def get_neighbors(self, node_id: str) -> list[str]:
+        """Return the distinct ids linked to ``node_id`` in either direction.
+
+        The result is produced from a set, so its order is unspecified.
+        """
+        adjacent = set()
+        for edge in self.edges:
+            src, dst = edge.source_id, edge.target_id
+            if src == node_id:
+                adjacent.add(dst)
+            if dst == node_id:
+                adjacent.add(src)
+        return list(adjacent)
+    def merge(self, other: Graph):
+        """Fold ``other`` into this graph in place.
+
+        Nodes already present here win over same-id nodes in ``other``.
+        Edges from ``other`` are appended unless an edge with the same
+        (source, target, relation) triple has already been seen.
+        """
+        for node_id, node in other.nodes.items():
+            # setdefault leaves an existing entry untouched.
+            self.nodes.setdefault(node_id, node)
+        seen = {(e.source_id, e.target_id, e.relation) for e in self.edges}
+        for edge in other.edges:
+            triple = (edge.source_id, edge.target_id, edge.relation)
+            if triple in seen:
+                continue
+            seen.add(triple)
+            self.edges.append(edge)

ctxgraph_code/graph/query.py ADDED Viewed

@@ -0,0 +1,115 @@
+from __future__ import annotations
+import re
+from typing import Optional
+from ctxgraph_code.graph.models import Node
+from ctxgraph_code.graph.storage import Storage
+def search_relevant_nodes(
+    storage: Storage,
+    query: str,
+    max_nodes: int = 15,
+    max_depth: int = 2,
+) -> list[tuple[Node, float]]:
+    """Rank stored nodes by relevance to a free-text query.
+
+    Nodes returned by ``storage.search_nodes(query)`` are scored with
+    ``_compute_relevance``; when none scores above zero, each query token
+    is searched individually instead. The positively scored "seed" nodes
+    are then expanded along edges for up to ``max_depth`` rounds, and each
+    newly reached node is scored at 0.1 per edge connecting it to a seed.
+
+    Args:
+        storage: Connected graph store.
+        query: Free-text query; tokenized with ``_tokenize``.
+        max_nodes: Maximum number of results to return.
+        max_depth: Number of edge-expansion rounds around the seeds.
+
+    Returns:
+        ``(node, score)`` pairs sorted by descending score, with scores
+        rounded to three decimals. Empty when the query has no usable
+        tokens or nothing matches.
+    """
+    tokens = _tokenize(query)
+    if not tokens:
+        return []
+    scored: dict[str, float] = {}
+    # Nodes already fetched, kept so the ranking step need not query again.
+    fetched: dict[str, Node] = {}
+    for node in storage.search_nodes(query):
+        score = _compute_relevance(node, tokens)
+        if score > 0:
+            scored[node.id] = score
+            fetched[node.id] = node
+    if not scored:
+        # Fallback: the whole query matched nothing useful, try each token.
+        seen_ids: set[str] = set()
+        for token in tokens:
+            for node in storage.search_nodes(token):
+                if node.id in seen_ids:
+                    continue
+                seen_ids.add(node.id)
+                score = _compute_relevance(node, tokens)
+                if score > 0:
+                    scored[node.id] = score
+                    fetched[node.id] = node
+    if not scored:
+        return []
+    seed_ids = set(scored)
+    edge_ids: set[str] = set()
+    # Seeds plus everything discovered so far; maintained incrementally
+    # instead of rebuilding the union for every membership test.
+    reached = set(seed_ids)
+    for _ in range(max_depth):
+        new_ids = set()
+        for e in storage.get_edges_for_nodes(reached):
+            if e.source_id in reached:
+                new_ids.add(e.target_id)
+            if e.target_id in reached:
+                new_ids.add(e.source_id)
+        edge_ids |= new_ids
+        if new_ids <= reached:
+            # Nothing new was reached, so further rounds would repeat this one.
+            break
+        reached |= new_ids
+    for nid in edge_ids:
+        if nid in scored:
+            continue
+        node = storage.get_node(nid)
+        if node:
+            fetched[nid] = node
+            neighbors = _count_matched_neighbors(nid, storage, seed_ids)
+            scored[nid] = 0.1 * neighbors
+    ranked = sorted(scored.items(), key=lambda x: x[1], reverse=True)
+    # Every id in ``scored`` was fetched above, so no further lookups occur.
+    return [(fetched[nid], round(score, 3)) for nid, score in ranked[:max_nodes]]
+def _tokenize(text: str) -> list[str]:
+    """Extract lowercase, identifier-shaped keywords from ``text``.
+
+    Words found in the built-in stopword set and words of a single
+    character are discarded; the remaining words keep their original order
+    (duplicates included).
+    """
+    ignored = {
+        "the", "a", "an", "in", "on", "at", "to", "for", "of", "is",
+        "fix", "bug", "implement", "add", "change", "update", "remove",
+        "need", "want", "please", "can", "how", "what", "where", "why",
+        "this", "that", "with", "from", "by", "be", "has", "have", "do",
+        "does", "did", "will", "would", "could", "should", "may", "might",
+        "file", "function", "class", "code", "issue", "problem", "error",
+        "work", "make", "get", "set",
+    }
+    kept = []
+    for match in re.finditer(r"[a-zA-Z_][a-zA-Z0-9_]*", text.lower()):
+        word = match.group()
+        if len(word) <= 1 or word in ignored:
+            continue
+        kept.append(word)
+    return kept
+def _compute_relevance(node: Node, tokens: list[str]) -> float:
+    """Score how well ``node`` matches the query ``tokens``.
+
+    Each token adds 2.0 when it occurs in the node name, plus 0.5 for every
+    occurrence in the lowercased "name summary path" text. The total is
+    then multiplied by ``0.5 + importance``.
+
+    Args:
+        node: Node to score; ``summary`` and ``path`` may be ``None``.
+        tokens: Lowercase query tokens.
+
+    Returns:
+        The relevance score; 0.0 when no token occurs anywhere.
+    """
+    name = node.name.lower()
+    text = f"{node.name} {node.summary or ''} {node.path or ''}".lower()
+    score = 0.0
+    for token in tokens:
+        if token in name:
+            score += 2.0
+        score += text.count(token) * 0.5
+    # Only a missing importance skips the weighting. A plain truthiness test
+    # also skipped importance 0.0, which then ranked above importance 0.1.
+    if node.importance is not None:
+        score *= 0.5 + node.importance
+    return score
+def _count_matched_neighbors(
+    node_id: str, storage: Storage, matched_ids: set[str]
+) -> int:
+    """Count the edges touching ``node_id`` that also touch a matched node.
+
+    An edge is counted once when its source or its target is contained in
+    ``matched_ids``.
+    """
+    incident = storage.get_edges_for_nodes({node_id})
+    return sum(
+        1
+        for edge in incident
+        if edge.source_id in matched_ids or edge.target_id in matched_ids
+    )

ctxgraph_code/graph/storage.py ADDED Viewed

@@ -0,0 +1,224 @@
+from __future__ import annotations
+import sqlite3
+from pathlib import Path
+from typing import Optional
+from ctxgraph_code.graph.models import Edge, Graph, Node
+def _table_schema() -> str:
+    """Return the SQL script that creates the tables and indexes if absent.
+
+    The script declares the ``nodes``, ``edges`` and ``metadata`` tables
+    plus indexes on edge endpoints and on node path and type.
+    """
+    ddl = """
+    CREATE TABLE IF NOT EXISTS nodes (
+        id TEXT PRIMARY KEY,
+        type TEXT NOT NULL,
+        name TEXT NOT NULL,
+        path TEXT,
+        parent_id TEXT,
+        summary TEXT,
+        importance REAL DEFAULT 0.5,
+        size_bytes INTEGER DEFAULT 0,
+        lineno INTEGER DEFAULT 0
+    );
+    CREATE TABLE IF NOT EXISTS edges (
+        source_id TEXT NOT NULL,
+        target_id TEXT NOT NULL,
+        relation TEXT NOT NULL,
+        weight REAL DEFAULT 1.0,
+        PRIMARY KEY (source_id, target_id, relation)
+    );
+    CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_id);
+    CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_id);
+    CREATE INDEX IF NOT EXISTS idx_nodes_path ON nodes(path);
+    CREATE INDEX IF NOT EXISTS idx_nodes_type ON nodes(type);
+    CREATE TABLE IF NOT EXISTS metadata (
+        key TEXT PRIMARY KEY,
+        value TEXT
+    );
+    """
+    return ddl
+class Storage:
+    """SQLite-backed store for graph nodes, edges and key/value metadata.
+
+    Call ``connect()`` (or use the instance as a context manager) before
+    any other method; reading ``conn`` while disconnected raises
+    ``RuntimeError``.
+    """
+    # Ids bound per edge-lookup statement. Every id is bound twice (source
+    # side and target side), so 400 keeps a statement below the
+    # 999-parameter limit that older SQLite builds default to.
+    _ID_BATCH = 400
+    def __init__(self, db_path: str | Path):
+        """Remember ``db_path`` and create its parent directory if needed."""
+        self.db_path = Path(db_path)
+        self.db_path.parent.mkdir(parents=True, exist_ok=True)
+        self._conn: Optional[sqlite3.Connection] = None
+    def __enter__(self) -> Storage:
+        """Connect on entry unless a connection is already open."""
+        if self._conn is None:
+            self.connect()
+        return self
+    def __exit__(self, exc_type, exc, tb) -> None:
+        """Close the connection when the ``with`` block ends."""
+        self.close()
+    def connect(self):
+        """Open the database, apply pragmas and ensure the schema exists."""
+        # Drop any earlier handle so a repeated connect() does not leak it.
+        self.close()
+        self._conn = sqlite3.connect(str(self.db_path))
+        self._conn.execute("PRAGMA journal_mode=WAL")
+        self._conn.execute("PRAGMA synchronous=NORMAL")
+        self._conn.row_factory = sqlite3.Row
+        self._init_schema()
+    def _init_schema(self):
+        """Run the schema script and commit it."""
+        self._conn.executescript(_table_schema())
+        self._conn.commit()
+    def close(self):
+        """Close the connection if one is open; safe to call repeatedly."""
+        if self._conn:
+            self._conn.close()
+            self._conn = None
+    @property
+    def conn(self) -> sqlite3.Connection:
+        """The live connection.
+
+        Raises:
+            RuntimeError: If ``connect()`` has not been called.
+        """
+        if self._conn is None:
+            raise RuntimeError("Storage not connected. Call connect() first.")
+        return self._conn
+    @staticmethod
+    def _node_from_row(row) -> Node:
+        """Build a ``Node`` from a ``nodes`` table row."""
+        return Node(
+            id=row["id"],
+            type=row["type"],
+            name=row["name"],
+            path=row["path"],
+            parent_id=row["parent_id"],
+            summary=row["summary"],
+            importance=row["importance"],
+            size_bytes=row["size_bytes"],
+            lineno=row["lineno"],
+        )
+    @staticmethod
+    def _edge_from_row(row) -> Edge:
+        """Build an ``Edge`` from an ``edges`` table row."""
+        return Edge(
+            source_id=row["source_id"],
+            target_id=row["target_id"],
+            relation=row["relation"],
+            weight=row["weight"],
+        )
+    def update_node_summary(self, node_id: str, summary: str):
+        """Set the summary of node ``node_id`` and commit."""
+        self.conn.execute(
+            "UPDATE nodes SET summary = ? WHERE id = ?", (summary, node_id)
+        )
+        self.conn.commit()
+    def save_node(self, node: Node):
+        """Insert or replace ``node``; the caller is responsible for committing."""
+        self.conn.execute(
+            """INSERT OR REPLACE INTO nodes
+               (id, type, name, path, parent_id, summary, importance, size_bytes, lineno)
+               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+            (
+                node.id,
+                node.type,
+                node.name,
+                node.path,
+                node.parent_id,
+                node.summary,
+                node.importance,
+                node.size_bytes,
+                node.lineno,
+            ),
+        )
+    def save_edge(self, edge: Edge):
+        """Insert or replace ``edge``; the caller is responsible for committing."""
+        self.conn.execute(
+            """INSERT OR REPLACE INTO edges
+               (source_id, target_id, relation, weight)
+               VALUES (?, ?, ?, ?)""",
+            (edge.source_id, edge.target_id, edge.relation, edge.weight),
+        )
+    def save_graph(self, graph: Graph):
+        """Persist every node and edge of ``graph`` in one transaction."""
+        # The connection context manager commits on success and rolls back
+        # if any insert raises, so a failure cannot leave a partial graph.
+        with self.conn:
+            for node in graph.nodes.values():
+                self.save_node(node)
+            for edge in graph.edges:
+                self.save_edge(edge)
+    def get_node(self, node_id: str) -> Optional[Node]:
+        """Return the node with id ``node_id``, or ``None`` if not stored."""
+        row = self.conn.execute(
+            "SELECT * FROM nodes WHERE id = ?", (node_id,)
+        ).fetchone()
+        if row is None:
+            return None
+        return self._node_from_row(row)
+    def search_nodes(self, text: str) -> list[Node]:
+        """Return up to 50 nodes whose name, summary or path contains ``text``.
+
+        ``text`` is matched literally with SQLite ``LIKE``; results are
+        ordered by descending importance.
+        """
+        # Escape LIKE metacharacters: without this the "_" in an identifier
+        # such as "parse_file" acts as a single-character wildcard.
+        escaped = (
+            text.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
+        )
+        pattern = f"%{escaped}%"
+        rows = self.conn.execute(
+            """SELECT * FROM nodes WHERE
+               name LIKE ? ESCAPE '\\'
+               OR summary LIKE ? ESCAPE '\\'
+               OR path LIKE ? ESCAPE '\\'
+               ORDER BY importance DESC
+               LIMIT 50""",
+            (pattern, pattern, pattern),
+        ).fetchall()
+        return [self._node_from_row(r) for r in rows]
+    def get_edges_for_nodes(self, node_ids: set[str]) -> list[Edge]:
+        """Return every edge whose source or target is in ``node_ids``.
+
+        Ids are queried in batches so that large id sets do not exceed
+        SQLite's bound-parameter limit. Each edge is returned once.
+        """
+        if not node_ids:
+            return []
+        ids = list(node_ids)
+        found: dict[tuple[str, str, str], Edge] = {}
+        for start in range(0, len(ids), self._ID_BATCH):
+            batch = ids[start:start + self._ID_BATCH]
+            placeholders = ",".join("?" * len(batch))
+            rows = self.conn.execute(
+                f"""SELECT * FROM edges WHERE
+                   source_id IN ({placeholders}) OR target_id IN ({placeholders})""",
+                batch + batch,
+            ).fetchall()
+            for r in rows:
+                # An edge with endpoints in two different batches comes back
+                # twice; key on the table's primary key to keep one copy.
+                key = (r["source_id"], r["target_id"], r["relation"])
+                if key not in found:
+                    found[key] = self._edge_from_row(r)
+        return list(found.values())
+    def get_all_nodes(self) -> list[Node]:
+        """Return every stored node."""
+        rows = self.conn.execute("SELECT * FROM nodes").fetchall()
+        return [self._node_from_row(r) for r in rows]
+    def get_all_edges(self) -> list[Edge]:
+        """Return every stored edge."""
+        rows = self.conn.execute("SELECT * FROM edges").fetchall()
+        return [self._edge_from_row(r) for r in rows]
+    def stats(self) -> dict:
+        """Return node and edge totals plus a per-type node count."""
+        node_count = self.conn.execute("SELECT COUNT(*) FROM nodes").fetchone()[0]
+        edge_count = self.conn.execute("SELECT COUNT(*) FROM edges").fetchone()[0]
+        type_counts = self.conn.execute(
+            "SELECT type, COUNT(*) as cnt FROM nodes GROUP BY type"
+        ).fetchall()
+        return {
+            "nodes": node_count,
+            "edges": edge_count,
+            "types": {r["type"]: r["cnt"] for r in type_counts},
+        }
+    def save_metadata(self, key: str, value: str):
+        """Insert or replace the metadata entry ``key`` and commit."""
+        self.conn.execute(
+            "INSERT OR REPLACE INTO metadata (key, value) VALUES (?, ?)",
+            (key, value),
+        )
+        self.conn.commit()
+    def get_metadata(self, key: str) -> Optional[str]:
+        """Return the metadata value for ``key``, or ``None`` if unset."""
+        row = self.conn.execute(
+            "SELECT value FROM metadata WHERE key = ?", (key,)
+        ).fetchone()
+        return row["value"] if row else None

ctxgraph_code/render.py ADDED Viewed

@@ -0,0 +1,244 @@
+from __future__ import annotations
+from collections import defaultdict
+from typing import Optional
+from ctxgraph_code.graph.models import Node
+from ctxgraph_code.graph.storage import Storage
+def render_overview(storage: Storage, max_files: int = 30) -> str:
+    """Render a text overview listing files with their summaries and symbols.
+
+    Args:
+        storage: Connected graph store.
+        max_files: Maximum number of ``file`` nodes to list.
+
+    Returns:
+        A newline-joined report. Each file contributes a ``[F]`` line, an
+        optional summary line, and a ``Symbols:`` line naming up to eight
+        of its direct ``class``/``function`` children.
+    """
+    all_nodes = storage.get_all_nodes()
+    # Index symbol names by parent in one pass instead of rescanning every
+    # node for each file; order within a parent follows ``all_nodes``.
+    symbols_by_parent = defaultdict(list)
+    for n in all_nodes:
+        if n.type in ("class", "function"):
+            symbols_by_parent[n.parent_id].append(n.name)
+    file_nodes = [n for n in all_nodes if n.type == "file"][:max_files]
+    lines = ["Project Overview", ""]
+    for node in file_nodes:
+        lines.append(f"  [F] {node.path or node.name}")
+        if node.summary:
+            lines.append(f"      {node.summary}")
+        # .get() rather than indexing, so no empty lists are inserted.
+        names = symbols_by_parent.get(node.id)
+        if names:
+            lines.append(f"      Symbols: {', '.join(names[:8])}")
+    lines.append("")
+    return "\n".join(lines)
+def render_deps(storage: Storage, file_path: str) -> str:
+    """Describe a file's symbols together with its import and call links.
+
+    Args:
+        storage: Connected graph store.
+        file_path: Path used to form the node id ``file:<file_path>``.
+
+    Returns:
+        A newline-joined report, or a "File not found" message when no
+        node with that id exists.
+    """
+    all_nodes = storage.get_all_nodes()
+    all_edges = storage.get_all_edges()
+    node_id = f"file:{file_path}"
+    if not storage.get_node(node_id):
+        return f"File not found in graph: {file_path}"
+    def _by_path(n):
+        return n.path or n.name
+    def _with_path(n):
+        return f"{n.name} ({n.path})"
+    def _peers(relation, outgoing, label):
+        # Labels of the nodes on the far side of matching edges, in edge order.
+        labels = []
+        for edge in all_edges:
+            if edge.relation != relation:
+                continue
+            if outgoing:
+                near, far = edge.source_id, edge.target_id
+            else:
+                near, far = edge.target_id, edge.source_id
+            if near != node_id:
+                continue
+            peer = storage.get_node(far)
+            if peer:
+                labels.append(label(peer))
+        return labels
+    imports = _peers("imports", True, _by_path)
+    imported_by = _peers("imports", False, _by_path)
+    out = [f"Dependencies for: {file_path}", ""]
+    symbols = [n for n in all_nodes if n.parent_id == node_id]
+    if symbols:
+        class_names = [n.name for n in symbols if n.type == "class"]
+        func_names = [n.name for n in symbols if n.type == "function"]
+        if class_names:
+            out.append(f"  Classes: {', '.join(class_names)}")
+        if func_names:
+            out.append(f"  Functions: {', '.join(func_names)}")
+        out.append("")
+    if not imports:
+        out.append("  Imports: (none)")
+    else:
+        out.append("  Imports:")
+        out.extend(f"    -> {imp}" for imp in sorted(imports))
+    if imported_by:
+        out.append("")
+        out.append("  Imported by:")
+        out.extend(f"    <- {imp}" for imp in sorted(imported_by))
+    calls_made = _peers("calls", True, _with_path)
+    called_by = _peers("calls", False, _with_path)
+    if calls_made:
+        out.append("")
+        out.append("  Calls:")
+        out.extend(f"    -> {c}" for c in sorted(calls_made))
+    if called_by:
+        out.append("")
+        out.append("  Called by:")
+        out.extend(f"    <- {c}" for c in sorted(called_by))
+    return "\n".join(out)
+def render_usedby(storage: Storage, file_path: str) -> str:
+    """List the nodes that import or call the file ``file_path``.
+
+    Args:
+        storage: Connected graph store.
+        file_path: Path used to form the node id ``file:<file_path>``.
+
+    Returns:
+        A newline-joined report, or a "File not found" message when no
+        node with that id exists.
+    """
+    node_id = f"file:{file_path}"
+    if not storage.get_node(node_id):
+        return f"File not found in graph: {file_path}"
+    imported_by = []
+    called_by = []
+    for edge in storage.get_all_edges():
+        # Only incoming "imports" / "calls" edges are of interest.
+        if edge.target_id != node_id:
+            continue
+        if edge.relation not in ("imports", "calls"):
+            continue
+        source = storage.get_node(edge.source_id)
+        if not source:
+            continue
+        if edge.relation == "imports":
+            imported_by.append(source.path or source.name)
+        else:
+            called_by.append(f"{source.name} ({source.path})")
+    out = [f"References to: {file_path}", ""]
+    if not imported_by:
+        out.append("  Imported by: (none)")
+    else:
+        out.append(f"  Imported by ({len(imported_by)}):")
+        out.extend(f"    {ref}" for ref in sorted(imported_by))
+    if called_by:
+        out.append("")
+        out.append(f"  Called by ({len(called_by)}):")
+        out.extend(f"    {ref}" for ref in sorted(called_by))
+    return "\n".join(out)
+def render_symbols(storage: Storage, file_path: str) -> str:
+    """List the symbols directly under a file, with class members nested.
+
+    Args:
+        storage: Connected graph store.
+        file_path: Path used to form the node id ``file:<file_path>``.
+
+    Returns:
+        A newline-joined listing; a "File not found" message when the file
+        node does not exist; a "No symbols found" message when it has no
+        children. Classes are tagged ``[C]``, everything else ``[M]``.
+    """
+    node_id = f"file:{file_path}"
+    if not storage.get_node(node_id):
+        return f"File not found in graph: {file_path}"
+    all_nodes = storage.get_all_nodes()
+    def _children(parent_id):
+        return [n for n in all_nodes if n.parent_id == parent_id]
+    symbols = _children(node_id)
+    if not symbols:
+        return f"No symbols found in: {file_path}"
+    out = [f"Symbols in: {file_path}", ""]
+    for s in symbols:
+        is_class = s.type == "class"
+        tag = "[C]" if is_class else "[M]"
+        summary = f" - {s.summary}" if s.summary else ""
+        out.append(f"  {tag} {s.name} (line {s.lineno}){summary}")
+        if not is_class:
+            continue
+        # Members of a class are listed one level deeper.
+        for m in _children(s.id):
+            ms = f" - {m.summary}" if m.summary else ""
+            out.append(f"      [M] {m.name} (line {m.lineno}){ms}")
+    return "\n".join(out)
+def render_context(storage: Storage, query: str, max_nodes: int = 15) -> str:
+    """Render the nodes most relevant to ``query`` and the links between them.
+
+    Args:
+        storage: Connected graph store.
+        query: Free-text query handed to ``search_relevant_nodes``.
+        max_nodes: Maximum number of ranked nodes to include.
+
+    Returns:
+        A newline-joined report with file nodes first, then other symbols,
+        then up to ten import edges and ten call edges whose endpoints are
+        both among the ranked nodes. A "No context found" message is
+        returned when the search yields nothing.
+    """
+    # NOTE(review): function-scope import kept as in the original;
+    # presumably it avoids an import cycle - confirm.
+    from ctxgraph_code.graph.query import search_relevant_nodes
+    ranked = search_relevant_nodes(storage, query, max_nodes)
+    if not ranked:
+        return f"No context found for: {query}"
+    all_nodes = storage.get_all_nodes()
+    all_edges = storage.get_all_edges()
+    # Built once up front: previously the id map was rebuilt for every
+    # rendered edge and the node list rescanned for every file.
+    node_map = {n.id: n for n in all_nodes}
+    child_names = defaultdict(list)
+    for n in all_nodes:
+        if n.type in ("class", "function"):
+            child_names[n.parent_id].append(n.name)
+    node_ids = {n.id for n, _ in ranked}
+    import_edges = []
+    call_edges = []
+    for e in all_edges:
+        # Keep only edges whose two endpoints are both ranked nodes.
+        if e.source_id in node_ids and e.target_id in node_ids:
+            if e.relation == "imports":
+                import_edges.append((e.source_id, e.target_id))
+            elif e.relation == "calls":
+                call_edges.append((e.source_id, e.target_id))
+    def _edge_lines(pairs):
+        # At most ten edges per section; pairs without a usable name are skipped.
+        for src, tgt in pairs[:10]:
+            src_name = _short_name(src, node_map)
+            tgt_name = _short_name(tgt, node_map)
+            if src_name and tgt_name:
+                yield f"    {src_name} -> {tgt_name}"
+    lines = [f"Context: {query}", ""]
+    file_nodes = [n for n, _ in ranked if n.type == "file"]
+    symbol_nodes = [n for n, _ in ranked if n.type != "file"]
+    for node in file_nodes:
+        lines.append(f"  [F] {node.path or node.name}")
+        if node.summary:
+            lines.append(f"      {node.summary}")
+        names = child_names.get(node.id)
+        if names:
+            lines.append(f"      Symbols: {', '.join(names[:10])}")
+    if symbol_nodes:
+        lines.append("")
+        for node in symbol_nodes:
+            tag = "[C]" if node.type == "class" else "[M]"
+            name = node.name
+            if node.parent_id and "::" not in node.parent_id:
+                # Text after the last ":"; the whole id when it has no ":".
+                parent_short = node.parent_id.split(":")[-1]
+                name = f"{parent_short}.{node.name}"
+            lines.append(f"  {tag} {name}")
+            if node.summary:
+                lines.append(f"      {node.summary}")
+    if import_edges or call_edges:
+        lines.append("")
+        if import_edges:
+            lines.append("  Dependencies:")
+            lines.extend(_edge_lines(import_edges))
+        if call_edges:
+            lines.append("  Calls:")
+            lines.extend(_edge_lines(call_edges))
+    return "\n".join(lines)
+def _short_name(node_id: str, node_map: dict[str, Node]) -> Optional[str]:
+    """Return a compact display label for ``node_id``.
+
+    Known file nodes are shown by path (name when the path is empty),
+    other known nodes as ``path:name`` (name alone when the path is
+    empty). Unknown ids are reduced to the text after their last ``:``.
+    """
+    if node_id not in node_map:
+        # rpartition yields the whole id as the tail when no ":" is present.
+        return node_id.rpartition(":")[2]
+    node = node_map[node_id]
+    if not node.path:
+        return node.name
+    if node.type == "file":
+        return node.path
+    return f"{node.path}:{node.name}"