PyPI - codegraph-ai - Versions diffs - 0.1.0__py3-none-any.whl - Mend

codegraph-ai 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

codegraph/__init__.py +1 -0
codegraph/__main__.py +5 -0
codegraph/adapters/__init__.py +1 -0
codegraph/adapters/base.py +38 -0
codegraph/adapters/c_adapter.py +520 -0
codegraph/adapters/js_adapter.py +556 -0
codegraph/adapters/python_adapter.py +337 -0
codegraph/analyzer.py +432 -0
codegraph/cli.py +463 -0
codegraph/core.py +3606 -0
codegraph/mcp_server.py +588 -0
codegraph/models.py +284 -0
codegraph/qa.py +471 -0
codegraph_ai-0.1.0.dist-info/METADATA +14 -0
codegraph_ai-0.1.0.dist-info/RECORD +18 -0
codegraph_ai-0.1.0.dist-info/WHEEL +5 -0
codegraph_ai-0.1.0.dist-info/entry_points.txt +2 -0
codegraph_ai-0.1.0.dist-info/top_level.txt +1 -0

codegraph/qa.py ADDED Viewed

@@ -0,0 +1,471 @@
+"""Unified Code Intelligence Q&A — retrieval layer.
+This module classifies natural-language questions and routes them to the
+appropriate CodeScope retrieval strategies across three knowledge graph layers
+(Structure, Evolution, Intent).  It returns structured context — NOT
+LLM-generated answers.  The LLM reasoning is the AI assistant's job (e.g.
+Cursor, OpenClaw).
+Usage (programmatic)::
+    from codegraph.core import CodeScope
+    from codegraph.qa import codegraph_query
+    cs = CodeScope("my_db")
+    result = codegraph_query(cs, "Who calls jv_free?")
+    print(result)
+"""
+from __future__ import annotations
+import re
+import time
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from codegraph.core import CodeScope
+class QuestionCategory(str, Enum):
+    """Retrieval categories that a question can be routed to."""
+    # The string values are what RetrievalResult.to_dict() and
+    # to_context_string() emit for the category.
+    STRUCTURAL = "structural"
+    # SEMANTIC doubles as the fallback when classify_question() matches nothing.
+    SEMANTIC = "semantic"
+    HISTORICAL = "historical"
+    INTENTIONAL = "intentional"
+    # COMBINED is served by merging structural and historical retrieval.
+    COMBINED = "combined"
+@dataclass
+class ContextItem:
+    """A single piece of evidence retrieved from the knowledge graph."""
+    # Evidence kind tag set by the retrieval strategy, e.g. "caller",
+    # "hotspot", "dead_code", "commit", "co_change".
+    type: str
+    # Human-readable one-line description of the evidence.
+    content: str
+    # Origin of the evidence; in this module a file path, a module pair, or a
+    # "commit:<8-char hash prefix>" string.
+    source: str
+    # Strategy-specific ranking value (relevance, risk score, call count, ...);
+    # RetrievalResult.to_context_string() prints it only when it is > 0.
+    score: float = 0.0
+@dataclass
+class RetrievalResult:
+    """Evidence gathered for one question, together with its classification."""
+    question: str
+    category: QuestionCategory
+    context_items: list[ContextItem] = field(default_factory=list)
+    retrieval_time_ms: float = 0.0
+    entities: dict = field(default_factory=dict)
+    def to_context_string(self) -> str:
+        """Render the evidence as a plain-text block for LLM context injection."""
+        summary = (
+            f"Retrieved {len(self.context_items)} evidence items "
+            f"in {self.retrieval_time_ms:.0f}ms:"
+        )
+        out = [f"Question type: {self.category.value}", summary, ""]
+        for idx, evidence in enumerate(self.context_items, start=1):
+            out += [
+                f"[{idx}] ({evidence.type}) {evidence.content}",
+                f"    Source: {evidence.source}",
+            ]
+            # Only strictly positive scores are printed.
+            if evidence.score > 0:
+                out.append(f"    Relevance: {evidence.score:.3f}")
+            # Blank separator line after every item (also yields the trailing newline).
+            out.append("")
+        return "\n".join(out)
+    def to_dict(self) -> dict:
+        """Return a plain-dict view with the category flattened to its string value."""
+        payload = {
+            "question": self.question,
+            "category": self.category.value,
+            "retrieval_time_ms": self.retrieval_time_ms,
+            "entities": self.entities,
+        }
+        payload["context_items"] = [
+            {
+                "type": evidence.type,
+                "content": evidence.content,
+                "source": evidence.source,
+                "score": evidence.score,
+            }
+            for evidence in self.context_items
+        ]
+        return payload
+# ---------------------------------------------------------------------------
+# Question classification
+# ---------------------------------------------------------------------------
+# (regex, weight) pairs that mark a question as structural (call graph,
+# dependencies, hotspots, dead code).  Patterns are searched against the
+# lower-cased question.
+# The Chinese keywords carry no \b anchors: ideographs are word characters and
+# Chinese text has no spaces, so \b would only match an isolated keyword and
+# miss it inside a normal sentence.
+_STRUCTURAL_PATTERNS = [
+    (r"\bwho\s+calls?\b", 0.9),
+    (r"\bcallers?\s+of\b", 0.9),
+    (r"\bdepend(?:s|encies)?\s+on\b", 0.8),
+    (r"\bdead\s+code\b", 0.9),
+    (r"\bhotspot", 0.9),
+    (r"\bcoupling\b", 0.8),
+    (r"\bcircular\b", 0.8),
+    (r"\bfan.?in\b", 0.8),
+    (r"\bfan.?out\b", 0.8),
+    (r"\bimpact\b", 0.7),
+    (r"调用", 0.8),
+    (r"依赖", 0.8),
+    (r"热点", 0.9),
+    (r"死代码", 0.9),
+    (r"耦合", 0.8),
+]
+# (regex, weight) pairs that mark a question as historical (who changed what,
+# and when).  Patterns are searched against the lower-cased question.
+# The Chinese keywords carry no \b anchors: ideographs are word characters and
+# Chinese text has no spaces, so \b would prevent a match inside a sentence.
+_HISTORICAL_PATTERNS = [
+    (r"\bhow\s+has\b.*\bchanged\b", 0.9),
+    (r"\bwhen\s+was\b.*\b(introduced|added|created|modified)\b", 0.9),
+    (r"\bwhat\s+changed\b", 0.9),
+    (r"\bwho\s+(modified|changed|wrote|last)\b", 0.9),
+    (r"\bhistory\s+of\b", 0.9),
+    (r"\bcommit", 0.7),
+    (r"\bmodified\b.*\btimes\b", 0.8),
+    (r"\brecently\b.*\b(changed|modified)\b", 0.8),
+    (r"\bchurn\b", 0.8),
+    (r"变更", 0.8),
+    (r"历史", 0.8),
+    (r"谁.*修改", 0.9),
+    (r"什么时候", 0.8),
+]
+# (regex, weight) pairs that mark a question as intentional (why a change was
+# made).  Patterns are searched against the lower-cased question.
+# The Chinese keywords carry no \b anchors: ideographs are word characters and
+# Chinese text has no spaces, so \b would prevent a match inside a sentence.
+_INTENTIONAL_PATTERNS = [
+    (r"\bwhy\s+(was|were|did|is)\b", 0.95),
+    (r"\bmotivat", 0.9),
+    (r"\breason\s+for\b", 0.9),
+    (r"\bpurpose\s+of\b", 0.8),
+    (r"\bwhat\s+was\s+the\s+(reason|motivation|purpose)\b", 0.95),
+    (r"\bfix\b.*\b(bug|leak|crash|race|error)\b", 0.7),
+    (r"为什么", 0.95),
+    (r"原因", 0.8),
+    (r"目的", 0.8),
+    (r"动机", 0.9),
+]
+# (regex, weight) pairs for questions that mix structural and historical
+# signals.  Patterns are searched against the lower-cased question.
+# "hotspots?" accepts the plural as well; a strict \bhotspot\b rejected
+# phrasings such as "most volatile hotspots".
+_COMBINED_PATTERNS = [
+    (r"\bvolatile\b.*\bhotspots?\b", 0.9),
+    (r"\bhigh\b.*\b(coupling|churn)\b.*\b(coupling|churn)\b", 0.9),
+    (r"\brecently\b.*\bdead\s+code\b", 0.9),
+    (r"\bmost\b.*\b(modified|changed)\b.*\bhotspots?\b", 0.9),
+    (r"\brisk\b.*\b(churn|change)\b", 0.8),
+]
+# (regex, weight) pairs that mark a question as semantic (find code by
+# meaning).  Patterns are searched against the lower-cased question.
+# The Chinese keywords carry no \b anchors: ideographs are word characters and
+# Chinese text has no spaces, so \b would prevent a match inside a sentence.
+_SEMANTIC_PATTERNS = [
+    (r"\bhow\s+does\b.*\bhandle\b", 0.8),
+    (r"\bfind\b.*\b(functions?|code)\b.*\brelated\b", 0.8),
+    (r"\bsimilar\s+to\b", 0.8),
+    (r"\bcode\b.*\babout\b", 0.7),
+    (r"相关.*(代码|函数)", 0.8),
+    (r"类似", 0.8),
+]
+def classify_question(question: str) -> tuple[QuestionCategory, float]:
+    """Classify a question into a retrieval category using pattern matching.
+    Each category is scored by the heaviest of its patterns that matches the
+    lower-cased question.  The category with the highest score wins; on a tie
+    the category checked first wins (COMBINED, INTENTIONAL, HISTORICAL,
+    STRUCTURAL, SEMANTIC).
+    Args:
+        question: Natural-language question text.
+    Returns:
+        ``(category, score)``; ``(QuestionCategory.SEMANTIC, 0.0)`` when no
+        pattern matches at all.
+    """
+    q = question.lower().strip()
+    best_cat = QuestionCategory.SEMANTIC
+    best_score = 0.0
+    for patterns, cat in [
+        (_COMBINED_PATTERNS, QuestionCategory.COMBINED),
+        (_INTENTIONAL_PATTERNS, QuestionCategory.INTENTIONAL),
+        (_HISTORICAL_PATTERNS, QuestionCategory.HISTORICAL),
+        (_STRUCTURAL_PATTERNS, QuestionCategory.STRUCTURAL),
+        (_SEMANTIC_PATTERNS, QuestionCategory.SEMANTIC),
+    ]:
+        # Use the strongest matching pattern of the category, not the first
+        # one in list order, so the score does not depend on how the pattern
+        # lists happen to be sorted.
+        cat_score = max(
+            (
+                weight
+                for pattern, weight in patterns
+                if re.search(pattern, q, re.IGNORECASE)
+            ),
+            default=0.0,
+        )
+        # Strict ">" keeps the earlier category on equal scores.
+        if cat_score > best_score:
+            best_score = cat_score
+            best_cat = cat
+    return best_cat, best_score
+def extract_entities(question: str) -> dict:
+    """Extract function names, file paths, and subsystem references.
+    All extraction is heuristic and regex-based.
+    Args:
+        question: Natural-language question text.
+    Returns:
+        A dict holding any of the keys ``"function"``, ``"file_path"`` and
+        ``"subsystem"``; a key is absent when nothing was recognised.
+    """
+    entities: dict = {}
+    # 1) Identifier that follows a relationship phrase ("who calls X",
+    #    "history of X", ...).  The filler word after "impact of" is optional
+    #    so that both "impact of changing X" and a bare "impact of X" yield X.
+    func_match = re.search(
+        r"(?:calls?|callers?\s+of|impact\s+of(?:\s+\w+)?|history\s+of|"
+        r"who\s+calls?|depends?\s+on|co.?changed?\s+(?:of|with))"
+        r"\s+[`'\"]?(\w+)[`'\"]?",
+        question,
+        re.IGNORECASE,
+    )
+    if func_match:
+        entities["function"] = func_match.group(1)
+    # 2) Fallbacks, used only when step 1 found nothing: a quoted call such
+    #    as `name()` first, then a bare back-ticked identifier.
+    if "function" not in entities:
+        quoted_call = re.search(r"[`'\"](\w+)\(\)[`'\"]", question)
+        if quoted_call:
+            entities["function"] = quoted_call.group(1)
+    if "function" not in entities:
+        backtick = re.search(r"`(\w+)`", question)
+        if backtick:
+            entities["function"] = backtick.group(1)
+    # 3) File path: word/slash characters followed by a 1-4 character
+    #    extension.  The (?!\w) guard rejects longer extensions outright
+    #    instead of silently truncating them ("schema.proto" -> "schema.prot").
+    file_match = re.search(r"([\w/]+\.\w{1,4})(?!\w)", question)
+    if file_match:
+        entities["file_path"] = file_match.group(1)
+    # 4) Subsystem: the first "in <name>" phrase whose name looks like a
+    #    directory.  Every "in ..." occurrence is examined, so an earlier
+    #    "in the ..." does not hide a later "in src/parser".
+    known_roots = (
+        "src", "vendor", "tests", "kernel", "drivers", "fs", "mm", "net",
+    )
+    for subsys_match in re.finditer(
+        r"\bin\s+[`'\"]?([\w/]+)[`'\"]?(?:\s+(?:subsystem|module|directory))?",
+        question,
+        re.IGNORECASE,
+    ):
+        candidate = subsys_match.group(1)
+        if "/" in candidate or candidate in known_roots:
+            entities["subsystem"] = candidate
+            break
+    return entities
+# ---------------------------------------------------------------------------
+# Retrieval strategies
+# ---------------------------------------------------------------------------
+def _retrieve_structural(
+    cs: "CodeScope", question: str, entities: dict
+) -> list[ContextItem]:
+    """Collect structure-layer evidence selected by keywords in the question.
+    Each keyword group found in the question adds its own items
+    (callers/impact, hotspots, dead code, module coupling, circular
+    dependencies).  When nothing was collected and a function name was
+    extracted, an impact query for that function is used as a fallback.
+    Args:
+        cs: CodeScope instance to query.
+        question: The user's question; keywords are matched on its
+            lower-cased form.
+        entities: Output of extract_entities(); the "function" and
+            "subsystem" keys are read.
+    Returns:
+        The collected ContextItem list, possibly empty.
+    """
+    items: list[ContextItem] = []
+    q = question.lower()
+    func_name = entities.get("function")
+    # Callers / impact: needs a concrete function to start from.
+    if func_name and any(
+        kw in q for kw in ("who calls", "callers of", "调用", "impact")
+    ):
+        for r in cs.impact(func_name, question, max_hops=2)[:15]:
+            items.append(ContextItem(
+                type="caller",
+                content=f"{r.name} ({r.file_path}) — "
+                        f"hop={r.hop_distance}, relevance={r.relevance:.3f}",
+                source=f"{r.file_path}",
+                score=r.relevance,
+            ))
+    # Hotspots.  The fan-in/fan-out check accepts every spelling that the
+    # classifier patterns accept ("fan in", "fan-in", "fan_in", "fanin"),
+    # not only the "_" and "-" forms.
+    wants_hotspots = any(
+        kw in q for kw in ("hotspot", "热点", "risk", "ranking")
+    ) or re.search(r"\bfan.?(?:in|out)\b", q) is not None
+    if wants_hotspots:
+        for r in cs.hotspots(topk=15):
+            items.append(ContextItem(
+                type="hotspot",
+                content=f"{r.name} ({r.file_path}) — "
+                        f"fan_in={r.fan_in}, fan_out={r.fan_out}, "
+                        f"risk={r.risk_score:.0f}",
+                source=r.file_path,
+                score=r.risk_score,
+            ))
+    # Dead code, optionally narrowed to files under the extracted subsystem.
+    if any(kw in q for kw in ("dead code", "死代码", "unreachable", "unused")):
+        results = cs.dead_code()
+        subsys = entities.get("subsystem")
+        if subsys:
+            results = [r for r in results if r.file_path.startswith(subsys)]
+        for r in results[:20]:
+            items.append(ContextItem(
+                type="dead_code",
+                content=f"{r.name} ({r.file_path}) — {r.reason}",
+                source=r.file_path,
+            ))
+    # Module coupling.  "\bdepend" also covers "depend on" and
+    # "dependencies on", which the classifier treats as structural but the
+    # bare "depends" keyword missed.
+    wants_coupling = any(
+        kw in q for kw in ("coupling", "耦合", "依赖")
+    ) or re.search(r"\bdepend", q) is not None
+    if wants_coupling:
+        for r in cs.module_coupling(topk=10):
+            items.append(ContextItem(
+                type="coupling",
+                content=f"{r.module_a} <-> {r.module_b}: "
+                        f"{r.calls_a_to_b}+{r.calls_b_to_a} cross-calls",
+                source=f"{r.module_a}, {r.module_b}",
+                score=float(r.calls_a_to_b + r.calls_b_to_a),
+            ))
+    # Circular dependencies; each cycle is rendered as "a -> b -> ...".
+    if any(kw in q for kw in ("circular", "cycle")):
+        for cyc in cs.circular_deps()[:10]:
+            items.append(ContextItem(
+                type="circular_dep",
+                content=" -> ".join(cyc),
+                source="file-level imports",
+            ))
+    # Fallback: nothing matched (or matched but returned nothing), yet a
+    # function is known, so report its impact set.
+    if not items and func_name:
+        for r in cs.impact(func_name, question, max_hops=2)[:10]:
+            items.append(ContextItem(
+                type="caller",
+                content=f"{r.name} ({r.file_path}) — relevance={r.relevance:.3f}",
+                source=r.file_path,
+                score=r.relevance,
+            ))
+    return items
+def _retrieve_semantic(
+    cs: "CodeScope", question: str, entities: dict
+) -> list[ContextItem]:
+    """Gather similarity evidence, falling back to an intent search over commits."""
+    target = entities.get("function")
+    subsystem = entities.get("subsystem", "")
+    found: list[ContextItem] = []
+    if target:
+        # Functions similar to the extracted one, scoped to the subsystem if any.
+        found.extend(
+            ContextItem(
+                type="similar_function",
+                content=f"{hit.name} — {hit.signature} ({hit.file_path})",
+                source=hit.file_path,
+                score=hit.score,
+            )
+            for hit in cs.similar(target, subsystem, topk=10)
+        )
+    if found:
+        return found
+    # No function, or no similar functions: search commit intent with the
+    # whole question instead.
+    for hit in cs.intent_search(question, topk=10):
+        touched = ", ".join(hit.functions_modified[:5])
+        commit_ref = f"commit:{hit.commit_hash[:8]}"
+        found.append(ContextItem(
+            type="semantic_match",
+            content=f"Commit {hit.commit_hash[:8]}: {hit.message[:100]}",
+            source=commit_ref,
+            score=hit.similarity_score,
+        ))
+        if not touched:
+            continue
+        found.append(ContextItem(
+            type="modified_functions",
+            content=f"Functions: {touched}",
+            source=commit_ref,
+            score=hit.similarity_score,
+        ))
+    return found
+def _retrieve_historical(
+    cs: "CodeScope", question: str, entities: dict
+) -> list[ContextItem]:
+    """Collect evolution-layer evidence: commits and co-changed functions.
+    With an extracted function name, returns its change attribution followed
+    by the functions that changed together with it.  Without one, falls back
+    to an intent search over commits using the whole question.
+    Args:
+        cs: CodeScope instance to query.
+        question: The user's question (used only by the fallback search).
+        entities: Output of extract_entities(); the "function" and
+            "file_path" keys are read.
+    Returns:
+        The collected ContextItem list, possibly empty.
+    """
+    items: list[ContextItem] = []
+    func_name = entities.get("function")
+    file_path = entities.get("file_path")
+    if func_name:
+        attr = cs.change_attribution(func_name, file_path, limit=15)
+        for a in attr:
+            items.append(ContextItem(
+                type="commit",
+                content=f"[{a.change_type}] {a.author}: {a.message[:120]}",
+                source=f"commit:{a.commit_hash[:8]}",
+                # NOTE(review): the commit timestamp is stored as the score,
+                # presumably so newer commits sort higher; it is not a 0-1
+                # relevance like the other strategies produce.
+                score=float(a.timestamp),
+            ))
+        co = cs.co_change(func_name, file_path, min_commits=1, topk=10)
+        for c in co:
+            items.append(ContextItem(
+                type="co_change",
+                content=f"{c.function_name} ({c.file_path}) — "
+                        f"co-changed {c.co_change_count}x in "
+                        f"{', '.join(h[:8] for h in c.shared_commits[:3])}",
+                source=c.file_path,
+                score=float(c.co_change_count),
+            ))
+    else:
+        # The per-result list of modified functions was previously joined
+        # into a local that was never used; that dead work is removed.
+        for r in cs.intent_search(question, topk=10):
+            items.append(ContextItem(
+                type="commit",
+                content=f"{r.commit_hash[:8]}: {r.message[:120]}",
+                source=f"commit:{r.commit_hash[:8]}",
+                score=r.similarity_score,
+            ))
+    return items
+def _retrieve_intentional(
+    cs: "CodeScope", question: str, entities: dict
+) -> list[ContextItem]:
+    """Gather commit-intent matches, plus attribution when a function is known."""
+    evidence: list[ContextItem] = []
+    for match in cs.intent_search(question, topk=10):
+        commit_ref = f"commit:{match.commit_hash[:8]}"
+        evidence.append(ContextItem(
+            type="intent_match",
+            content=f"Commit {match.commit_hash[:8]}: {match.message[:150]}",
+            source=commit_ref,
+            score=match.similarity_score,
+        ))
+        if not match.functions_modified:
+            continue
+        # Companion item listing at most five functions the commit touched.
+        touched = ", ".join(match.functions_modified[:5])
+        evidence.append(ContextItem(
+            type="modified_functions",
+            content=f"Modified: {touched}",
+            source=commit_ref,
+            score=match.similarity_score,
+        ))
+    target = entities.get("function")
+    if not target:
+        return evidence
+    # Append up to five attribution records for the extracted function.
+    history = cs.change_attribution(target, entities.get("file_path"))
+    evidence.extend(
+        ContextItem(
+            type="attribution",
+            content=f"[{entry.change_type}] {entry.author}: {entry.message[:120]}",
+            source=f"commit:{entry.commit_hash[:8]}",
+        )
+        for entry in history[:5]
+    )
+    return evidence
+def _retrieve_combined(
+    cs: "CodeScope", question: str, entities: dict
+) -> list[ContextItem]:
+    """Merge structural and historical evidence; use semantic search only if both are empty."""
+    merged: list[ContextItem] = [
+        *_retrieve_structural(cs, question, entities),
+        *_retrieve_historical(cs, question, entities),
+    ]
+    if merged:
+        return merged
+    # Neither layer produced anything, so fall back to semantic retrieval.
+    merged.extend(_retrieve_semantic(cs, question, entities))
+    return merged
+# Dispatch table used by codegraph_query(): maps every QuestionCategory to the
+# function that gathers evidence for it.  All strategies share the signature
+# (cs, question, entities) -> list[ContextItem].
+_STRATEGY_MAP = {
+    QuestionCategory.STRUCTURAL: _retrieve_structural,
+    QuestionCategory.SEMANTIC: _retrieve_semantic,
+    QuestionCategory.HISTORICAL: _retrieve_historical,
+    QuestionCategory.INTENTIONAL: _retrieve_intentional,
+    QuestionCategory.COMBINED: _retrieve_combined,
+}
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+def codegraph_query(cs: "CodeScope", question: str) -> RetrievalResult:
+    """Unified entry point: classify, retrieve, return structured context.
+    Args:
+        cs: CodeScope instance the retrieval strategies query.
+        question: Natural-language question text.
+    Returns:
+        A RetrievalResult carrying the detected category, the extracted
+        entities, the evidence items and the elapsed time in milliseconds
+        (classification and entity extraction included).
+    """
+    # perf_counter() is a monotonic clock, so the measured duration cannot be
+    # skewed or turn negative when the system wall clock is adjusted.
+    t0 = time.perf_counter()
+    # The classification score is not part of the result; only the category is.
+    category, _score = classify_question(question)
+    entities = extract_entities(question)
+    strategy = _STRATEGY_MAP[category]
+    context_items = strategy(cs, question, entities)
+    elapsed_ms = (time.perf_counter() - t0) * 1000
+    return RetrievalResult(
+        question=question,
+        category=category,
+        context_items=context_items,
+        retrieval_time_ms=elapsed_ms,
+        entities=entities,
+    )

codegraph_ai-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,14 @@
+Metadata-Version: 2.4
+Name: codegraph-ai
+Version: 0.1.0
+Summary: Hybrid graph + vector code intelligence powered by NeuG and zvec
+Requires-Python: >=3.10
+Requires-Dist: neug
+Requires-Dist: zvec
+Requires-Dist: tree-sitter-language-pack
+Requires-Dist: sentence-transformers
+Requires-Dist: numpy
+Provides-Extra: server
+Requires-Dist: fastmcp; extra == "server"
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0; extra == "dev"

codegraph_ai-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,18 @@
+codegraph/__init__.py,sha256=qIBQEFex1NisDhc5DBNwFkj29HNoGQNCaGOEH_Ozwxo,76
+codegraph/__main__.py,sha256=f1U6TofPM03KHXoVidcWv24Y8wwXb_L-CYNEW9iu2D0,95
+codegraph/analyzer.py,sha256=x093iNVrTCtba-MvlZEFkzHmotm_2uh_8N92T0SCcw8,15400
+codegraph/cli.py,sha256=keim_D2fHntw29jxKn_KFnAHHjRXN2qHEIAG1KGHSpg,16772
+codegraph/core.py,sha256=I_XvWPtl7u5Du8ke6byMdvJsLh23PMHiQPdcfjsgRJ4,134160
+codegraph/mcp_server.py,sha256=OCqJx2tsynrvixMwL1p26lbwHSX7ODQJ4zpGQizAfqM,19511
+codegraph/models.py,sha256=g9id4c1LuoQDoA5fzTLf091vQLTANIAvoPCovf1kFJU,6814
+codegraph/qa.py,sha256=mktPfvmO984Ao9vWWwCCFv9o5QCc6xQcj2awf6HHCT4,15371
+codegraph/adapters/__init__.py,sha256=DBKvBU_xkVHlqxzvg6Ea-5h3BH5NsYCDe2tHMEYRrkI,75
+codegraph/adapters/base.py,sha256=O2EP59EC-VwWTKUKngdBUih317mfvIiF57yNhupoaEY,1160
+codegraph/adapters/c_adapter.py,sha256=-_LJ0_wLHzVPsPUFtJXA1uTlSMoyo3PBTdgbGTxQepc,17413
+codegraph/adapters/js_adapter.py,sha256=iIuBlybfhl2PUBmxWUFHnf6eVwVHuGHALPS18NmJIXk,19489
+codegraph/adapters/python_adapter.py,sha256=b5mB9Ixy7jDHOrIGKLtabUYjPhhQI9JAakPq8y4i8nc,12497
+codegraph_ai-0.1.0.dist-info/METADATA,sha256=WDvTYj9n7J4KfnMPq0pjHXC9JC1QtH16HKG9VsrbLG0,421
+codegraph_ai-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
+codegraph_ai-0.1.0.dist-info/entry_points.txt,sha256=3f2dJK7oR3dBzP21qRk_KuQa6Li8MVbVXeKcx3UjQ6c,49
+codegraph_ai-0.1.0.dist-info/top_level.txt,sha256=RqBj9sPbifZTb9aeHtnbxTgKJvfHIQdmYE1Brv8Wdkg,10
+codegraph_ai-0.1.0.dist-info/RECORD,,

codegraph_ai-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (82.0.1)
+Root-Is-Purelib: true
+Tag: py3-none-any

codegraph_ai-0.1.0.dist-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ codegraph = codegraph.cli:main

codegraph_ai-0.1.0.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ codegraph