code-graph-builder 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_graph_builder/__init__.py +82 -0
- code_graph_builder/builder.py +366 -0
- code_graph_builder/cgb_cli.py +32 -0
- code_graph_builder/cli.py +564 -0
- code_graph_builder/commands_cli.py +1288 -0
- code_graph_builder/config.py +340 -0
- code_graph_builder/constants.py +708 -0
- code_graph_builder/embeddings/__init__.py +40 -0
- code_graph_builder/embeddings/qwen3_embedder.py +573 -0
- code_graph_builder/embeddings/vector_store.py +584 -0
- code_graph_builder/examples/__init__.py +0 -0
- code_graph_builder/examples/example_configuration.py +276 -0
- code_graph_builder/examples/example_kuzu_usage.py +109 -0
- code_graph_builder/examples/example_semantic_search_full.py +347 -0
- code_graph_builder/examples/generate_wiki.py +915 -0
- code_graph_builder/examples/graph_export_example.py +100 -0
- code_graph_builder/examples/rag_example.py +206 -0
- code_graph_builder/examples/test_cli_demo.py +129 -0
- code_graph_builder/examples/test_embedding_api.py +153 -0
- code_graph_builder/examples/test_kuzu_local.py +190 -0
- code_graph_builder/examples/test_rag_redis.py +390 -0
- code_graph_builder/graph_updater.py +605 -0
- code_graph_builder/guidance/__init__.py +1 -0
- code_graph_builder/guidance/agent.py +123 -0
- code_graph_builder/guidance/prompts.py +74 -0
- code_graph_builder/guidance/toolset.py +264 -0
- code_graph_builder/language_spec.py +536 -0
- code_graph_builder/mcp/__init__.py +21 -0
- code_graph_builder/mcp/api_doc_generator.py +764 -0
- code_graph_builder/mcp/file_editor.py +207 -0
- code_graph_builder/mcp/pipeline.py +777 -0
- code_graph_builder/mcp/server.py +161 -0
- code_graph_builder/mcp/tools.py +1800 -0
- code_graph_builder/models.py +115 -0
- code_graph_builder/parser_loader.py +344 -0
- code_graph_builder/parsers/__init__.py +7 -0
- code_graph_builder/parsers/call_processor.py +306 -0
- code_graph_builder/parsers/call_resolver.py +139 -0
- code_graph_builder/parsers/definition_processor.py +796 -0
- code_graph_builder/parsers/factory.py +119 -0
- code_graph_builder/parsers/import_processor.py +293 -0
- code_graph_builder/parsers/structure_processor.py +145 -0
- code_graph_builder/parsers/type_inference.py +143 -0
- code_graph_builder/parsers/utils.py +134 -0
- code_graph_builder/rag/__init__.py +68 -0
- code_graph_builder/rag/camel_agent.py +429 -0
- code_graph_builder/rag/client.py +298 -0
- code_graph_builder/rag/config.py +239 -0
- code_graph_builder/rag/cypher_generator.py +67 -0
- code_graph_builder/rag/llm_backend.py +210 -0
- code_graph_builder/rag/markdown_generator.py +352 -0
- code_graph_builder/rag/prompt_templates.py +440 -0
- code_graph_builder/rag/rag_engine.py +640 -0
- code_graph_builder/rag/review_report.md +172 -0
- code_graph_builder/rag/tests/__init__.py +3 -0
- code_graph_builder/rag/tests/test_camel_agent.py +313 -0
- code_graph_builder/rag/tests/test_client.py +221 -0
- code_graph_builder/rag/tests/test_config.py +177 -0
- code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
- code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
- code_graph_builder/services/__init__.py +39 -0
- code_graph_builder/services/graph_service.py +465 -0
- code_graph_builder/services/kuzu_service.py +665 -0
- code_graph_builder/services/memory_service.py +171 -0
- code_graph_builder/settings.py +75 -0
- code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
- code_graph_builder/tests/__init__.py +1 -0
- code_graph_builder/tests/run_acceptance_check.py +378 -0
- code_graph_builder/tests/test_api_find.py +231 -0
- code_graph_builder/tests/test_api_find_integration.py +226 -0
- code_graph_builder/tests/test_basic.py +78 -0
- code_graph_builder/tests/test_c_api_extraction.py +388 -0
- code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
- code_graph_builder/tests/test_embedder.py +411 -0
- code_graph_builder/tests/test_integration_semantic.py +434 -0
- code_graph_builder/tests/test_mcp_protocol.py +298 -0
- code_graph_builder/tests/test_mcp_user_flow.py +190 -0
- code_graph_builder/tests/test_rag.py +404 -0
- code_graph_builder/tests/test_settings.py +135 -0
- code_graph_builder/tests/test_step1_graph_build.py +264 -0
- code_graph_builder/tests/test_step2_api_docs.py +323 -0
- code_graph_builder/tests/test_step3_embedding.py +278 -0
- code_graph_builder/tests/test_vector_store.py +552 -0
- code_graph_builder/tools/__init__.py +40 -0
- code_graph_builder/tools/graph_query.py +495 -0
- code_graph_builder/tools/semantic_search.py +387 -0
- code_graph_builder/types.py +333 -0
- code_graph_builder/utils/__init__.py +0 -0
- code_graph_builder/utils/path_utils.py +30 -0
- code_graph_builder-0.2.0.dist-info/METADATA +321 -0
- code_graph_builder-0.2.0.dist-info/RECORD +93 -0
- code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
- code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
"""LLM backend abstraction for RAG and Cypher generation.
|
|
2
|
+
|
|
3
|
+
Provides a unified interface to call any OpenAI-compatible chat-completion API.
|
|
4
|
+
The provider is auto-detected from environment variables in this priority:
|
|
5
|
+
|
|
6
|
+
1. ``LLM_API_KEY`` / ``LLM_BASE_URL`` / ``LLM_MODEL`` (generic, highest)
|
|
7
|
+
2. ``OPENAI_API_KEY`` / ``OPENAI_BASE_URL`` / ``OPENAI_MODEL``
|
|
8
|
+
3. ``MOONSHOT_API_KEY`` / ``MOONSHOT_MODEL`` (legacy default)
|
|
9
|
+
|
|
10
|
+
When installed as an MCP server in Claude Code, configure the environment
|
|
11
|
+
variables in ``settings.json`` → ``mcpServers`` → ``env``.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import os
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
from loguru import logger
|
|
21
|
+
|
|
22
|
+
# Provider detection order: each tuple is (key_env, base_url_env, model_env, default_base_url, default_model).
# create_llm_backend() walks this list top-to-bottom and uses the first entry whose
# key env var is set and non-empty.
_PROVIDER_ENVS: list[tuple[str, str, str, str, str]] = [
    # Generic — user explicitly chose an LLM
    ("LLM_API_KEY", "LLM_BASE_URL", "LLM_MODEL", "https://api.openai.com/v1", "gpt-4o"),
    # OpenAI / compatible (DeepSeek, Together, etc.)
    ("OPENAI_API_KEY", "OPENAI_BASE_URL", "OPENAI_MODEL", "https://api.openai.com/v1", "gpt-4o"),
    # Moonshot / Kimi (legacy default)
    # NOTE(review): the base-url env here is LLM_BASE_URL rather than MOONSHOT_BASE_URL —
    # presumably intentional (generic URL override for the legacy provider), but confirm
    # before relying on it; renaming it would break existing deployments.
    ("MOONSHOT_API_KEY", "LLM_BASE_URL", "MOONSHOT_MODEL", "https://api.moonshot.cn/v1", "kimi-k2.5"),
]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class ToolCall:
    """A single tool invocation returned by the LLM."""

    # Call id taken verbatim from the API response (tc["id"]).
    id: str
    # Function name the model wants invoked (tc["function"]["name"]).
    function_name: str
    arguments: str  # JSON-encoded string (tc["function"]["arguments"]), not yet parsed
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass
class ChatMessage:
    """Structured response from a chat completion that may contain tool calls."""

    # Assistant text; None when the model answered only with tool calls.
    content: str | None
    # Parsed tool invocations, or None when the response carried none.
    tool_calls: list[ToolCall] | None
    # Finish reason from the API choice (e.g. "stop"); "stop" when absent.
    finish_reason: str
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
class LLMBackend:
    """Generic LLM backend that calls an OpenAI-compatible chat-completion API.

    Attributes:
        api_key: Bearer token sent in the ``Authorization`` header.
        model: Model identifier passed to the API.
        base_url: API root; ``/chat/completions`` is appended per request.
        temperature: Default sampling temperature (overridable per call).
        max_tokens: Default completion token budget (overridable per call).
    """

    api_key: str = ""
    model: str = "gpt-4o"
    base_url: str = "https://api.openai.com/v1"
    temperature: float = 1.0
    max_tokens: int = 4096

    @property
    def available(self) -> bool:
        """Return *True* when an API key has been configured."""
        return bool(self.api_key)

    def _base_payload(
        self, messages: list[dict[str, Any]], **kwargs: Any
    ) -> dict[str, Any]:
        """Build the request payload shared by :meth:`chat` and :meth:`chat_with_tools`.

        ``temperature`` / ``max_tokens`` kwargs override the instance defaults.
        """
        return {
            "model": self.model,
            "messages": messages,
            "temperature": kwargs.get("temperature", self.temperature),
            "max_tokens": kwargs.get("max_tokens", self.max_tokens),
        }

    def _post_chat(self, payload: dict[str, Any], timeout: float) -> dict[str, Any]:
        """POST *payload* to ``{base_url}/chat/completions`` and return the JSON body.

        Raises:
            ImportError: If ``httpx`` is not installed.
            httpx.HTTPStatusError: On a non-2xx response (via ``raise_for_status``).
        """
        try:
            import httpx
        except ImportError as exc:
            # Chain the cause so the original import failure stays visible.
            raise ImportError(
                "httpx is required for LLM backend. "
                "Install it with: pip install httpx"
            ) from exc

        resp = httpx.post(
            f"{self.base_url}/chat/completions",
            json=payload,
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            },
            timeout=timeout,
        )
        resp.raise_for_status()
        return resp.json()

    def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str:
        """Send a chat completion request and return the assistant's response text.

        Args:
            messages: OpenAI-style message dicts (``role`` / ``content``).
            **kwargs: Optional ``temperature``, ``max_tokens``, ``timeout``
                overrides (a ``timeout`` kwarg was previously ignored here;
                it is now honored, defaulting to 60 seconds).

        Returns:
            The assistant message text; falls back to ``reasoning_content``
            when ``content`` is empty (some providers put text there).
        """
        data = self._post_chat(
            self._base_payload(messages, **kwargs),
            timeout=kwargs.get("timeout", 60.0),
        )
        message = data["choices"][0]["message"]
        return message.get("content") or message.get("reasoning_content", "")

    def chat_with_tools(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]] | None = None,
        **kwargs: Any,
    ) -> ChatMessage:
        """Send a chat completion with optional tool definitions.

        Returns a :class:`ChatMessage` that may contain ``tool_calls`` when the
        LLM decides to invoke one or more tools. If *tools* is ``None`` or
        empty, behaves like :meth:`chat` but returns a structured message.

        Args:
            messages: OpenAI-style message dicts.
            tools: Tool/function schemas in the OpenAI ``tools`` format.
            **kwargs: Optional ``temperature``, ``max_tokens``, ``tool_choice``
                and ``timeout`` (default 120 seconds) overrides.
        """
        payload = self._base_payload(messages, **kwargs)
        if tools:
            payload["tools"] = tools
            payload["tool_choice"] = kwargs.get("tool_choice", "auto")

        data = self._post_chat(payload, timeout=kwargs.get("timeout", 120.0))

        choice = data["choices"][0]
        message = choice["message"]

        # Parse raw tool-call dicts into typed ToolCall records (None when absent).
        raw_calls = message.get("tool_calls")
        parsed_calls: list[ToolCall] | None = None
        if raw_calls:
            parsed_calls = [
                ToolCall(
                    id=tc["id"],
                    function_name=tc["function"]["name"],
                    arguments=tc["function"]["arguments"],
                )
                for tc in raw_calls
            ]

        return ChatMessage(
            content=message.get("content"),
            tool_calls=parsed_calls,
            finish_reason=choice.get("finish_reason", "stop"),
        )
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def create_llm_backend(**kwargs: Any) -> LLMBackend:
    """Build an :class:`LLMBackend`, auto-detecting the provider from env vars.

    Providers are probed in `_PROVIDER_ENVS` order (first set key wins):
    1. ``LLM_API_KEY`` — generic override
    2. ``OPENAI_API_KEY`` — OpenAI or any compatible endpoint
    3. ``MOONSHOT_API_KEY`` — Moonshot / Kimi (legacy)

    Explicit ``api_key`` / ``base_url`` / ``model`` keyword arguments take
    precedence over anything found in the environment; remaining kwargs are
    forwarded to the :class:`LLMBackend` constructor.
    """
    explicit_key = kwargs.pop("api_key", None)
    explicit_url = kwargs.pop("base_url", None)
    explicit_model = kwargs.pop("model", None)

    # Probe providers in priority order; first non-empty key env var wins.
    detected_key = detected_url = detected_model = detected_provider = ""
    matched = next(
        (spec for spec in _PROVIDER_ENVS if os.environ.get(spec[0], "")),
        None,
    )
    if matched is not None:
        key_env, url_env, model_env, default_url, default_model = matched
        detected_key = os.environ.get(key_env, "")
        detected_url = os.environ.get(url_env, default_url)
        detected_model = os.environ.get(model_env, default_model)
        detected_provider = key_env

    # Explicit kwargs beat detection; hard-coded OpenAI defaults come last.
    api_key = explicit_key or detected_key
    base_url = explicit_url or detected_url or "https://api.openai.com/v1"
    model = explicit_model or detected_model or "gpt-4o"

    if api_key:
        logger.info(
            f"LLM backend: model={model}, base_url={base_url} "
            f"(detected via {detected_provider or 'explicit kwargs'})"
        )
    else:
        logger.warning(
            "No LLM API key found in environment. "
            "Set one of: LLM_API_KEY, OPENAI_API_KEY, or MOONSHOT_API_KEY. "
            "Tools that require LLM (query_code_graph, wiki generation) will be unavailable."
        )

    return LLMBackend(api_key=api_key, base_url=base_url, model=model, **kwargs)
|
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
"""Markdown output generator for RAG responses.
|
|
2
|
+
|
|
3
|
+
This module provides utilities for generating well-formatted markdown
|
|
4
|
+
documentation from RAG analysis results.
|
|
5
|
+
|
|
6
|
+
Examples:
|
|
7
|
+
>>> from code_graph_builder.rag.markdown_generator import MarkdownGenerator
|
|
8
|
+
>>> generator = MarkdownGenerator()
|
|
9
|
+
>>> markdown = generator.generate_analysis_doc(
|
|
10
|
+
... title="Authentication System",
|
|
11
|
+
... query="Explain authentication",
|
|
12
|
+
... response="The auth system...",
|
|
13
|
+
... sources=[{"name": "auth.py", "path": "src/auth.py"}]
|
|
14
|
+
... )
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
from datetime import UTC, datetime
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import TYPE_CHECKING, Any
|
|
23
|
+
|
|
24
|
+
from loguru import logger
|
|
25
|
+
|
|
26
|
+
if TYPE_CHECKING:
|
|
27
|
+
from .prompt_templates import CodeContext
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class SourceReference:
    """Pointer to a source-code entity, with markdown rendering helpers.

    Attributes:
        name: Entity name.
        qualified_name: Fully qualified name.
        file_path: Source file path.
        line_start: Start line number (optional).
        line_end: End line number (optional).
        entity_type: Kind of entity, e.g. Function or Class (optional).
    """

    name: str
    qualified_name: str
    file_path: str
    line_start: int | None = None
    line_end: int | None = None
    entity_type: str | None = None

    def format_link(self) -> str:
        """Render as a markdown link: ``[qualified_name](path:start-end)``."""
        suffix = ""
        if self.line_start:
            suffix = f":{self.line_start}"
        # NOTE(review): end-line suffix is appended independently of line_start,
        # mirroring the original flat conditionals.
        if self.line_end and self.line_end != self.line_start:
            suffix += f"-{self.line_end}"
        return f"[{self.qualified_name}]({self.file_path}{suffix})"

    def to_dict(self) -> dict[str, Any]:
        """Return all fields as a plain dict (keys in declaration order)."""
        return asdict(self)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass
|
|
72
|
+
class AnalysisResult:
|
|
73
|
+
"""Result of a RAG analysis.
|
|
74
|
+
|
|
75
|
+
Attributes:
|
|
76
|
+
query: Original user query
|
|
77
|
+
response: Generated response
|
|
78
|
+
sources: List of source references
|
|
79
|
+
metadata: Additional metadata
|
|
80
|
+
timestamp: Analysis timestamp
|
|
81
|
+
"""
|
|
82
|
+
|
|
83
|
+
query: str
|
|
84
|
+
response: str
|
|
85
|
+
sources: list[SourceReference] = field(default_factory=list)
|
|
86
|
+
metadata: dict[str, Any] = field(default_factory=dict)
|
|
87
|
+
timestamp: datetime = field(default_factory=lambda: datetime.now(UTC))
|
|
88
|
+
|
|
89
|
+
def to_dict(self) -> dict[str, Any]:
|
|
90
|
+
"""Convert to dictionary."""
|
|
91
|
+
return {
|
|
92
|
+
"query": self.query,
|
|
93
|
+
"response": self.response,
|
|
94
|
+
"sources": [s.to_dict() for s in self.sources],
|
|
95
|
+
"metadata": self.metadata,
|
|
96
|
+
"timestamp": self.timestamp.isoformat(),
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class MarkdownGenerator:
    """Renders RAG analysis results as well-formatted markdown documents.

    Args:
        include_toc: Whether to emit a table-of-contents section.
        include_timestamp: Whether to emit a generation timestamp line.
        include_metadata: Whether to emit a metadata section.

    Examples:
        >>> generator = MarkdownGenerator(include_toc=True)
        >>> doc = generator.generate_analysis_doc(
        ...     title="Code Analysis",
        ...     result=analysis_result
        ... )
    """

    def __init__(
        self,
        include_toc: bool = True,
        include_timestamp: bool = True,
        include_metadata: bool = True,
    ):
        self.include_toc = include_toc
        self.include_timestamp = include_timestamp
        self.include_metadata = include_metadata

    def generate_analysis_doc(
        self,
        title: str,
        result: AnalysisResult,
    ) -> str:
        """Render an analysis result as a markdown document.

        Args:
            title: Document title.
            result: Analysis result to render.

        Returns:
            Markdown document as a single string.
        """
        doc: list[str] = [f"# {title}", ""]

        if self.include_timestamp:
            stamp = result.timestamp.strftime('%Y-%m-%d %H:%M:%S UTC')
            doc += [f"*Generated: {stamp}*", ""]

        # TOC entries mirror the sections emitted below.
        if self.include_toc:
            doc += [
                "## Table of Contents",
                "",
                "- [Query](#query)",
                "- [Analysis](#analysis)",
            ]
            if result.sources:
                doc.append("- [Sources](#sources)")
            if self.include_metadata and result.metadata:
                doc.append("- [Metadata](#metadata)")
            doc.append("")

        doc += ["## Query", "", f"> {result.query}", ""]
        doc += ["## Analysis", "", result.response, ""]

        if result.sources:
            doc += ["## Sources", ""]
            for idx, ref in enumerate(result.sources, 1):
                doc.append(f"{idx}. {ref.format_link()}")
                if ref.entity_type:
                    doc.append(f"   - Type: {ref.entity_type}")
            doc.append("")

        if self.include_metadata and result.metadata:
            doc += ["## Metadata", ""]
            doc += [f"- **{key}**: {value}" for key, value in result.metadata.items()]
            doc.append("")

        return "\n".join(doc)

    def generate_code_documentation(
        self,
        context: CodeContext,
        analysis: str,
    ) -> str:
        """Render documentation for a single code entity.

        Args:
            context: Code context (entity metadata plus source text).
            analysis: Pre-generated analysis text.

        Returns:
            Markdown documentation as a single string.
        """
        heading = context.qualified_name or context.entity_type or "Code Documentation"
        doc: list[str] = [f"# {heading}", ""]

        # Entity metadata block — only the fields that are present.
        if context.entity_type:
            doc.append(f"**Type:** {context.entity_type}")
        if context.file_path:
            doc.append(f"**File:** `{context.file_path}`")
        doc.append("")

        doc += [analysis, ""]

        doc += ["## Source Code", "", "```python", context.source_code, "```", ""]

        # One bulleted section per call-graph direction, skipped when empty.
        for section, names in (
            ("## Called By", context.callers),
            ("## Calls", context.callees),
        ):
            if names:
                doc += [section, ""]
                doc += [f"- `{entry}`" for entry in names]
                doc.append("")

        return "\n".join(doc)

    def generate_comparison_doc(
        self,
        title: str,
        query: str,
        contexts: list[CodeContext],
        analysis: str,
    ) -> str:
        """Render a comparison of several code entities.

        Args:
            title: Document title.
            query: Original query.
            contexts: Code contexts being compared.
            analysis: Comparative analysis text.

        Returns:
            Markdown document as a single string.
        """
        doc: list[str] = [f"# {title}", "", f"**Query:** {query}", ""]
        doc += ["## Comparison Analysis", "", analysis, ""]
        doc += ["## Compared Entities", ""]

        for idx, ctx in enumerate(contexts, 1):
            label = ctx.qualified_name or f"Entity {idx}"
            doc.append(f"### {idx}. {label}")
            if ctx.file_path:
                doc.append(f"**File:** `{ctx.file_path}`")
            doc.append("")
            # Cap each snippet at 500 chars to keep the document readable.
            snippet = ctx.source_code
            if len(snippet) > 500:
                snippet = snippet[:500] + "..."
            doc += ["```python", snippet, "```", ""]

        return "\n".join(doc)

    def save_document(
        self,
        content: str,
        output_path: str | Path,
    ) -> Path:
        """Write markdown content to *output_path*, creating parent dirs.

        Args:
            content: Markdown content.
            output_path: Destination file path.

        Returns:
            The resolved :class:`Path` that was written.
        """
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(content, encoding="utf-8")
        logger.info(f"Saved markdown document to {output_path}")
        return output_path
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def create_source_reference_from_context(
    context: CodeContext,
) -> SourceReference:
    """Build a :class:`SourceReference` from a :class:`CodeContext`.

    Args:
        context: Code context to summarize.

    Returns:
        SourceReference pointing at the context's entity; missing fields
        fall back to ``"unknown"`` / ``""``.
    """
    qualified = context.qualified_name or "unknown"
    # Short name is the last dotted segment, or "unknown" when absent.
    short_name = qualified.rsplit(".", 1)[-1] if context.qualified_name else "unknown"
    return SourceReference(
        name=short_name,
        qualified_name=qualified,
        file_path=context.file_path or "",
        entity_type=context.entity_type,
    )
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def format_code_block(code: str, language: str = "python") -> str:
    """Wrap *code* in a fenced markdown code block.

    Args:
        code: Source code text.
        language: Fence language identifier (defaults to ``python``).

    Returns:
        The fenced code block as a single string.
    """
    return "\n".join((f"```{language}", code, "```"))
|