code-graph-builder 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_graph_builder/__init__.py +82 -0
- code_graph_builder/builder.py +366 -0
- code_graph_builder/cgb_cli.py +32 -0
- code_graph_builder/cli.py +564 -0
- code_graph_builder/commands_cli.py +1288 -0
- code_graph_builder/config.py +340 -0
- code_graph_builder/constants.py +708 -0
- code_graph_builder/embeddings/__init__.py +40 -0
- code_graph_builder/embeddings/qwen3_embedder.py +573 -0
- code_graph_builder/embeddings/vector_store.py +584 -0
- code_graph_builder/examples/__init__.py +0 -0
- code_graph_builder/examples/example_configuration.py +276 -0
- code_graph_builder/examples/example_kuzu_usage.py +109 -0
- code_graph_builder/examples/example_semantic_search_full.py +347 -0
- code_graph_builder/examples/generate_wiki.py +915 -0
- code_graph_builder/examples/graph_export_example.py +100 -0
- code_graph_builder/examples/rag_example.py +206 -0
- code_graph_builder/examples/test_cli_demo.py +129 -0
- code_graph_builder/examples/test_embedding_api.py +153 -0
- code_graph_builder/examples/test_kuzu_local.py +190 -0
- code_graph_builder/examples/test_rag_redis.py +390 -0
- code_graph_builder/graph_updater.py +605 -0
- code_graph_builder/guidance/__init__.py +1 -0
- code_graph_builder/guidance/agent.py +123 -0
- code_graph_builder/guidance/prompts.py +74 -0
- code_graph_builder/guidance/toolset.py +264 -0
- code_graph_builder/language_spec.py +536 -0
- code_graph_builder/mcp/__init__.py +21 -0
- code_graph_builder/mcp/api_doc_generator.py +764 -0
- code_graph_builder/mcp/file_editor.py +207 -0
- code_graph_builder/mcp/pipeline.py +777 -0
- code_graph_builder/mcp/server.py +161 -0
- code_graph_builder/mcp/tools.py +1800 -0
- code_graph_builder/models.py +115 -0
- code_graph_builder/parser_loader.py +344 -0
- code_graph_builder/parsers/__init__.py +7 -0
- code_graph_builder/parsers/call_processor.py +306 -0
- code_graph_builder/parsers/call_resolver.py +139 -0
- code_graph_builder/parsers/definition_processor.py +796 -0
- code_graph_builder/parsers/factory.py +119 -0
- code_graph_builder/parsers/import_processor.py +293 -0
- code_graph_builder/parsers/structure_processor.py +145 -0
- code_graph_builder/parsers/type_inference.py +143 -0
- code_graph_builder/parsers/utils.py +134 -0
- code_graph_builder/rag/__init__.py +68 -0
- code_graph_builder/rag/camel_agent.py +429 -0
- code_graph_builder/rag/client.py +298 -0
- code_graph_builder/rag/config.py +239 -0
- code_graph_builder/rag/cypher_generator.py +67 -0
- code_graph_builder/rag/llm_backend.py +210 -0
- code_graph_builder/rag/markdown_generator.py +352 -0
- code_graph_builder/rag/prompt_templates.py +440 -0
- code_graph_builder/rag/rag_engine.py +640 -0
- code_graph_builder/rag/review_report.md +172 -0
- code_graph_builder/rag/tests/__init__.py +3 -0
- code_graph_builder/rag/tests/test_camel_agent.py +313 -0
- code_graph_builder/rag/tests/test_client.py +221 -0
- code_graph_builder/rag/tests/test_config.py +177 -0
- code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
- code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
- code_graph_builder/services/__init__.py +39 -0
- code_graph_builder/services/graph_service.py +465 -0
- code_graph_builder/services/kuzu_service.py +665 -0
- code_graph_builder/services/memory_service.py +171 -0
- code_graph_builder/settings.py +75 -0
- code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
- code_graph_builder/tests/__init__.py +1 -0
- code_graph_builder/tests/run_acceptance_check.py +378 -0
- code_graph_builder/tests/test_api_find.py +231 -0
- code_graph_builder/tests/test_api_find_integration.py +226 -0
- code_graph_builder/tests/test_basic.py +78 -0
- code_graph_builder/tests/test_c_api_extraction.py +388 -0
- code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
- code_graph_builder/tests/test_embedder.py +411 -0
- code_graph_builder/tests/test_integration_semantic.py +434 -0
- code_graph_builder/tests/test_mcp_protocol.py +298 -0
- code_graph_builder/tests/test_mcp_user_flow.py +190 -0
- code_graph_builder/tests/test_rag.py +404 -0
- code_graph_builder/tests/test_settings.py +135 -0
- code_graph_builder/tests/test_step1_graph_build.py +264 -0
- code_graph_builder/tests/test_step2_api_docs.py +323 -0
- code_graph_builder/tests/test_step3_embedding.py +278 -0
- code_graph_builder/tests/test_vector_store.py +552 -0
- code_graph_builder/tools/__init__.py +40 -0
- code_graph_builder/tools/graph_query.py +495 -0
- code_graph_builder/tools/semantic_search.py +387 -0
- code_graph_builder/types.py +333 -0
- code_graph_builder/utils/__init__.py +0 -0
- code_graph_builder/utils/path_utils.py +30 -0
- code_graph_builder-0.2.0.dist-info/METADATA +321 -0
- code_graph_builder-0.2.0.dist-info/RECORD +93 -0
- code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
- code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
"""OpenAI-compatible LLM client for RAG.
|
|
2
|
+
|
|
3
|
+
This module provides a client for interacting with any OpenAI-compatible LLM API.
|
|
4
|
+
Supported providers include Moonshot (Kimi), OpenAI, DeepSeek, and others.
|
|
5
|
+
|
|
6
|
+
Examples:
|
|
7
|
+
>>> from code_graph_builder.rag.client import LLMClient
|
|
8
|
+
>>> client = LLMClient(api_key="sk-xxxxx")
|
|
9
|
+
>>> response = client.chat("Explain this code", context="def foo(): pass")
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from dataclasses import dataclass
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
import requests
|
|
18
|
+
from loguru import logger
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class ChatResponse:
    """Response from chat completion.

    Immutable-by-convention value object holding the parsed result of one
    ``/chat/completions`` call (see ``LLMClient.chat``).

    Attributes:
        content: Generated text content
        usage: Token usage information (provider-reported; may be empty if
            the API response omitted a ``usage`` block)
        model: Model used for generation
        finish_reason: Reason for completion finish (e.g. "stop"; defaults
            to "unknown" when the API response omits it)
    """

    content: str
    usage: dict[str, int]
    model: str
    finish_reason: str
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class LLMClient:
    """Client for OpenAI-compatible LLM API.

    Provides a simple interface for chat completions with any OpenAI-compatible model.

    Args:
        api_key: LLM API key
        model: Model name (default: kimi-k2.5)
        base_url: API base URL
        max_tokens: Maximum tokens for generation
        temperature: Sampling temperature
        timeout: Request timeout in seconds

    Examples:
        >>> client = LLMClient(api_key="sk-xxxxx")
        >>> response = client.chat(
        ...     query="What does this function do?",
        ...     context="def add(a, b): return a + b"
        ... )
        >>> print(response.content)

    Note:
        DEFAULT_MODEL and DEFAULT_BASE_URL default to Moonshot/Kimi but can be
        overridden via constructor arguments or ``create_llm_client()`` auto-detection.
    """

    DEFAULT_MODEL = "kimi-k2.5"
    DEFAULT_BASE_URL = "https://api.moonshot.cn/v1"

    def __init__(
        self,
        api_key: str | None = None,
        model: str = DEFAULT_MODEL,
        base_url: str = DEFAULT_BASE_URL,
        max_tokens: int = 4096,
        temperature: float = 1.0,
        timeout: int = 300,
    ):
        self.api_key = api_key
        self.model = model
        # Normalize so f"{base_url}/chat/completions" never yields a "//".
        self.base_url = base_url.rstrip("/")
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.timeout = timeout

        if not self.api_key:
            raise ValueError(
                "LLM API key is required. "
                "Set one of: LLM_API_KEY, OPENAI_API_KEY, or MOONSHOT_API_KEY "
                "environment variable, or pass api_key directly. "
                "Use create_llm_client() for automatic provider detection."
            )

        logger.info(f"Initialized LLMClient with model: {self.model}")

    def _get_headers(self) -> dict[str, str]:
        """Get API request headers."""
        return {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

    def _build_payload(
        self,
        messages: list[dict[str, str]],
        max_tokens: int | None,
        temperature: float | None,
    ) -> dict[str, Any]:
        """Build the chat-completions request payload.

        Uses explicit ``is None`` checks rather than ``or`` so that
        legitimate falsy overrides (notably ``temperature=0.0`` for
        deterministic generation) are honored instead of silently falling
        back to the instance defaults — the original ``or`` form discarded
        them.
        """
        return {
            "model": self.model,
            "messages": messages,
            "max_tokens": self.max_tokens if max_tokens is None else max_tokens,
            "temperature": self.temperature if temperature is None else temperature,
        }

    def _request_chat(self, payload: dict[str, Any]) -> ChatResponse:
        """POST *payload* to ``/chat/completions`` and parse the result.

        Shared by ``chat`` and ``chat_with_messages`` so request, parsing,
        and error handling live in one place.

        Returns:
            ChatResponse with generated content

        Raises:
            RuntimeError: If the request fails (HTTP error, timeout, network
                problem, or malformed response).
        """
        try:
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=self._get_headers(),
                json=payload,
                timeout=self.timeout,
            )
            response.raise_for_status()
            data = response.json()

            choice = data["choices"][0]
            return ChatResponse(
                content=choice["message"]["content"],
                usage=data.get("usage", {}),
                model=data.get("model", self.model),
                finish_reason=choice.get("finish_reason", "unknown"),
            )

        except requests.exceptions.HTTPError as e:
            logger.error(f"HTTP error: {e}")
            try:
                # Surface the provider's own error message when available.
                error_data = e.response.json() if e.response else {}
                error_msg = error_data.get("error", {}).get("message", str(e))
            except Exception:
                error_msg = str(e)
            # "from e" preserves the original traceback for debugging.
            raise RuntimeError(f"API request failed: {error_msg}") from e

        except requests.exceptions.Timeout as e:
            logger.error("Request timeout")
            raise RuntimeError(f"API request timeout after {self.timeout}s") from e

        except Exception as e:
            logger.error(f"Request failed: {e}")
            raise RuntimeError(f"API request failed: {e}") from e

    def chat(
        self,
        query: str,
        context: str | None = None,
        system_prompt: str | None = None,
        max_tokens: int | None = None,
        temperature: float | None = None,
    ) -> ChatResponse:
        """Send a chat completion request.

        Args:
            query: User query
            context: Optional context to include
            system_prompt: Optional system prompt
            max_tokens: Override max tokens
            temperature: Override temperature

        Returns:
            ChatResponse with generated content

        Raises:
            RuntimeError: If API request fails
        """
        messages = []

        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})

        if context:
            content = f"Context:\n{context}\n\nQuery: {query}"
        else:
            content = query

        messages.append({"role": "user", "content": content})

        return self._request_chat(self._build_payload(messages, max_tokens, temperature))

    def chat_with_messages(
        self,
        messages: list[dict[str, str]],
        max_tokens: int | None = None,
        temperature: float | None = None,
    ) -> ChatResponse:
        """Send a chat completion request with raw messages.

        Args:
            messages: List of message dicts with 'role' and 'content'
            max_tokens: Override max tokens
            temperature: Override temperature

        Returns:
            ChatResponse with generated content

        Raises:
            RuntimeError: If API request fails
        """
        return self._request_chat(self._build_payload(messages, max_tokens, temperature))

    def health_check(self) -> bool:
        """Check if API is accessible.

        Returns:
            True if healthy, False otherwise
        """
        try:
            response = requests.get(
                f"{self.base_url}/models",
                headers=self._get_headers(),
                timeout=10,
            )
            return response.status_code == 200
        except Exception as e:
            logger.error(f"Health check failed: {e}")
            return False
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def create_llm_client(
    api_key: str | None = None,
    model: str | None = None,
    base_url: str | None = None,
    **kwargs: Any,
) -> LLMClient:
    """Factory function to create LLMClient with auto-detection.

    Auto-detects API credentials from environment variables in this priority:

    1. ``LLM_API_KEY`` / ``LLM_BASE_URL`` / ``LLM_MODEL`` (generic, highest)
    2. ``OPENAI_API_KEY`` / ``OPENAI_BASE_URL`` / ``OPENAI_MODEL``
    3. ``MOONSHOT_API_KEY`` / ``MOONSHOT_BASE_URL`` / ``MOONSHOT_MODEL`` (Moonshot/Kimi default)

    This allows any OpenAI-compatible model provider (DeepSeek, OpenAI,
    Moonshot, etc.) to be used seamlessly.

    Args:
        api_key: API key (auto-detected from env if not provided)
        model: Model name (auto-detected from env if not provided)
        base_url: API base URL (auto-detected from env if not provided)
        **kwargs: Additional arguments for LLMClient

    Returns:
        Configured LLMClient
    """
    import os

    # Provider detection order: (key_env, url_env, model_env, default_url, default_model)
    _providers = [
        ("LLM_API_KEY", "LLM_BASE_URL", "LLM_MODEL", "https://api.openai.com/v1", "gpt-4o"),
        ("OPENAI_API_KEY", "OPENAI_BASE_URL", "OPENAI_MODEL", "https://api.openai.com/v1", "gpt-4o"),
        # Fixed: this row previously looked up LLM_BASE_URL, silently ignoring
        # MOONSHOT_BASE_URL (inconsistent with RAGConfig.from_env, which honors it).
        ("MOONSHOT_API_KEY", "MOONSHOT_BASE_URL", "MOONSHOT_MODEL", "https://api.moonshot.cn/v1", "kimi-k2.5"),
    ]

    detected_key = api_key or ""
    detected_url = base_url or ""
    detected_model = model or ""

    if not detected_key:
        for key_env, url_env, model_env, default_url, default_model in _providers:
            env_key = os.environ.get(key_env, "")
            if env_key:
                detected_key = env_key
                # Explicit arguments always win over environment values.
                detected_url = detected_url or os.environ.get(url_env, default_url)
                detected_model = detected_model or os.environ.get(model_env, default_model)
                logger.info(f"LLMClient: auto-detected provider via {key_env}")
                break

    # Apply defaults for any still-missing values
    detected_model = detected_model or "kimi-k2.5"
    detected_url = detected_url or LLMClient.DEFAULT_BASE_URL

    return LLMClient(
        api_key=detected_key or None,
        model=detected_model,
        base_url=detected_url,
        **kwargs,
    )
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
"""Configuration for RAG module.
|
|
2
|
+
|
|
3
|
+
This module provides configuration classes for RAG components including
|
|
4
|
+
Moonshot API settings, retrieval parameters, and output options.
|
|
5
|
+
|
|
6
|
+
Examples:
|
|
7
|
+
>>> from code_graph_builder.rag.config import RAGConfig
|
|
8
|
+
>>> config = RAGConfig.from_env()
|
|
9
|
+
>>> print(config.moonshot.model)
|
|
10
|
+
kimi-k2.5
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class MoonshotConfig:
    """Configuration for Moonshot AI API (Kimi k2.5).

    Args:
        api_key: Moonshot API key (or from MOONSHOT_API_KEY env var)
        model: Model name (default: kimi-k2.5)
        base_url: API base URL
        max_tokens: Maximum tokens for generation
        temperature: Sampling temperature (0-2)
        timeout: Request timeout in seconds

    Examples:
        >>> config = MoonshotConfig(api_key="sk-xxxxx")
        >>> config = MoonshotConfig(api_key="sk-xxxxx", model="kimi-k2.5", temperature=0.7)
    """

    api_key: str | None = None
    model: str = "kimi-k2.5"
    base_url: str = "https://api.moonshot.cn/v1"
    max_tokens: int = 4096
    temperature: float = 0.7
    timeout: int = 120

    def __post_init__(self):
        """Fall back to the MOONSHOT_API_KEY env var when no key was given."""
        if self.api_key is None:
            self.api_key = os.getenv("MOONSHOT_API_KEY")

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary."""
        field_names = ("api_key", "model", "base_url", "max_tokens", "temperature", "timeout")
        return {name: getattr(self, name) for name in field_names}

    def validate(self) -> None:
        """Validate configuration.

        Raises:
            ValueError: If configuration is invalid
        """
        if not self.api_key:
            raise ValueError(
                "Moonshot API key is required. "
                "Set MOONSHOT_API_KEY environment variable or pass api_key."
            )
        if not self.api_key.startswith("sk-"):
            raise ValueError(
                "Moonshot API key format is invalid. Expected to start with 'sk-'."
            )
        if self.temperature < 0 or self.temperature > 2:
            raise ValueError("Temperature must be between 0 and 2.")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@dataclass
class RetrievalConfig:
    """Configuration for code retrieval.

    Args:
        semantic_top_k: Number of semantic search results
        graph_max_depth: Maximum depth for graph traversal
        include_callers: Whether to include calling functions
        include_callees: Whether to include called functions
        include_related: Whether to include related nodes
        max_context_tokens: Maximum tokens for context
        code_chunk_size: Maximum size of code chunks

    Examples:
        >>> config = RetrievalConfig(semantic_top_k=10, include_callers=True)
    """

    semantic_top_k: int = 10
    graph_max_depth: int = 2
    include_callers: bool = True
    include_callees: bool = True
    include_related: bool = True
    max_context_tokens: int = 8000
    code_chunk_size: int = 2000

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary."""
        field_names = (
            "semantic_top_k",
            "graph_max_depth",
            "include_callers",
            "include_callees",
            "include_related",
            "max_context_tokens",
            "code_chunk_size",
        )
        return {name: getattr(self, name) for name in field_names}
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
@dataclass
class OutputConfig:
    """Configuration for RAG output.

    Args:
        format: Output format (markdown, json)
        include_source_links: Whether to include source code links
        include_code_snippets: Whether to include code snippets
        output_dir: Directory for output files

    Examples:
        >>> config = OutputConfig(format="markdown", include_source_links=True)
    """

    format: str = "markdown"
    include_source_links: bool = True
    include_code_snippets: bool = True
    output_dir: str | Path = "./rag_output"

    def __post_init__(self):
        """Coerce a string ``output_dir`` into a ``Path``."""
        if isinstance(self.output_dir, str):
            self.output_dir = Path(self.output_dir)

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary."""
        result = {
            name: getattr(self, name)
            for name in ("format", "include_source_links", "include_code_snippets")
        }
        # Paths are serialized as plain strings.
        result["output_dir"] = str(self.output_dir)
        return result
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
@dataclass
class RAGConfig:
    """Main configuration for RAG module.

    Combines all sub-configurations for Moonshot API, retrieval,
    and output settings.

    Args:
        moonshot: Moonshot API configuration
        retrieval: Retrieval configuration
        output: Output configuration
        verbose: Enable verbose logging

    Examples:
        >>> # From environment variables
        >>> config = RAGConfig.from_env()
        >>>
        >>> # With explicit settings
        >>> config = RAGConfig(
        ...     moonshot=MoonshotConfig(api_key="sk-xxxxx"),
        ...     retrieval=RetrievalConfig(semantic_top_k=15)
        ... )
    """

    moonshot: MoonshotConfig = field(default_factory=MoonshotConfig)
    retrieval: RetrievalConfig = field(default_factory=RetrievalConfig)
    output: OutputConfig = field(default_factory=OutputConfig)
    verbose: bool = False

    @classmethod
    def from_env(cls) -> RAGConfig:
        """Create configuration from environment variables.

        Environment variables:
            MOONSHOT_API_KEY: Moonshot API key
            MOONSHOT_MODEL: Model name (default: kimi-k2.5)
            MOONSHOT_BASE_URL: API base URL
            RAG_SEMANTIC_TOP_K: Number of semantic search results
            RAG_OUTPUT_FORMAT: Output format
            RAG_OUTPUT_DIR: Directory for output files
            RAG_VERBOSE: Enable verbose logging

        Returns:
            RAGConfig instance
        """
        env = os.getenv
        return cls(
            moonshot=MoonshotConfig(
                api_key=env("MOONSHOT_API_KEY"),
                model=env("MOONSHOT_MODEL", "kimi-k2.5"),
                base_url=env("MOONSHOT_BASE_URL", "https://api.moonshot.cn/v1"),
            ),
            retrieval=RetrievalConfig(
                semantic_top_k=int(env("RAG_SEMANTIC_TOP_K", "10")),
            ),
            output=OutputConfig(
                format=env("RAG_OUTPUT_FORMAT", "markdown"),
                output_dir=env("RAG_OUTPUT_DIR", "./rag_output"),
            ),
            verbose=env("RAG_VERBOSE", "false").lower() == "true",
        )

    def validate(self) -> None:
        """Validate all configurations.

        Raises:
            ValueError: If any configuration is invalid
        """
        self.moonshot.validate()

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary."""
        return {
            "moonshot": self.moonshot.to_dict(),
            "retrieval": self.retrieval.to_dict(),
            "output": self.output.to_dict(),
            "verbose": self.verbose,
        }
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""Natural language to Cypher query translator.
|
|
2
|
+
|
|
3
|
+
Uses an LLM backend to convert user questions into Cypher queries
|
|
4
|
+
that can be executed against the code knowledge graph.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import TYPE_CHECKING, Any
|
|
10
|
+
|
|
11
|
+
from loguru import logger
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from .llm_backend import LLMBackend
|
|
15
|
+
|
|
16
|
+
# System prompt describing the graph schema for Cypher generation.
# NOTE: the labels/relationships below mirror the Kùzu schema created by the
# builder; keep this text in sync when the graph schema changes. The trailing
# backslashes are line continuations inside the prompt, not Python syntax.
_SCHEMA_PROMPT = """\
You are a Cypher query generator for a code knowledge graph stored in Kùzu.

Node labels: Project, Package, Folder, File, Module, Class, Function, Method, \
Interface, Enum, Type, Union, ExternalPackage.

Common properties: qualified_name (PK), name, path, start_line, end_line, \
docstring, return_type, signature, visibility, parameters (STRING[]), kind.

Relationship types: CONTAINS_PACKAGE, CONTAINS_FOLDER, CONTAINS_FILE, \
CONTAINS_MODULE, DEFINES, DEFINES_METHOD, IMPORTS, EXPORTS, EXPORTS_MODULE, \
IMPLEMENTS_MODULE, INHERITS, IMPLEMENTS, OVERRIDES, CALLS, DEPENDS_ON_EXTERNAL.

Rules:
- Output ONLY a single Cypher query, nothing else.
- Do NOT use OPTIONAL MATCH.
- Always LIMIT results to at most 50 unless the user specifies otherwise.
"""
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class CypherGenerator:
    """Translates natural-language questions to Cypher queries using an LLM."""

    def __init__(self, llm: LLMBackend) -> None:
        self._llm = llm

    def generate(self, question: str) -> str:
        """Return a Cypher query string for *question*."""
        if not self._llm.api_key:
            raise RuntimeError(
                "LLM backend has no API key configured. "
                "Set MOONSHOT_API_KEY to enable query_code_graph."
            )

        # temperature=0.0 keeps the generated query as deterministic as possible.
        raw = self._llm.chat(
            [
                {"role": "system", "content": _SCHEMA_PROMPT},
                {"role": "user", "content": question},
            ],
            temperature=0.0,
        )

        # Drop any markdown code fences the model may have wrapped around the query.
        query = raw.strip()
        if query.startswith("```"):
            kept = [
                line
                for line in query.splitlines()
                if not line.strip().startswith("```")
            ]
            query = "\n".join(kept).strip()

        logger.debug(f"Generated Cypher: {query}")
        return query
|