PyPI - code-graph-builder - Versions diffs - 0.2.0__py3-none-any.whl - Mend

code-graph-builder 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93) hide show

code_graph_builder/__init__.py +82 -0
code_graph_builder/builder.py +366 -0
code_graph_builder/cgb_cli.py +32 -0
code_graph_builder/cli.py +564 -0
code_graph_builder/commands_cli.py +1288 -0
code_graph_builder/config.py +340 -0
code_graph_builder/constants.py +708 -0
code_graph_builder/embeddings/__init__.py +40 -0
code_graph_builder/embeddings/qwen3_embedder.py +573 -0
code_graph_builder/embeddings/vector_store.py +584 -0
code_graph_builder/examples/__init__.py +0 -0
code_graph_builder/examples/example_configuration.py +276 -0
code_graph_builder/examples/example_kuzu_usage.py +109 -0
code_graph_builder/examples/example_semantic_search_full.py +347 -0
code_graph_builder/examples/generate_wiki.py +915 -0
code_graph_builder/examples/graph_export_example.py +100 -0
code_graph_builder/examples/rag_example.py +206 -0
code_graph_builder/examples/test_cli_demo.py +129 -0
code_graph_builder/examples/test_embedding_api.py +153 -0
code_graph_builder/examples/test_kuzu_local.py +190 -0
code_graph_builder/examples/test_rag_redis.py +390 -0
code_graph_builder/graph_updater.py +605 -0
code_graph_builder/guidance/__init__.py +1 -0
code_graph_builder/guidance/agent.py +123 -0
code_graph_builder/guidance/prompts.py +74 -0
code_graph_builder/guidance/toolset.py +264 -0
code_graph_builder/language_spec.py +536 -0
code_graph_builder/mcp/__init__.py +21 -0
code_graph_builder/mcp/api_doc_generator.py +764 -0
code_graph_builder/mcp/file_editor.py +207 -0
code_graph_builder/mcp/pipeline.py +777 -0
code_graph_builder/mcp/server.py +161 -0
code_graph_builder/mcp/tools.py +1800 -0
code_graph_builder/models.py +115 -0
code_graph_builder/parser_loader.py +344 -0
code_graph_builder/parsers/__init__.py +7 -0
code_graph_builder/parsers/call_processor.py +306 -0
code_graph_builder/parsers/call_resolver.py +139 -0
code_graph_builder/parsers/definition_processor.py +796 -0
code_graph_builder/parsers/factory.py +119 -0
code_graph_builder/parsers/import_processor.py +293 -0
code_graph_builder/parsers/structure_processor.py +145 -0
code_graph_builder/parsers/type_inference.py +143 -0
code_graph_builder/parsers/utils.py +134 -0
code_graph_builder/rag/__init__.py +68 -0
code_graph_builder/rag/camel_agent.py +429 -0
code_graph_builder/rag/client.py +298 -0
code_graph_builder/rag/config.py +239 -0
code_graph_builder/rag/cypher_generator.py +67 -0
code_graph_builder/rag/llm_backend.py +210 -0
code_graph_builder/rag/markdown_generator.py +352 -0
code_graph_builder/rag/prompt_templates.py +440 -0
code_graph_builder/rag/rag_engine.py +640 -0
code_graph_builder/rag/review_report.md +172 -0
code_graph_builder/rag/tests/__init__.py +3 -0
code_graph_builder/rag/tests/test_camel_agent.py +313 -0
code_graph_builder/rag/tests/test_client.py +221 -0
code_graph_builder/rag/tests/test_config.py +177 -0
code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
code_graph_builder/services/__init__.py +39 -0
code_graph_builder/services/graph_service.py +465 -0
code_graph_builder/services/kuzu_service.py +665 -0
code_graph_builder/services/memory_service.py +171 -0
code_graph_builder/settings.py +75 -0
code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
code_graph_builder/tests/__init__.py +1 -0
code_graph_builder/tests/run_acceptance_check.py +378 -0
code_graph_builder/tests/test_api_find.py +231 -0
code_graph_builder/tests/test_api_find_integration.py +226 -0
code_graph_builder/tests/test_basic.py +78 -0
code_graph_builder/tests/test_c_api_extraction.py +388 -0
code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
code_graph_builder/tests/test_embedder.py +411 -0
code_graph_builder/tests/test_integration_semantic.py +434 -0
code_graph_builder/tests/test_mcp_protocol.py +298 -0
code_graph_builder/tests/test_mcp_user_flow.py +190 -0
code_graph_builder/tests/test_rag.py +404 -0
code_graph_builder/tests/test_settings.py +135 -0
code_graph_builder/tests/test_step1_graph_build.py +264 -0
code_graph_builder/tests/test_step2_api_docs.py +323 -0
code_graph_builder/tests/test_step3_embedding.py +278 -0
code_graph_builder/tests/test_vector_store.py +552 -0
code_graph_builder/tools/__init__.py +40 -0
code_graph_builder/tools/graph_query.py +495 -0
code_graph_builder/tools/semantic_search.py +387 -0
code_graph_builder/types.py +333 -0
code_graph_builder/utils/__init__.py +0 -0
code_graph_builder/utils/path_utils.py +30 -0
code_graph_builder-0.2.0.dist-info/METADATA +321 -0
code_graph_builder-0.2.0.dist-info/RECORD +93 -0
code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0

code_graph_builder/parsers/utils.py ADDED Viewed

@@ -0,0 +1,134 @@
+"""Code Graph Builder - Parser Utilities."""
+from __future__ import annotations
+from collections.abc import Callable
+from functools import lru_cache
+from typing import TYPE_CHECKING, NamedTuple
+from loguru import logger
+from tree_sitter import Node, Query, QueryCursor
+from .. import constants as cs
+from ..types import ASTNode, LanguageQueries, NodeType, PropertyDict, SimpleNameLookup
+if TYPE_CHECKING:
+    from ..language_spec import LanguageSpec
+    from ..services import IngestorProtocol
+    from ..types import FunctionRegistryTrieProtocol
+class FunctionCapturesResult(NamedTuple):
+    """Result of capturing functions from AST.
+    Bundles the language configuration read from the per-language query
+    table with the captures produced by running that language's function
+    query (see ``get_function_captures``).
+    """
+    # Language configuration stored under ``cs.QUERY_CONFIG`` for the language.
+    lang_config: LanguageSpec
+    # Whatever ``QueryCursor.captures`` returned; annotated as capture name -> matched nodes.
+    captures: dict[str, list[ASTNode]]
+def get_function_captures(
+    root_node: ASTNode,
+    language: cs.SupportedLanguage,
+    queries: dict[cs.SupportedLanguage, LanguageQueries],
+) -> FunctionCapturesResult | None:
+    """Get function captures from AST using Tree-sitter query."""
+    lang_queries = queries[language]
+    lang_config = lang_queries[cs.QUERY_CONFIG]
+    if not (query := lang_queries[cs.QUERY_FUNCTIONS]):
+        return None
+    cursor = QueryCursor(query)
+    captures = cursor.captures(root_node)
+    return FunctionCapturesResult(lang_config, captures)
+@lru_cache(maxsize=10000)
+def _cached_decode_bytes(text_bytes: bytes) -> str:
+    """Cached byte decoding for performance."""
+    return text_bytes.decode(cs.ENCODING_UTF8)
+def safe_decode_text(node: ASTNode | None) -> str | None:
+    """Safely decode node text to string."""
+    if node is None or (text_bytes := node.text) is None:
+        return None
+    if isinstance(text_bytes, bytes):
+        return _cached_decode_bytes(text_bytes)
+    return str(text_bytes)
+def safe_decode_with_fallback(node: ASTNode | None, fallback: str = "") -> str:
+    """Safely decode node text with fallback."""
+    return result if (result := safe_decode_text(node)) is not None else fallback
+def contains_node(parent: ASTNode, target: ASTNode) -> bool:
+    """Check if parent contains target node."""
+    return parent == target or any(
+        contains_node(child, target) for child in parent.children
+    )
+def ingest_method(
+    method_node: ASTNode,
+    container_qn: str,
+    container_type: cs.NodeLabel,
+    ingestor: IngestorProtocol,
+    function_registry: FunctionRegistryTrieProtocol,
+    simple_name_lookup: SimpleNameLookup,
+    get_docstring_func: Callable[[ASTNode], str | None],
+    language: cs.SupportedLanguage | None = None,
+    extract_decorators_func: Callable[[ASTNode], list[str]] | None = None,
+    method_qualified_name: str | None = None,
+) -> None:
+    """Ingest a method node into the graph."""
+    # Extract method name
+    if language == cs.SupportedLanguage.CPP:
+        from .cpp import utils as cpp_utils
+        method_name = cpp_utils.extract_function_name(method_node)
+        if not method_name:
+            return
+    elif not (method_name_node := method_node.child_by_field_name(cs.FIELD_NAME)):
+        return
+    elif (text := method_name_node.text) is None:
+        return
+    else:
+        method_name = text.decode(cs.ENCODING_UTF8)
+    method_qn = method_qualified_name or f"{container_qn}.{method_name}"
+    decorators = extract_decorators_func(method_node) if extract_decorators_func else []
+    method_props: PropertyDict = {
+        cs.KEY_QUALIFIED_NAME: method_qn,
+        cs.KEY_NAME: method_name,
+        cs.KEY_DECORATORS: decorators,
+        cs.KEY_START_LINE: method_node.start_point[0] + 1,
+        cs.KEY_END_LINE: method_node.end_point[0] + 1,
+        cs.KEY_DOCSTRING: get_docstring_func(method_node),
+    }
+    logger.info(f"    Found Method: {method_name} (qn: {method_qn})")
+    ingestor.ensure_node_batch(cs.NodeLabel.METHOD, method_props)
+    function_registry[method_qn] = NodeType.METHOD
+    simple_name_lookup[method_name].add(method_qn)
+    ingestor.ensure_relationship_batch(
+        (container_type, cs.KEY_QUALIFIED_NAME, container_qn),
+        cs.RelationshipType.DEFINES_METHOD,
+        (cs.NodeLabel.METHOD, cs.KEY_QUALIFIED_NAME, method_qn),
+    )
+def is_method_node(func_node: ASTNode, lang_config: LanguageSpec) -> bool:
+    """Check if a function node is actually a method."""
+    current = func_node.parent
+    if not isinstance(current, Node):
+        return False
+    while current and current.type not in lang_config.module_node_types:
+        if current.type in lang_config.class_node_types:
+            return True
+        current = current.parent
+    return False

code_graph_builder/rag/__init__.py ADDED Viewed

@@ -0,0 +1,68 @@
+"""RAG module for code graph-based retrieval and generation.
+This module provides RAG (Retrieval-Augmented Generation) capabilities
+for code analysis using CAMEL framework and OpenAI-compatible LLM APIs.
+Example:
+    >>> from code_graph_builder.rag import RAGConfig, create_rag_engine
+    >>> from code_graph_builder.rag.camel_agent import CamelAgent
+    >>>
+    >>> config = RAGConfig.from_env()
+    >>> engine = create_rag_engine(config)
+    >>> result = engine.query("Explain the authentication flow")
+"""
+from __future__ import annotations
+from .config import (
+    MoonshotConfig,
+    OutputConfig,
+    RAGConfig,
+    RetrievalConfig,
+)
+from .client import (
+    ChatResponse,
+    LLMClient,
+    create_llm_client,
+)
+from .markdown_generator import (
+    AnalysisResult,
+    MarkdownGenerator,
+    SourceReference,
+)
+from .prompt_templates import (
+    CodeAnalysisPrompts,
+    CodeContext,
+    RAGPrompts,
+    create_code_context,
+)
+from .rag_engine import (
+    RAGEngine,
+    RAGResult,
+    create_rag_engine,
+)
+# Public API of the package: every name imported above from the submodules
+# is re-exported here, grouped by the submodule it comes from.
+__all__ = [
+    # Config
+    "RAGConfig",
+    "MoonshotConfig",
+    "RetrievalConfig",
+    "OutputConfig",
+    # Engine
+    "RAGEngine",
+    "RAGResult",
+    "create_rag_engine",
+    # LLM Client
+    "LLMClient",
+    "ChatResponse",
+    "create_llm_client",
+    # Prompts
+    "CodeAnalysisPrompts",
+    "RAGPrompts",
+    "CodeContext",
+    "create_code_context",
+    # Markdown
+    "MarkdownGenerator",
+    "AnalysisResult",
+    "SourceReference",
+]

code_graph_builder/rag/camel_agent.py ADDED Viewed

@@ -0,0 +1,429 @@
+"""CAMEL Agent wrapper for RAG integration.
+This module provides integration with the CAMEL framework for multi-agent
+code analysis workflows.
+Note: This is a simplified implementation that provides CAMEL-like interfaces
+without requiring the full CAMEL framework dependency. It can be extended
+to use the actual CAMEL library if needed.
+Examples:
+    >>> from code_graph_builder.rag.camel_agent import CamelAgent
+    >>> agent = CamelAgent(
+    ...     role="Code Analyst",
+    ...     goal="Analyze code and provide insights",
+    ...     backstory="Expert in software architecture"
+    ... )
+    >>> result = agent.analyze("Explain this function", context="def foo(): pass")
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Any, Protocol
+from loguru import logger
+from .client import LLMClient, create_llm_client
+from .prompt_templates import CodeAnalysisPrompts, CodeContext
+if TYPE_CHECKING:
+    from .rag_engine import RAGEngine, RAGResult
+class AgentResponse(Protocol):
+    """Protocol for agent responses.
+    Structural type: any object exposing the two attributes below conforms,
+    without having to inherit from this class.
+    """
+    # Text of the response.
+    content: str
+    # Extra data attached to the response, keyed by string.
+    metadata: dict[str, Any]
+@dataclass
+class CamelAgentResponse:
+    """Response from CAMEL agent.
+    Attributes:
+        content: Generated response content.
+        metadata: Additional metadata; defaults to a fresh empty dict per
+            instance.
+        role: Agent role that generated the response; defaults to "agent".
+    """
+    content: str
+    # default_factory gives each instance its own dict rather than a shared one.
+    metadata: dict[str, Any] = field(default_factory=dict)
+    role: str = "agent"
+class CamelAgent:
+    """CAMEL-style agent for code analysis.
+    Provides a CAMEL-like interface for single-agent code analysis tasks.
+    This implementation uses an OpenAI-compatible LLM as the underlying model.
+    Args:
+        role: Agent's role (e.g., "Code Analyst")
+        goal: Agent's goal/objective
+        backstory: Agent's background/context
+        llm_client: LLM API client
+        verbose: Enable verbose logging
+    Examples:
+        >>> agent = CamelAgent(
+        ...     role="Senior Python Developer",
+        ...     goal="Review code for best practices",
+        ...     backstory="10+ years of Python experience"
+        ... )
+        >>> response = agent.analyze("Review this function", code="def foo(): pass")
+        >>> print(response.content)
+    """
+    def __init__(
+        self,
+        role: str,
+        goal: str,
+        backstory: str,
+        llm_client: LLMClient | None = None,
+        verbose: bool = False,
+    ):
+        self.role = role
+        self.goal = goal
+        self.backstory = backstory
+        self.llm_client = llm_client or create_llm_client()
+        self.verbose = verbose
+        self.prompts = CodeAnalysisPrompts()
+        # Build system prompt from role definition
+        self.system_prompt = self._build_system_prompt()
+        logger.info(f"Initialized CamelAgent: {role}")
+    def _build_system_prompt(self) -> str:
+        """Build system prompt from agent definition."""
+        return f"""You are a {self.role}.
+Your Goal: {self.goal}
+Your Backstory: {self.backstory}
+Guidelines:
+1. Always stay in character as a {self.role}
+2. Focus on achieving your stated goal
+3. Use your expertise and background to provide insightful analysis
+4. Be thorough but concise in your responses
+5. When analyzing code, consider best practices, patterns, and potential issues
+Respond in a professional, helpful manner."""
+    def analyze(
+        self,
+        task: str,
+        code: str | None = None,
+        context: str | None = None,
+    ) -> CamelAgentResponse:
+        """Analyze code or answer a question.
+        Args:
+            task: Task description or question
+            code: Code to analyze (optional)
+            context: Additional context (optional)
+        Returns:
+            CamelAgentResponse with analysis
+        """
+        # Build user message
+        user_content = task
+        if code:
+            user_content += f"\n\n```python\n{code}\n```"
+        if context:
+            user_content += f"\n\nContext: {context}"
+        messages = [
+            {"role": "system", "content": self.system_prompt},
+            {"role": "user", "content": user_content},
+        ]
+        try:
+            response = self.llm_client.chat_with_messages(messages)
+            return CamelAgentResponse(
+                content=response.content,
+                metadata={
+                    "usage": response.usage,
+                    "model": response.model,
+                },
+                role=self.role,
+            )
+        except Exception as e:
+            logger.error(f"Agent analysis failed: {e}")
+            return CamelAgentResponse(
+                content=f"Error during analysis: {e}",
+                metadata={"error": str(e)},
+                role=self.role,
+            )
+    def review_code(
+        self,
+        code: str,
+        review_type: str = "general",
+    ) -> CamelAgentResponse:
+        """Review code for specific aspects.
+        Args:
+            code: Code to review
+            review_type: Type of review (general, security, performance, style)
+        Returns:
+            Code review response
+        """
+        review_prompts = {
+            "general": "Please review this code for general quality, correctness, and best practices.",
+            "security": "Please review this code for security vulnerabilities and best practices.",
+            "performance": "Please review this code for performance issues and optimization opportunities.",
+            "style": "Please review this code for code style, readability, and maintainability.",
+        }
+        prompt = review_prompts.get(review_type, review_prompts["general"])
+        return self.analyze(
+            task=f"{prompt}\n\nProvide specific recommendations with examples.",
+            code=code,
+        )
+    def explain_code(
+        self,
+        code: str,
+        detail_level: str = "medium",
+    ) -> CamelAgentResponse:
+        """Explain code in detail.
+        Args:
+            code: Code to explain
+            detail_level: Level of detail (brief, medium, detailed)
+        Returns:
+            Code explanation
+        """
+        detail_instructions = {
+            "brief": "Provide a brief, high-level summary of what this code does.",
+            "medium": "Explain this code with a balance of high-level overview and key details.",
+            "detailed": "Provide a detailed explanation covering all logic, edge cases, and design decisions.",
+        }
+        instruction = detail_instructions.get(detail_level, detail_instructions["medium"])
+        return self.analyze(
+            task=f"{instruction}\n\nFormat your response in markdown.",
+            code=code,
+        )
+    def suggest_improvements(
+        self,
+        code: str,
+        focus_areas: list[str] | None = None,
+    ) -> CamelAgentResponse:
+        """Suggest improvements for code.
+        Args:
+            code: Code to improve
+            focus_areas: Specific areas to focus on (e.g., ["readability", "performance"])
+        Returns:
+            Improvement suggestions
+        """
+        task = "Suggest improvements for this code."
+        if focus_areas:
+            task += f"\n\nFocus on: {', '.join(focus_areas)}"
+        task += "\n\nFor each suggestion, provide:\n1. The issue\n2. Why it matters\n3. A concrete improved example"
+        return self.analyze(task=task, code=code)
+    def answer_question(
+        self,
+        question: str,
+        code_context: str | None = None,
+    ) -> CamelAgentResponse:
+        """Answer a question about code.
+        Args:
+            question: The question to answer
+            code_context: Relevant code context
+        Returns:
+            Answer response
+        """
+        return self.analyze(
+            task=question,
+            code=code_context,
+        )
+class MultiAgentRAG:
+    """Multi-agent RAG system using CAMEL-style agents.
+    Coordinates multiple specialized agents for comprehensive code analysis.
+    Args:
+        rag_engine: RAG engine for retrieval
+        verbose: Enable verbose logging
+    Example:
+        >>> multi_agent = MultiAgentRAG(rag_engine)
+        >>> result = multi_agent.analyze(
+        ...     query="Explain the authentication system",
+        ...     analysis_types=["architecture", "security"]
+        ... )
+    """
+    def __init__(
+        self,
+        rag_engine: RAGEngine,
+        verbose: bool = False,
+    ):
+        self.rag_engine = rag_engine
+        self.verbose = verbose
+        # Initialize specialized agents
+        self._init_agents()
+    def _init_agents(self) -> None:
+        """Initialize specialized agents."""
+        self.architect = CamelAgent(
+            role="Software Architect",
+            goal="Analyze code architecture and design patterns",
+            backstory="Senior architect with 15+ years of experience in system design",
+            llm_client=self.rag_engine.llm_client,
+            verbose=self.verbose,
+        )
+        self.security_expert = CamelAgent(
+            role="Security Engineer",
+            goal="Identify security vulnerabilities and best practices",
+            backstory="Security specialist with expertise in secure coding practices",
+            llm_client=self.rag_engine.llm_client,
+            verbose=self.verbose,
+        )
+        self.performance_expert = CamelAgent(
+            role="Performance Engineer",
+            goal="Optimize code performance and resource usage",
+            backstory="Performance optimization specialist with deep knowledge of algorithms",
+            llm_client=self.rag_engine.llm_client,
+            verbose=self.verbose,
+        )
+        self.documentation_writer = CamelAgent(
+            role="Technical Writer",
+            goal="Create clear, comprehensive documentation",
+            backstory="Technical writer specializing in developer documentation",
+            llm_client=self.rag_engine.llm_client,
+            verbose=self.verbose,
+        )
+    def analyze(
+        self,
+        query: str,
+        analysis_types: list[str] | None = None,
+    ) -> dict[str, CamelAgentResponse]:
+        """Run multi-agent analysis on a query.
+        Args:
+            query: User query
+            analysis_types: Types of analysis to run (architecture, security, performance, docs)
+        Returns:
+            Dictionary of agent responses
+        """
+        if analysis_types is None:
+            analysis_types = ["architecture", "docs"]
+        # First, retrieve relevant code
+        rag_result = self.rag_engine.query(query)
+        # Build context from retrieved code
+        context_parts = []
+        for ctx in rag_result.contexts[:3]:  # Limit to top 3 contexts
+            context_parts.append(ctx.format_context())
+        code_context = "\n\n---\n\n".join(context_parts)
+        # Run agent analyses
+        results: dict[str, CamelAgentResponse] = {}
+        if "architecture" in analysis_types:
+            results["architecture"] = self.architect.analyze(
+                task=f"Analyze the architecture and design patterns for: {query}",
+                context=code_context,
+            )
+        if "security" in analysis_types:
+            results["security"] = self.security_expert.analyze(
+                task=f"Review security aspects of: {query}",
+                context=code_context,
+            )
+        if "performance" in analysis_types:
+            results["performance"] = self.performance_expert.analyze(
+                task=f"Analyze performance characteristics of: {query}",
+                context=code_context,
+            )
+        if "docs" in analysis_types:
+            results["documentation"] = self.documentation_writer.analyze(
+                task=f"Create documentation for: {query}",
+                context=code_context,
+            )
+        return results
+    def comprehensive_review(
+        self,
+        qualified_name: str,
+    ) -> dict[str, CamelAgentResponse]:
+        """Run comprehensive review of a code entity.
+        Args:
+            qualified_name: Fully qualified name of the entity
+        Returns:
+            Dictionary of agent reviews
+        """
+        # Get code explanation first
+        rag_result = self.rag_engine.explain_code(qualified_name)
+        # Get source code
+        code = ""
+        if rag_result.contexts:
+            code = rag_result.contexts[0].source_code
+        # Run all agents
+        results: dict[str, CamelAgentResponse] = {
+            "explanation": rag_result,
+            "architecture": self.architect.analyze(
+                task="Analyze the architecture and design patterns in this code",
+                code=code,
+            ),
+            "security": self.security_expert.review_code(code, review_type="security"),
+            "performance": self.performance_expert.review_code(code, review_type="performance"),
+            "documentation": self.documentation_writer.explain_code(code, detail_level="detailed"),
+        }
+        return results
+def create_camel_agent(
+    role: str,
+    goal: str,
+    backstory: str,
+    **kwargs: Any,
+) -> CamelAgent:
+    """Factory function to create a CAMEL agent.
+    Args:
+        role: Agent role
+        goal: Agent goal
+        backstory: Agent backstory
+        **kwargs: Additional arguments for CamelAgent
+    Returns:
+        Configured CamelAgent
+    """
+    return CamelAgent(role=role, goal=goal, backstory=backstory, **kwargs)