PyPI - code-graph-builder - Versions diffs - 0.2.0__py3-none-any.whl - Mend

code-graph-builder 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93) hide show

code_graph_builder/__init__.py +82 -0
code_graph_builder/builder.py +366 -0
code_graph_builder/cgb_cli.py +32 -0
code_graph_builder/cli.py +564 -0
code_graph_builder/commands_cli.py +1288 -0
code_graph_builder/config.py +340 -0
code_graph_builder/constants.py +708 -0
code_graph_builder/embeddings/__init__.py +40 -0
code_graph_builder/embeddings/qwen3_embedder.py +573 -0
code_graph_builder/embeddings/vector_store.py +584 -0
code_graph_builder/examples/__init__.py +0 -0
code_graph_builder/examples/example_configuration.py +276 -0
code_graph_builder/examples/example_kuzu_usage.py +109 -0
code_graph_builder/examples/example_semantic_search_full.py +347 -0
code_graph_builder/examples/generate_wiki.py +915 -0
code_graph_builder/examples/graph_export_example.py +100 -0
code_graph_builder/examples/rag_example.py +206 -0
code_graph_builder/examples/test_cli_demo.py +129 -0
code_graph_builder/examples/test_embedding_api.py +153 -0
code_graph_builder/examples/test_kuzu_local.py +190 -0
code_graph_builder/examples/test_rag_redis.py +390 -0
code_graph_builder/graph_updater.py +605 -0
code_graph_builder/guidance/__init__.py +1 -0
code_graph_builder/guidance/agent.py +123 -0
code_graph_builder/guidance/prompts.py +74 -0
code_graph_builder/guidance/toolset.py +264 -0
code_graph_builder/language_spec.py +536 -0
code_graph_builder/mcp/__init__.py +21 -0
code_graph_builder/mcp/api_doc_generator.py +764 -0
code_graph_builder/mcp/file_editor.py +207 -0
code_graph_builder/mcp/pipeline.py +777 -0
code_graph_builder/mcp/server.py +161 -0
code_graph_builder/mcp/tools.py +1800 -0
code_graph_builder/models.py +115 -0
code_graph_builder/parser_loader.py +344 -0
code_graph_builder/parsers/__init__.py +7 -0
code_graph_builder/parsers/call_processor.py +306 -0
code_graph_builder/parsers/call_resolver.py +139 -0
code_graph_builder/parsers/definition_processor.py +796 -0
code_graph_builder/parsers/factory.py +119 -0
code_graph_builder/parsers/import_processor.py +293 -0
code_graph_builder/parsers/structure_processor.py +145 -0
code_graph_builder/parsers/type_inference.py +143 -0
code_graph_builder/parsers/utils.py +134 -0
code_graph_builder/rag/__init__.py +68 -0
code_graph_builder/rag/camel_agent.py +429 -0
code_graph_builder/rag/client.py +298 -0
code_graph_builder/rag/config.py +239 -0
code_graph_builder/rag/cypher_generator.py +67 -0
code_graph_builder/rag/llm_backend.py +210 -0
code_graph_builder/rag/markdown_generator.py +352 -0
code_graph_builder/rag/prompt_templates.py +440 -0
code_graph_builder/rag/rag_engine.py +640 -0
code_graph_builder/rag/review_report.md +172 -0
code_graph_builder/rag/tests/__init__.py +3 -0
code_graph_builder/rag/tests/test_camel_agent.py +313 -0
code_graph_builder/rag/tests/test_client.py +221 -0
code_graph_builder/rag/tests/test_config.py +177 -0
code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
code_graph_builder/services/__init__.py +39 -0
code_graph_builder/services/graph_service.py +465 -0
code_graph_builder/services/kuzu_service.py +665 -0
code_graph_builder/services/memory_service.py +171 -0
code_graph_builder/settings.py +75 -0
code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
code_graph_builder/tests/__init__.py +1 -0
code_graph_builder/tests/run_acceptance_check.py +378 -0
code_graph_builder/tests/test_api_find.py +231 -0
code_graph_builder/tests/test_api_find_integration.py +226 -0
code_graph_builder/tests/test_basic.py +78 -0
code_graph_builder/tests/test_c_api_extraction.py +388 -0
code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
code_graph_builder/tests/test_embedder.py +411 -0
code_graph_builder/tests/test_integration_semantic.py +434 -0
code_graph_builder/tests/test_mcp_protocol.py +298 -0
code_graph_builder/tests/test_mcp_user_flow.py +190 -0
code_graph_builder/tests/test_rag.py +404 -0
code_graph_builder/tests/test_settings.py +135 -0
code_graph_builder/tests/test_step1_graph_build.py +264 -0
code_graph_builder/tests/test_step2_api_docs.py +323 -0
code_graph_builder/tests/test_step3_embedding.py +278 -0
code_graph_builder/tests/test_vector_store.py +552 -0
code_graph_builder/tools/__init__.py +40 -0
code_graph_builder/tools/graph_query.py +495 -0
code_graph_builder/tools/semantic_search.py +387 -0
code_graph_builder/types.py +333 -0
code_graph_builder/utils/__init__.py +0 -0
code_graph_builder/utils/path_utils.py +30 -0
code_graph_builder-0.2.0.dist-info/METADATA +321 -0
code_graph_builder-0.2.0.dist-info/RECORD +93 -0
code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0

code_graph_builder/guidance/agent.py ADDED Viewed

@@ -0,0 +1,123 @@
+"""GuidanceAgent — ReAct-loop LLM agent that produces code generation guidance.
+The agent receives a design document, uses tools to research the target
+codebase, then synthesises a structured guidance Markdown file.
+"""
+from __future__ import annotations
+import asyncio
+import json
+from typing import Any
+from loguru import logger
+from ..rag.llm_backend import ChatMessage, LLMBackend, ToolCall
+from .prompts import SYSTEM_PROMPT
+from .toolset import ToolSet
+class GuidanceAgent:
+    """LLM agent with a tool-calling loop.
+    Depends only on :class:`ToolSet` (abstract) and :class:`LLMBackend` —
+    has no knowledge of concrete MCP services.
+    """
+    def __init__(
+        self,
+        toolset: ToolSet,
+        llm: LLMBackend,
+        max_iterations: int = 8,
+        max_tokens: int = 8192,
+    ) -> None:
+        self._toolset = toolset
+        self._llm = llm
+        self._max_iterations = max_iterations
+        self._max_tokens = max_tokens
+    async def run(self, design_doc: str) -> str:
+        """Execute the ReAct loop and return the guidance Markdown."""
+        messages: list[dict[str, Any]] = [
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "user", "content": design_doc},
+        ]
+        tool_specs = self._toolset.tool_specs()
+        for iteration in range(self._max_iterations):
+            logger.debug(f"GuidanceAgent iteration {iteration + 1}/{self._max_iterations}")
+            response = await asyncio.to_thread(
+                self._llm.chat_with_tools,
+                messages,
+                tools=tool_specs or None,
+                max_tokens=self._max_tokens,
+            )
+            if response.tool_calls:
+                messages.append(self._assistant_msg(response))
+                for tc in response.tool_calls:
+                    result = await self._safe_call(tc)
+                    messages.append({
+                        "role": "tool",
+                        "tool_call_id": tc.id,
+                        "content": result,
+                    })
+            else:
+                # No tool calls — final output
+                return response.content or ""
+        # Hit max iterations — force a final output without tools
+        logger.warning(
+            f"GuidanceAgent reached max iterations ({self._max_iterations}), "
+            "forcing final output."
+        )
+        messages.append({
+            "role": "user",
+            "content": (
+                "You have reached the maximum number of tool calls. "
+                "Please produce the final guidance document now based on "
+                "the information you have already gathered."
+            ),
+        })
+        final = await asyncio.to_thread(
+            self._llm.chat_with_tools,
+            messages,
+            tools=None,
+            max_tokens=self._max_tokens,
+        )
+        return final.content or ""
+    # -- Helpers -------------------------------------------------------------
+    async def _safe_call(self, tc: ToolCall) -> str:
+        """Execute a tool call, catching exceptions and returning them as text."""
+        try:
+            args = json.loads(tc.arguments)
+        except json.JSONDecodeError:
+            return json.dumps({"error": f"Invalid JSON arguments: {tc.arguments}"})
+        logger.debug(f"Tool call: {tc.function_name}({args})")
+        return await self._toolset.call(tc.function_name, args)
+    @staticmethod
+    def _assistant_msg(response: ChatMessage) -> dict[str, Any]:
+        """Build the assistant message dict including tool_calls for the
+        conversation history."""
+        msg: dict[str, Any] = {"role": "assistant"}
+        if response.content:
+            msg["content"] = response.content
+        if response.tool_calls:
+            msg["tool_calls"] = [
+                {
+                    "id": tc.id,
+                    "type": "function",
+                    "function": {
+                        "name": tc.function_name,
+                        "arguments": tc.arguments,
+                    },
+                }
+                for tc in response.tool_calls
+            ]
+        return msg

code_graph_builder/guidance/prompts.py ADDED Viewed

@@ -0,0 +1,74 @@
+"""System prompt templates for the GuidanceAgent."""
+from __future__ import annotations
+# System prompt for the GuidanceAgent.  It describes the research workflow,
+# names the three tools the model may call (find_api, semantic_search,
+# query_code_graph) and fixes the Markdown layout of the final document.
+# A trailing backslash inside the literal joins that physical line with the
+# next one, so each wrapped paragraph reaches the model as a single line.
+SYSTEM_PROMPT = """\
+You are a code architecture expert.  Your task is to convert a **design \
+document** into a **code generation guidance file** by researching the target \
+codebase.
+## Workflow
+1. **Read** the design document carefully.  Identify the modules, functions, \
+data types, and interfaces that will be created or modified.
+2. **Search** the codebase using the tools available to you:
+   - Use `find_api` to locate existing APIs that the new code must integrate \
+with.  This is the most important step — the generated code must call real \
+interfaces with correct signatures.
+   - Use `semantic_search` to find similar implementations that can serve as \
+reference patterns (code style, error handling, naming conventions).
+   - Use `query_code_graph` to understand call relationships and dependency \
+chains — who calls what, which modules depend on which.
+3. **Synthesise** everything into a single Markdown guidance document \
+(described below).
+## Guidelines
+- Be efficient: use the minimum number of tool calls needed.  Do not repeat \
+searches with near-identical queries.
+- When a tool returns no useful results, move on rather than retrying with \
+trivial variations.
+- Focus on information that a code-generation agent **cannot infer** from the \
+design document alone: real function signatures, existing patterns, actual \
+file paths.
+## Output Format
+Produce a single Markdown document with the following sections.  Omit a \
+section if you found no relevant information for it.
+```
+# Code Generation Guidance
+## Implementation Goal
+[One-paragraph summary of what needs to be built, derived from the design \
+document.]
+## Existing APIs to Use
+[For each API the new code must call, list:]
+- Fully qualified name
+- Signature (parameters + return type)
+- File path and line number
+- Brief usage note
+## Reference Implementations
+[2-3 most relevant existing functions that demonstrate the coding patterns \
+to follow.  Include file path and key code snippets.]
+## Dependency & Call Relationships
+[Upstream: who will call the new code.  Downstream: what the new code needs \
+to call.  Module-level dependency notes.]
+## Type Definitions
+[Structs, enums, interfaces, or classes that the new code will consume or \
+produce.]
+## Code Conventions
+[Naming style, error handling pattern, comment format, return conventions — \
+derived from the reference implementations above.]
+## Implementation Constraints
+[Constraints from the design document + any architectural constraints \
+discovered during research.]
+```
+"""

code_graph_builder/guidance/toolset.py ADDED Viewed

@@ -0,0 +1,264 @@
+"""ToolSet abstraction and MCPToolSet adapter for GuidanceAgent.
+The ``ToolSet`` protocol defines the contract between the agent and its tools.
+``MCPToolSet`` implements this contract by wrapping the existing MCP services
+(semantic search, Cypher generation, API doc lookup) without going through
+the MCP protocol layer.
+"""
+from __future__ import annotations
+import json
+from pathlib import Path
+from typing import Any, Protocol
+from loguru import logger
+class ToolSet(Protocol):
+    """Abstract tool interface that GuidanceAgent depends on.
+    Declared as a :class:`typing.Protocol`, so an implementation only needs
+    to provide these two methods; it does not have to inherit from this
+    class.
+    """
+    def tool_specs(self) -> list[dict[str, Any]]:
+        """Return OpenAI function-calling format tool definitions.
+        Returns:
+            One dict per tool that may be offered to the LLM.
+        """
+        ...
+    async def call(self, name: str, arguments: dict[str, Any]) -> str:
+        """Execute a tool by name with the given arguments.
+        Args:
+            name: Name of the tool to run, as given in its spec.
+            arguments: Decoded keyword arguments for the tool.
+        Returns a JSON-encoded string suitable for inclusion in the
+        LLM conversation as a tool result message.
+        """
+        ...
+# ---------------------------------------------------------------------------
+# Tool schema definitions (OpenAI function-calling format)
+# ---------------------------------------------------------------------------
+# Schema for the ``semantic_search`` tool: a required ``query`` string and an
+# optional integer ``top_k``.
+_SEMANTIC_SEARCH_SPEC: dict[str, Any] = {
+    "type": "function",
+    "function": {
+        "name": "semantic_search",
+        "description": (
+            "Search the codebase for functions, classes, or methods that are "
+            "semantically similar to the query. Returns source code snippets "
+            "with similarity scores."
+        ),
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "query": {
+                    "type": "string",
+                    "description": "Natural language description of what to search for",
+                },
+                "top_k": {
+                    "type": "integer",
+                    "description": "Number of results to return (default: 5)",
+                },
+            },
+            "required": ["query"],
+        },
+    },
+}
+# Schema for the ``find_api`` tool: a required ``query`` string and an
+# optional integer ``top_k``.
+_FIND_API_SPEC: dict[str, Any] = {
+    "type": "function",
+    "function": {
+        "name": "find_api",
+        "description": (
+            "Find existing API interfaces by semantic search and return their "
+            "detailed documentation including function signatures, parameters, "
+            "call trees, and source code."
+        ),
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "query": {
+                    "type": "string",
+                    "description": "Natural language description of the API to find",
+                },
+                "top_k": {
+                    "type": "integer",
+                    "description": "Number of results to return (default: 5)",
+                },
+            },
+            "required": ["query"],
+        },
+    },
+}
+# Schema for the ``query_code_graph`` tool: a single required ``question``
+# string.
+_QUERY_CODE_GRAPH_SPEC: dict[str, Any] = {
+    "type": "function",
+    "function": {
+        "name": "query_code_graph",
+        "description": (
+            "Query the code knowledge graph using natural language. "
+            "Useful for finding call relationships, module dependencies, "
+            "class hierarchies, and structural patterns in the codebase."
+        ),
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "question": {
+                    "type": "string",
+                    "description": "Natural language question about code structure",
+                },
+            },
+            "required": ["question"],
+        },
+    },
+}
+# All tool schemas defined in this module, in declaration order.
+_ALL_SPECS = [_SEMANTIC_SEARCH_SPEC, _FIND_API_SPEC, _QUERY_CODE_GRAPH_SPEC]
+# Maximum characters per tool result to avoid blowing up the context window.
+# Used as the default for MCPToolSet's ``max_result_chars`` argument.
+_DEFAULT_MAX_RESULT_CHARS = 4000
+# ---------------------------------------------------------------------------
+# MCPToolSet — adapter that wraps existing Python services
+# ---------------------------------------------------------------------------
+class MCPToolSet:
+    """Adapter that exposes existing MCP services as a :class:`ToolSet`.
+    This calls the underlying Python service objects directly — it does NOT
+    go through the MCP protocol.
+    """
+    def __init__(
+        self,
+        semantic_service: Any | None,
+        cypher_gen: Any | None,
+        ingestor: Any | None,
+        artifact_dir: Path | None,
+        max_result_chars: int = _DEFAULT_MAX_RESULT_CHARS,
+    ) -> None:
+        self._semantic_service = semantic_service
+        self._cypher_gen = cypher_gen
+        self._ingestor = ingestor
+        self._artifact_dir = artifact_dir
+        self._max_chars = max_result_chars
+        self._dispatch = {
+            "semantic_search": self._call_semantic_search,
+            "find_api": self._call_find_api,
+            "query_code_graph": self._call_query_code_graph,
+        }
+    def tool_specs(self) -> list[dict[str, Any]]:
+        """Return tool definitions, excluding tools whose services are unavailable."""
+        specs: list[dict[str, Any]] = []
+        if self._semantic_service is not None:
+            specs.append(_SEMANTIC_SEARCH_SPEC)
+            specs.append(_FIND_API_SPEC)
+        if self._cypher_gen is not None and self._ingestor is not None:
+            specs.append(_QUERY_CODE_GRAPH_SPEC)
+        return specs
+    async def call(self, name: str, arguments: dict[str, Any]) -> str:
+        handler = self._dispatch.get(name)
+        if handler is None:
+            return json.dumps({"error": f"Unknown tool: {name}"}, ensure_ascii=False)
+        try:
+            result = await handler(**arguments)
+        except Exception as exc:
+            logger.warning(f"Tool '{name}' failed: {exc}")
+            return json.dumps(
+                {"error": f"Tool execution failed: {exc}"},
+                ensure_ascii=False,
+                default=str,
+            )
+        text = json.dumps(result, ensure_ascii=False, default=str)
+        if len(text) > self._max_chars:
+            text = text[: self._max_chars] + "\n... (truncated)"
+        return text
+    # -- Tool implementations ------------------------------------------------
+    async def _call_semantic_search(
+        self, query: str, top_k: int = 5, **_: Any
+    ) -> dict[str, Any]:
+        assert self._semantic_service is not None
+        results = self._semantic_service.search(query, top_k=top_k)
+        return {
+            "query": query,
+            "result_count": len(results),
+            "results": [
+                {
+                    "qualified_name": r.qualified_name,
+                    "name": r.name,
+                    "type": r.type,
+                    "score": r.score,
+                    "file_path": r.file_path,
+                    "start_line": r.start_line,
+                    "end_line": r.end_line,
+                    "source_code": r.source_code,
+                }
+                for r in results
+            ],
+        }
+    async def _call_find_api(
+        self, query: str, top_k: int = 5, **_: Any
+    ) -> dict[str, Any]:
+        assert self._semantic_service is not None
+        results = self._semantic_service.search(query, top_k=top_k)
+        api_dir = self._artifact_dir / "api_docs" if self._artifact_dir else None
+        funcs_dir = api_dir / "funcs" if api_dir else None
+        has_api_docs = funcs_dir is not None and funcs_dir.exists()
+        combined = []
+        for r in results:
+            entry: dict[str, Any] = {
+                "qualified_name": r.qualified_name,
+                "name": r.name,
+                "type": r.type,
+                "score": r.score,
+                "file_path": r.file_path,
+                "source_code": r.source_code,
+                "api_doc": None,
+            }
+            if has_api_docs and r.qualified_name:
+                safe_qn = r.qualified_name.replace("/", "_").replace("\\", "_")
+                doc_file = funcs_dir / f"{safe_qn}.md"  # type: ignore[union-attr]
+                if doc_file.exists():
+                    entry["api_doc"] = doc_file.read_text(
+                        encoding="utf-8", errors="ignore"
+                    )
+            combined.append(entry)
+        return {
+            "query": query,
+            "result_count": len(combined),
+            "api_docs_available": has_api_docs,
+            "results": combined,
+        }
+    async def _call_query_code_graph(
+        self, question: str, **_: Any
+    ) -> dict[str, Any]:
+        assert self._cypher_gen is not None
+        assert self._ingestor is not None
+        cypher = self._cypher_gen.generate(question)
+        rows = self._ingestor.query(cypher)
+        serialisable = []
+        for row in rows:
+            raw = row.get("result", row)
+            if isinstance(raw, (list, tuple)):
+                serialisable.append(list(raw))
+            else:
+                serialisable.append(raw)
+        return {
+            "question": question,
+            "cypher": cypher,
+            "row_count": len(serialisable),
+            "rows": serialisable,
+        }