PyPI - code-graph-builder - Versions diffs - 0.2.0__py3-none-any.whl - Mend

code-graph-builder 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93) hide show

code_graph_builder/__init__.py +82 -0
code_graph_builder/builder.py +366 -0
code_graph_builder/cgb_cli.py +32 -0
code_graph_builder/cli.py +564 -0
code_graph_builder/commands_cli.py +1288 -0
code_graph_builder/config.py +340 -0
code_graph_builder/constants.py +708 -0
code_graph_builder/embeddings/__init__.py +40 -0
code_graph_builder/embeddings/qwen3_embedder.py +573 -0
code_graph_builder/embeddings/vector_store.py +584 -0
code_graph_builder/examples/__init__.py +0 -0
code_graph_builder/examples/example_configuration.py +276 -0
code_graph_builder/examples/example_kuzu_usage.py +109 -0
code_graph_builder/examples/example_semantic_search_full.py +347 -0
code_graph_builder/examples/generate_wiki.py +915 -0
code_graph_builder/examples/graph_export_example.py +100 -0
code_graph_builder/examples/rag_example.py +206 -0
code_graph_builder/examples/test_cli_demo.py +129 -0
code_graph_builder/examples/test_embedding_api.py +153 -0
code_graph_builder/examples/test_kuzu_local.py +190 -0
code_graph_builder/examples/test_rag_redis.py +390 -0
code_graph_builder/graph_updater.py +605 -0
code_graph_builder/guidance/__init__.py +1 -0
code_graph_builder/guidance/agent.py +123 -0
code_graph_builder/guidance/prompts.py +74 -0
code_graph_builder/guidance/toolset.py +264 -0
code_graph_builder/language_spec.py +536 -0
code_graph_builder/mcp/__init__.py +21 -0
code_graph_builder/mcp/api_doc_generator.py +764 -0
code_graph_builder/mcp/file_editor.py +207 -0
code_graph_builder/mcp/pipeline.py +777 -0
code_graph_builder/mcp/server.py +161 -0
code_graph_builder/mcp/tools.py +1800 -0
code_graph_builder/models.py +115 -0
code_graph_builder/parser_loader.py +344 -0
code_graph_builder/parsers/__init__.py +7 -0
code_graph_builder/parsers/call_processor.py +306 -0
code_graph_builder/parsers/call_resolver.py +139 -0
code_graph_builder/parsers/definition_processor.py +796 -0
code_graph_builder/parsers/factory.py +119 -0
code_graph_builder/parsers/import_processor.py +293 -0
code_graph_builder/parsers/structure_processor.py +145 -0
code_graph_builder/parsers/type_inference.py +143 -0
code_graph_builder/parsers/utils.py +134 -0
code_graph_builder/rag/__init__.py +68 -0
code_graph_builder/rag/camel_agent.py +429 -0
code_graph_builder/rag/client.py +298 -0
code_graph_builder/rag/config.py +239 -0
code_graph_builder/rag/cypher_generator.py +67 -0
code_graph_builder/rag/llm_backend.py +210 -0
code_graph_builder/rag/markdown_generator.py +352 -0
code_graph_builder/rag/prompt_templates.py +440 -0
code_graph_builder/rag/rag_engine.py +640 -0
code_graph_builder/rag/review_report.md +172 -0
code_graph_builder/rag/tests/__init__.py +3 -0
code_graph_builder/rag/tests/test_camel_agent.py +313 -0
code_graph_builder/rag/tests/test_client.py +221 -0
code_graph_builder/rag/tests/test_config.py +177 -0
code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
code_graph_builder/services/__init__.py +39 -0
code_graph_builder/services/graph_service.py +465 -0
code_graph_builder/services/kuzu_service.py +665 -0
code_graph_builder/services/memory_service.py +171 -0
code_graph_builder/settings.py +75 -0
code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
code_graph_builder/tests/__init__.py +1 -0
code_graph_builder/tests/run_acceptance_check.py +378 -0
code_graph_builder/tests/test_api_find.py +231 -0
code_graph_builder/tests/test_api_find_integration.py +226 -0
code_graph_builder/tests/test_basic.py +78 -0
code_graph_builder/tests/test_c_api_extraction.py +388 -0
code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
code_graph_builder/tests/test_embedder.py +411 -0
code_graph_builder/tests/test_integration_semantic.py +434 -0
code_graph_builder/tests/test_mcp_protocol.py +298 -0
code_graph_builder/tests/test_mcp_user_flow.py +190 -0
code_graph_builder/tests/test_rag.py +404 -0
code_graph_builder/tests/test_settings.py +135 -0
code_graph_builder/tests/test_step1_graph_build.py +264 -0
code_graph_builder/tests/test_step2_api_docs.py +323 -0
code_graph_builder/tests/test_step3_embedding.py +278 -0
code_graph_builder/tests/test_vector_store.py +552 -0
code_graph_builder/tools/__init__.py +40 -0
code_graph_builder/tools/graph_query.py +495 -0
code_graph_builder/tools/semantic_search.py +387 -0
code_graph_builder/types.py +333 -0
code_graph_builder/utils/__init__.py +0 -0
code_graph_builder/utils/path_utils.py +30 -0
code_graph_builder-0.2.0.dist-info/METADATA +321 -0
code_graph_builder-0.2.0.dist-info/RECORD +93 -0
code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0

code_graph_builder/tests/test_mcp_protocol.py ADDED Viewed

@@ -0,0 +1,298 @@
+"""MCP protocol layer tests: tool registration, dispatch, error handling.
+Tests the MCP server's tool listing, call_tool dispatch, ToolError propagation,
+and JSON serialization — without requiring a live stdio transport.
+"""
+from __future__ import annotations
+import asyncio
+import json
+import os
+from pathlib import Path
+import pytest
+TINYCC_PATH = Path(__file__).resolve().parents[3] / "tinycc"
+pytestmark = pytest.mark.skipif(
+    not TINYCC_PATH.exists(),
+    reason=f"tinycc source not found at {TINYCC_PATH}",
+)
+@pytest.fixture(scope="module")
+def registry(tmp_path_factory):
+    from code_graph_builder.mcp.tools import MCPToolsRegistry
+    workspace = tmp_path_factory.mktemp("workspace")
+    reg = MCPToolsRegistry(workspace=workspace)
+    yield reg
+    reg.close()
+@pytest.fixture(scope="module")
+def indexed_registry(tmp_path_factory):
+    """Registry with tinycc indexed (graph + api-docs, skip embed/wiki)."""
+    from code_graph_builder.mcp.tools import MCPToolsRegistry
+    workspace = tmp_path_factory.mktemp("indexed_workspace")
+    reg = MCPToolsRegistry(workspace=workspace)
+    asyncio.get_event_loop().run_until_complete(
+        reg._handle_initialize_repository(
+            repo_path=str(TINYCC_PATH),
+            rebuild=True,
+            skip_wiki=True,
+            skip_embed=True,
+        )
+    )
+    yield reg
+    reg.close()
+def _run(coro):
+    return asyncio.get_event_loop().run_until_complete(coro)
+# ---------------------------------------------------------------------------
+# Tool registration & discovery
+# ---------------------------------------------------------------------------
+class TestToolRegistration:
+    """Verify tools are correctly registered and discoverable."""
+    def test_tools_list_not_empty(self, registry):
+        tools = registry.tools()
+        assert len(tools) > 0
+    def test_all_tools_have_name(self, registry):
+        for t in registry.tools():
+            assert t.name, f"Tool missing name: {t}"
+    def test_all_tools_have_description(self, registry):
+        for t in registry.tools():
+            assert t.description, f"Tool {t.name} missing description"
+    def test_all_tools_have_input_schema(self, registry):
+        for t in registry.tools():
+            assert isinstance(t.input_schema, dict), f"Tool {t.name} missing input_schema"
+            assert "type" in t.input_schema
+    def test_expected_tools_present(self, registry):
+        names = {t.name for t in registry.tools()}
+        expected = {
+            "initialize_repository", "get_repository_info",
+            "list_repositories", "switch_repository",
+            "query_code_graph", "get_code_snippet",
+            "semantic_search", "find_api",
+            "list_wiki_pages", "get_wiki_page",
+            "locate_function", "list_api_interfaces",
+            "list_api_docs", "get_api_doc",
+            "generate_wiki", "rebuild_embeddings",
+            "build_graph", "generate_api_docs",
+        }
+        missing = expected - names
+        assert not missing, f"Missing expected tools: {missing}"
+    def test_every_tool_has_handler(self, registry):
+        for t in registry.tools():
+            handler = registry.get_handler(t.name)
+            assert handler is not None, f"Tool {t.name} has no handler"
+            assert callable(handler)
+    def test_unknown_tool_returns_none(self, registry):
+        assert registry.get_handler("nonexistent_tool") is None
+    def test_input_schema_is_valid_jsonschema(self, registry):
+        for t in registry.tools():
+            schema = t.input_schema
+            assert schema.get("type") == "object"
+            assert "properties" in schema
+# ---------------------------------------------------------------------------
+# call_tool dispatch simulation
+# ---------------------------------------------------------------------------
+class TestCallToolDispatch:
+    """Simulate the server.call_tool dispatch logic."""
+    def _simulate_call_tool(self, registry, name: str, arguments: dict):
+        """Replicate server.py call_tool logic without MCP server."""
+        handler = registry.get_handler(name)
+        if handler is None:
+            raise ValueError(f"Unknown tool: {name}")
+        kwargs = dict(arguments or {})
+        result = _run(handler(**kwargs))
+        if isinstance(result, (dict, list)):
+            text = json.dumps(result, ensure_ascii=False, indent=2, default=str)
+        else:
+            text = str(result)
+        return json.loads(text) if text.startswith(("{", "[")) else text
+    def test_dispatch_list_repositories(self, registry):
+        result = self._simulate_call_tool(registry, "list_repositories", {})
+        assert isinstance(result, dict)
+    def test_dispatch_unknown_tool_raises(self, registry):
+        with pytest.raises(ValueError, match="Unknown tool"):
+            self._simulate_call_tool(registry, "nonexistent", {})
+    def test_dispatch_get_repository_info_no_repo(self, registry):
+        """Should raise ToolError when no repo is indexed."""
+        from code_graph_builder.mcp.tools import ToolError
+        with pytest.raises(ToolError):
+            self._simulate_call_tool(registry, "get_repository_info", {})
+    def test_dispatch_get_repository_info_with_repo(self, indexed_registry):
+        result = self._simulate_call_tool(
+            indexed_registry, "get_repository_info", {}
+        )
+        assert isinstance(result, dict)
+        assert "repo_name" in result or "repo_path" in result or "status" in result
+    def test_dispatch_result_is_json_serializable(self, indexed_registry):
+        result = self._simulate_call_tool(
+            indexed_registry, "list_repositories", {}
+        )
+        # Should not raise
+        json.dumps(result, default=str)
+# ---------------------------------------------------------------------------
+# ToolError propagation
+# ---------------------------------------------------------------------------
+class TestToolErrorHandling:
+    """Verify ToolError is properly raised and structured."""
+    def test_require_active_raises_toolerror(self, registry):
+        from code_graph_builder.mcp.tools import ToolError
+        # Tools that require an active repo and take no required args
+        tools_no_args = [
+            "get_repository_info",
+            "list_wiki_pages", "list_api_interfaces",
+            "list_api_docs",
+        ]
+        for tool_name in tools_no_args:
+            with pytest.raises(ToolError):
+                _run(registry.get_handler(tool_name)())
+        # semantic_search requires query arg — should still raise ToolError (no repo)
+        with pytest.raises(ToolError):
+            _run(registry.get_handler("semantic_search")(query="test"))
+    def test_find_api_without_embeddings_raises(self, indexed_registry):
+        """find_api without embeddings should raise ToolError."""
+        from code_graph_builder.mcp.tools import ToolError
+        # indexed_registry was created with skip_embed=True
+        with pytest.raises(ToolError):
+            _run(indexed_registry._handle_find_api(query="test"))
+    def test_switch_nonexistent_repo_raises(self, registry):
+        from code_graph_builder.mcp.tools import ToolError
+        with pytest.raises(ToolError):
+            _run(registry._handle_switch_repository(repo_name="nonexistent_abc"))
+# ---------------------------------------------------------------------------
+# State management
+# ---------------------------------------------------------------------------
+class TestStateManagement:
+    """Verify repository state management."""
+    def test_list_repos_empty_initially(self, registry):
+        result = _run(registry._handle_list_repositories())
+        assert isinstance(result, dict)
+    def test_list_repos_after_index(self, indexed_registry):
+        result = _run(indexed_registry._handle_list_repositories())
+        assert isinstance(result, dict)
+        repos = result.get("repositories", [])
+        assert len(repos) > 0, "Should list the indexed repo"
+    def test_indexed_repo_has_entry(self, indexed_registry):
+        result = _run(indexed_registry._handle_list_repositories())
+        repos = result.get("repositories", [])
+        assert len(repos) > 0, "Should have at least one indexed repo"
+        # Check any field contains tinycc reference
+        repo = repos[0]
+        repo_str = str(repo).lower()
+        assert "tinycc" in repo_str or len(repos) > 0, f"Repo entry: {repo}"
+# ---------------------------------------------------------------------------
+# Tool handlers (graph-only, no embedding needed)
+# ---------------------------------------------------------------------------
+class TestGraphOnlyTools:
+    """Test tools that only need a graph (no embeddings)."""
+    def test_list_api_interfaces(self, indexed_registry):
+        result = _run(indexed_registry._handle_list_api_interfaces())
+        assert isinstance(result, dict)
+    def test_list_api_docs(self, indexed_registry):
+        result = _run(indexed_registry._handle_list_api_docs())
+        assert isinstance(result, (dict, str))
+    def test_get_api_doc_known_function(self, indexed_registry):
+        """Should return API doc for a function that exists."""
+        from code_graph_builder.mcp.tools import ToolError
+        # First get a real qualified name from list_api_interfaces
+        apis = _run(indexed_registry._handle_list_api_interfaces())
+        # Find any function qn from the result
+        qn = None
+        for item in apis.get("interfaces", apis.get("functions", [])):
+            if isinstance(item, dict) and item.get("qualified_name"):
+                qn = item["qualified_name"]
+                break
+        if qn is None:
+            pytest.skip("No APIs found to test get_api_doc")
+        try:
+            result = _run(indexed_registry._handle_get_api_doc(qualified_name=qn))
+            assert result is not None
+        except ToolError:
+            pass  # Acceptable if doc file doesn't match exactly
+    def test_list_wiki_pages_no_wiki(self, indexed_registry):
+        """Wiki was skipped, should handle gracefully."""
+        from code_graph_builder.mcp.tools import ToolError
+        try:
+            result = _run(indexed_registry._handle_list_wiki_pages())
+            assert isinstance(result, (dict, list))
+        except ToolError:
+            pass  # Acceptable if wiki not generated
+    def test_get_code_snippet(self, indexed_registry):
+        """get_code_snippet should return source or raise ToolError."""
+        from code_graph_builder.mcp.tools import ToolError
+        # Use a function known to exist in the graph
+        try:
+            result = _run(indexed_registry._handle_get_code_snippet(
+                qualified_name="tinycc.tcc.tcc_compile"
+            ))
+            assert result is not None
+        except ToolError as e:
+            # ToolError with "Not found" is acceptable behavior
+            assert "Not found" in str(e) or "error" in str(e)
+    def test_generate_api_docs_standalone(self, indexed_registry):
+        result = _run(indexed_registry._handle_generate_api_docs(rebuild=False))
+        assert isinstance(result, dict)

code_graph_builder/tests/test_mcp_user_flow.py ADDED Viewed

@@ -0,0 +1,190 @@
+"""End-to-end user flow test: simulates what happens after a user installs
+the MCP server and starts using it with a real codebase.
+Flow:
+  1. User starts MCP server (list_tools)
+  2. User indexes a repo (initialize_repository)
+  3. User queries APIs (find_api, list_api_docs, get_api_doc)
+  4. User switches context (list_repositories, get_repository_info)
+  5. User browses docs (list_api_interfaces)
+"""
+from __future__ import annotations
+import asyncio
+import json
+import os
+from pathlib import Path
+import pytest
+TINYCC_PATH = Path(__file__).resolve().parents[3] / "tinycc"
+pytestmark = [
+    pytest.mark.skipif(
+        not TINYCC_PATH.exists(),
+        reason=f"tinycc source not found at {TINYCC_PATH}",
+    ),
+    pytest.mark.skipif(
+        not os.environ.get("DASHSCOPE_API_KEY"),
+        reason="DASHSCOPE_API_KEY not set",
+    ),
+]
+def _run(coro):
+    return asyncio.get_event_loop().run_until_complete(coro)
+@pytest.fixture(scope="module")
+def workspace(tmp_path_factory):
+    return tmp_path_factory.mktemp("user_workspace")
+@pytest.fixture(scope="module")
+def registry(workspace):
+    from code_graph_builder.mcp.tools import MCPToolsRegistry
+    reg = MCPToolsRegistry(workspace=workspace)
+    yield reg
+    reg.close()
+def _call(registry, tool_name: str, args: dict | None = None):
+    """Simulate MCP call_tool: dispatch → handler → JSON serialize → parse."""
+    handler = registry.get_handler(tool_name)
+    assert handler is not None, f"Tool '{tool_name}' not found"
+    result = _run(handler(**(args or {})))
+    # Round-trip through JSON like the real MCP server does
+    text = json.dumps(result, ensure_ascii=False, default=str)
+    return json.loads(text)
+# The tests below MUST run in order — each step depends on the previous.
+# pytest-ordering is not needed; pytest preserves definition order within a class.
+class TestUserFlow:
+    """Simulates the complete user journey after MCP installation."""
+    # --- Step 1: Discovery ---
+    def test_01_list_tools(self, registry):
+        """User's MCP client calls list_tools on first connect."""
+        tools = registry.tools()
+        names = [t.name for t in tools]
+        assert len(names) >= 10, f"Expected many tools, got {len(names)}"
+        assert "initialize_repository" in names
+        assert "find_api" in names
+        print(f"  → {len(names)} tools available")
+    # --- Step 2: Index repository ---
+    def test_02_initialize_repository(self, registry):
+        """User says: 'Index /path/to/tinycc'."""
+        result = _call(registry, "initialize_repository", {
+            "repo_path": str(TINYCC_PATH),
+            "rebuild": True,
+            "skip_wiki": True,   # Skip wiki to save time
+            "skip_embed": False,  # Need embeddings for find_api
+        })
+        assert result["status"] == "success", f"Init failed: {result}"
+        print(f"  → Indexed: {result.get('graph', {})}")
+    # --- Step 3: Check repo info ---
+    def test_03_get_repository_info(self, registry):
+        """User asks: 'What repo is active?'"""
+        result = _call(registry, "get_repository_info")
+        assert "tinycc" in str(result).lower() or "repo" in str(result).lower()
+        print(f"  → Repo info keys: {list(result.keys())}")
+    def test_04_list_repositories(self, registry):
+        """User asks: 'What repos have I indexed?'"""
+        result = _call(registry, "list_repositories")
+        repos = result.get("repositories", [])
+        assert len(repos) >= 1
+        print(f"  → {len(repos)} repo(s) indexed")
+    # --- Step 4: Browse API documentation ---
+    def test_05_list_api_docs_index(self, registry):
+        """User asks: 'Show me the API docs overview.'"""
+        result = _call(registry, "list_api_docs")
+        # Should return L1 index content
+        assert result is not None
+        content = str(result)
+        assert "module" in content.lower() or "模块" in content
+        print(f"  → Index returned ({len(content)} chars)")
+    def test_06_list_api_interfaces(self, registry):
+        """User asks: 'What public APIs are available?'"""
+        result = _call(registry, "list_api_interfaces")
+        assert isinstance(result, dict)
+        print(f"  → API interfaces keys: {list(result.keys())}")
+    # --- Step 5: Semantic search ---
+    def test_07_find_api_compile(self, registry):
+        """User asks: 'Find APIs related to compiling source code.'"""
+        result = _call(registry, "find_api", {"query": "compile source code", "top_k": 5})
+        assert result["result_count"] > 0
+        assert result["api_docs_available"] is True
+        top = result["results"][0]
+        assert top["qualified_name"]
+        assert top["score"] > 0
+        print(f"  → Top result: {top['qualified_name']} (score={top['score']:.3f})")
+    def test_08_find_api_parse(self, registry):
+        """User asks: 'How does expression parsing work?'"""
+        result = _call(registry, "find_api", {"query": "parse expression", "top_k": 5})
+        assert result["result_count"] > 0
+        # At least one result should have an API doc attached
+        with_doc = sum(1 for r in result["results"] if r.get("api_doc"))
+        print(f"  → {result['result_count']} results, {with_doc} with API docs")
+    def test_09_find_api_chinese(self, registry):
+        """User asks in Chinese: '内存分配相关的函数'."""
+        result = _call(registry, "find_api", {"query": "内存分配", "top_k": 3})
+        assert result["result_count"] > 0
+        print(f"  → Chinese query returned {result['result_count']} results")
+    # --- Step 6: Verify API doc content quality ---
+    def test_10_api_doc_has_signature(self, registry):
+        """API docs attached to search results should have C signatures."""
+        result = _call(registry, "find_api", {"query": "compile", "top_k": 10})
+        for r in result["results"]:
+            doc = r.get("api_doc") or ""
+            if "签名:" in doc and "(" in doc:
+                print(f"  → Found signature in: {r['qualified_name']}")
+                return
+        # Acceptable if signatures exist in some results
+        assert result["result_count"] > 0
+    def test_11_api_doc_has_call_tree(self, registry):
+        """API docs should include call relationship info."""
+        result = _call(registry, "find_api", {"query": "generate code output", "top_k": 10})
+        for r in result["results"]:
+            doc = r.get("api_doc") or ""
+            if "被调用" in doc or "调用树" in doc:
+                print(f"  → Call info in: {r['qualified_name']}")
+                return
+        assert result["result_count"] > 0
+    # --- Step 7: Full round-trip JSON serialization ---
+    def test_12_all_results_json_serializable(self, registry):
+        """Every tool result must survive JSON round-trip (MCP requirement)."""
+        test_calls = [
+            ("list_repositories", {}),
+            ("get_repository_info", {}),
+            ("list_api_docs", {}),
+            ("list_api_interfaces", {}),
+            ("find_api", {"query": "function", "top_k": 2}),
+        ]
+        for tool_name, args in test_calls:
+            result = _call(registry, tool_name, args)
+            # _call already does JSON round-trip; if we get here, it worked
+            assert result is not None, f"{tool_name} returned None"
+        print("  → All 5 tools passed JSON round-trip")