code-graph-builder 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_graph_builder/__init__.py +82 -0
- code_graph_builder/builder.py +366 -0
- code_graph_builder/cgb_cli.py +32 -0
- code_graph_builder/cli.py +564 -0
- code_graph_builder/commands_cli.py +1288 -0
- code_graph_builder/config.py +340 -0
- code_graph_builder/constants.py +708 -0
- code_graph_builder/embeddings/__init__.py +40 -0
- code_graph_builder/embeddings/qwen3_embedder.py +573 -0
- code_graph_builder/embeddings/vector_store.py +584 -0
- code_graph_builder/examples/__init__.py +0 -0
- code_graph_builder/examples/example_configuration.py +276 -0
- code_graph_builder/examples/example_kuzu_usage.py +109 -0
- code_graph_builder/examples/example_semantic_search_full.py +347 -0
- code_graph_builder/examples/generate_wiki.py +915 -0
- code_graph_builder/examples/graph_export_example.py +100 -0
- code_graph_builder/examples/rag_example.py +206 -0
- code_graph_builder/examples/test_cli_demo.py +129 -0
- code_graph_builder/examples/test_embedding_api.py +153 -0
- code_graph_builder/examples/test_kuzu_local.py +190 -0
- code_graph_builder/examples/test_rag_redis.py +390 -0
- code_graph_builder/graph_updater.py +605 -0
- code_graph_builder/guidance/__init__.py +1 -0
- code_graph_builder/guidance/agent.py +123 -0
- code_graph_builder/guidance/prompts.py +74 -0
- code_graph_builder/guidance/toolset.py +264 -0
- code_graph_builder/language_spec.py +536 -0
- code_graph_builder/mcp/__init__.py +21 -0
- code_graph_builder/mcp/api_doc_generator.py +764 -0
- code_graph_builder/mcp/file_editor.py +207 -0
- code_graph_builder/mcp/pipeline.py +777 -0
- code_graph_builder/mcp/server.py +161 -0
- code_graph_builder/mcp/tools.py +1800 -0
- code_graph_builder/models.py +115 -0
- code_graph_builder/parser_loader.py +344 -0
- code_graph_builder/parsers/__init__.py +7 -0
- code_graph_builder/parsers/call_processor.py +306 -0
- code_graph_builder/parsers/call_resolver.py +139 -0
- code_graph_builder/parsers/definition_processor.py +796 -0
- code_graph_builder/parsers/factory.py +119 -0
- code_graph_builder/parsers/import_processor.py +293 -0
- code_graph_builder/parsers/structure_processor.py +145 -0
- code_graph_builder/parsers/type_inference.py +143 -0
- code_graph_builder/parsers/utils.py +134 -0
- code_graph_builder/rag/__init__.py +68 -0
- code_graph_builder/rag/camel_agent.py +429 -0
- code_graph_builder/rag/client.py +298 -0
- code_graph_builder/rag/config.py +239 -0
- code_graph_builder/rag/cypher_generator.py +67 -0
- code_graph_builder/rag/llm_backend.py +210 -0
- code_graph_builder/rag/markdown_generator.py +352 -0
- code_graph_builder/rag/prompt_templates.py +440 -0
- code_graph_builder/rag/rag_engine.py +640 -0
- code_graph_builder/rag/review_report.md +172 -0
- code_graph_builder/rag/tests/__init__.py +3 -0
- code_graph_builder/rag/tests/test_camel_agent.py +313 -0
- code_graph_builder/rag/tests/test_client.py +221 -0
- code_graph_builder/rag/tests/test_config.py +177 -0
- code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
- code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
- code_graph_builder/services/__init__.py +39 -0
- code_graph_builder/services/graph_service.py +465 -0
- code_graph_builder/services/kuzu_service.py +665 -0
- code_graph_builder/services/memory_service.py +171 -0
- code_graph_builder/settings.py +75 -0
- code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
- code_graph_builder/tests/__init__.py +1 -0
- code_graph_builder/tests/run_acceptance_check.py +378 -0
- code_graph_builder/tests/test_api_find.py +231 -0
- code_graph_builder/tests/test_api_find_integration.py +226 -0
- code_graph_builder/tests/test_basic.py +78 -0
- code_graph_builder/tests/test_c_api_extraction.py +388 -0
- code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
- code_graph_builder/tests/test_embedder.py +411 -0
- code_graph_builder/tests/test_integration_semantic.py +434 -0
- code_graph_builder/tests/test_mcp_protocol.py +298 -0
- code_graph_builder/tests/test_mcp_user_flow.py +190 -0
- code_graph_builder/tests/test_rag.py +404 -0
- code_graph_builder/tests/test_settings.py +135 -0
- code_graph_builder/tests/test_step1_graph_build.py +264 -0
- code_graph_builder/tests/test_step2_api_docs.py +323 -0
- code_graph_builder/tests/test_step3_embedding.py +278 -0
- code_graph_builder/tests/test_vector_store.py +552 -0
- code_graph_builder/tools/__init__.py +40 -0
- code_graph_builder/tools/graph_query.py +495 -0
- code_graph_builder/tools/semantic_search.py +387 -0
- code_graph_builder/types.py +333 -0
- code_graph_builder/utils/__init__.py +0 -0
- code_graph_builder/utils/path_utils.py +30 -0
- code_graph_builder-0.2.0.dist-info/METADATA +321 -0
- code_graph_builder-0.2.0.dist-info/RECORD +93 -0
- code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
- code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
"""Integration test: api-find (semantic search + API doc attachment) on tinycc.
|
|
2
|
+
|
|
3
|
+
Tests the full find_api pipeline: query → embedding → vector search →
|
|
4
|
+
API doc lookup → combined result. Validates relevance, doc attachment,
|
|
5
|
+
and result structure.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
import os
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
import pytest
|
|
15
|
+
|
|
16
|
+
# Expected tinycc checkout: the "tinycc" directory under parents[3] of this
# file's resolved path.
TINYCC_PATH = Path(__file__).resolve().parents[3] / "tinycc"

# Module-wide skip markers: one skipif per (condition, reason) pair. The tests
# are skipped when the tinycc sources are absent or DASHSCOPE_API_KEY is unset.
pytestmark = [
    pytest.mark.skipif(condition, reason=reason)
    for condition, reason in (
        (
            not TINYCC_PATH.exists(),
            f"tinycc source not found at {TINYCC_PATH}",
        ),
        (
            not os.environ.get("DASHSCOPE_API_KEY"),
            "DASHSCOPE_API_KEY not set",
        ),
    )
]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@pytest.fixture(scope="module")
def mcp_registry(tmp_path_factory):
    """Provide an ``MCPToolsRegistry`` initialised against the tinycc repo.

    A temporary workspace is created and the registry's
    ``_handle_initialize_repository`` handler is run once per module with
    ``rebuild=True``, ``skip_wiki=True`` and ``skip_embed=False``.

    Yields:
        The initialised registry. It is closed when the module's tests are
        done, and also when initialisation itself fails.
    """
    from code_graph_builder.mcp.tools import MCPToolsRegistry

    workspace = tmp_path_factory.mktemp("workspace")
    registry = MCPToolsRegistry(workspace=workspace)

    try:
        # asyncio.get_event_loop() raises RuntimeError when no loop is set
        # (Python 3.14+, or after the current loop has been cleared), so
        # create and register one explicitly in that case.
        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)

        # Run full pipeline via initialize_repository handler
        result = loop.run_until_complete(
            registry._handle_initialize_repository(
                repo_path=str(TINYCC_PATH),
                rebuild=True,
                skip_wiki=True,
                skip_embed=False,
            )
        )
        assert result.get("status") == "success", f"Init failed: {result}"

        yield registry
    finally:
        # Release the registry on every exit path, not only normal teardown.
        registry.close()
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _find_api(registry, query: str, top_k: int = 5) -> dict:
|
|
54
|
+
"""Helper to call find_api synchronously."""
|
|
55
|
+
return asyncio.get_event_loop().run_until_complete(
|
|
56
|
+
registry._handle_find_api(query=query, top_k=top_k)
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# ---------------------------------------------------------------------------
|
|
61
|
+
# Result structure
|
|
62
|
+
# ---------------------------------------------------------------------------
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class TestResultStructure:
    """Shape checks for the dictionary returned by find_api."""

    def test_returns_dict(self, mcp_registry):
        """The call result is a dict."""
        response = _find_api(mcp_registry, "compile")
        assert isinstance(response, dict)

    def test_has_required_keys(self, mcp_registry):
        """All four top-level keys are present."""
        response = _find_api(mcp_registry, "compile")
        for key in ("query", "result_count", "api_docs_available", "results"):
            assert key in response

    def test_query_echoed(self, mcp_registry):
        """The submitted query text is returned under "query"."""
        query = "parse expression"
        response = _find_api(mcp_registry, query)
        assert response["query"] == query

    def test_result_count_matches(self, mcp_registry):
        """"result_count" equals the number of results and respects top_k."""
        limit = 3
        response = _find_api(mcp_registry, "compile", top_k=limit)
        count = response["result_count"]
        assert count == len(response["results"])
        assert count <= limit

    def test_api_docs_available(self, mcp_registry):
        """"api_docs_available" is exactly True."""
        response = _find_api(mcp_registry, "compile")
        assert response["api_docs_available"] is True

    def test_result_entry_keys(self, mcp_registry):
        """The first result entry carries every expected field."""
        hits = _find_api(mcp_registry, "compile")["results"]
        assert len(hits) > 0
        required = {
            "qualified_name",
            "name",
            "type",
            "score",
            "file_path",
            "start_line",
            "end_line",
            "source_code",
            "api_doc",
        }
        first = hits[0]
        assert required.issubset(first.keys())
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
# ---------------------------------------------------------------------------
|
|
105
|
+
# Search relevance
|
|
106
|
+
# ---------------------------------------------------------------------------
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class TestSearchRelevance:
    """Keyword and score sanity checks on find_api results."""

    def test_search_compile(self, mcp_registry):
        """At least one hit name mentions "compile" or "tcc"."""
        response = _find_api(mcp_registry, "compile source code")
        qns = [hit["qualified_name"] for hit in response["results"]]
        keywords = ("compile", "tcc")
        matched = any(kw in qn.lower() for qn in qns for kw in keywords)
        assert matched, f"Expected compile-related results, got: {qns}"

    def test_search_parse(self, mcp_registry):
        """At least one hit name mentions "parse" or "expr"."""
        response = _find_api(mcp_registry, "parse C expression")
        qns = [hit["qualified_name"] for hit in response["results"]]
        keywords = ("parse", "expr")
        matched = any(kw in qn.lower() for qn in qns for kw in keywords)
        assert matched, f"Expected parse-related results, got: {qns}"

    def test_search_memory(self, mcp_registry):
        """At least one hit name mentions "alloc", "malloc" or "mem"."""
        response = _find_api(mcp_registry, "allocate memory")
        qns = [hit["qualified_name"] for hit in response["results"]]
        keywords = ("alloc", "malloc", "mem")
        matched = any(kw in qn.lower() for qn in qns for kw in keywords)
        assert matched, f"Expected memory-related results, got: {qns}"

    def test_scores_are_valid(self, mcp_registry):
        """Every score is a float within [0.0, 1.0]."""
        response = _find_api(mcp_registry, "generate assembly code")
        for hit in response["results"]:
            score = hit["score"]
            assert isinstance(score, float)
            assert 0.0 <= score <= 1.0

    def test_scores_descending(self, mcp_registry):
        """Scores are returned in descending order."""
        response = _find_api(mcp_registry, "output binary")
        scores = [hit["score"] for hit in response["results"]]
        ranked = sorted(scores, reverse=True)
        assert scores == ranked
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# ---------------------------------------------------------------------------
|
|
146
|
+
# API doc attachment
|
|
147
|
+
# ---------------------------------------------------------------------------
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
class TestApiDocAttachment:
    """Check that L3 API docs come back attached to search results."""

    def test_some_results_have_api_doc(self, mcp_registry):
        """At least one of the top-10 hits has a non-empty "api_doc"."""
        hits = _find_api(mcp_registry, "compile source file", top_k=10)["results"]
        documented = [hit for hit in hits if hit["api_doc"]]
        assert len(documented) > 0, "Some results should have API docs attached"

    def test_api_doc_is_markdown(self, mcp_registry):
        """The first attached doc, if any, starts with a markdown title."""
        hits = _find_api(mcp_registry, "parse tokens", top_k=10)["results"]
        for r in hits:
            if not r["api_doc"]:
                continue
            assert r["api_doc"].startswith("# "), (
                f"API doc should start with markdown title, got: {r['api_doc'][:50]}"
            )
            # Only the first documented hit is inspected.
            break

    def test_api_doc_has_signature(self, mcp_registry):
        """Attached API docs should contain function signature."""
        hits = _find_api(mcp_registry, "lexer tokenizer", top_k=10)["results"]
        docs = [hit["api_doc"] for hit in hits if hit["api_doc"]]

        if any("签名:" in doc for doc in docs):
            return  # Found

        # Some docs legitimately lack a signature (e.g., macros), so any
        # attached doc at all is accepted.
        if docs:
            return  # Docs attached, signature format may vary
        pytest.fail("No API docs attached to any result")

    def test_api_doc_has_call_info(self, mcp_registry):
        """Attached API docs should contain call relationship info."""
        hits = _find_api(mcp_registry, "compile", top_k=10)["results"]
        has_call_info = any(
            hit["api_doc"] and "被调用" in hit["api_doc"] for hit in hits
        )
        if not has_call_info:
            pytest.fail("No API doc has call relationship info")
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
# ---------------------------------------------------------------------------
|
|
190
|
+
# Edge cases
|
|
191
|
+
# ---------------------------------------------------------------------------
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
class TestEdgeCases:
    """Test edge cases and boundary conditions."""

    def test_empty_query(self, mcp_registry):
        """Empty query should still return results (or handle gracefully)."""
        try:
            result = _find_api(mcp_registry, "")
        except Exception:
            return  # Raising an error is also acceptable

        # Checked outside the try block: inside it, the broad except would
        # also swallow this AssertionError and the test could never fail.
        assert isinstance(result, dict)

    def test_top_k_1(self, mcp_registry):
        """top_k=1 yields at most one result."""
        result = _find_api(mcp_registry, "main", top_k=1)
        assert len(result["results"]) <= 1

    def test_top_k_large(self, mcp_registry):
        """top_k=50 yields between 1 and 50 results."""
        result = _find_api(mcp_registry, "function", top_k=50)
        assert len(result["results"]) <= 50
        assert len(result["results"]) > 0

    def test_chinese_query(self, mcp_registry):
        """Chinese natural language query should work."""
        result = _find_api(mcp_registry, "编译源代码")
        assert isinstance(result, dict)
        assert result["result_count"] >= 0

    def test_specific_function_name(self, mcp_registry):
        """Querying an exact function name should return at least one hit."""
        result = _find_api(mcp_registry, "tcc_compile", top_k=10)
        qns = [r["qualified_name"] for r in result["results"]]
        # Should find the function or something very related; only
        # non-emptiness is asserted.
        assert len(qns) > 0
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Basic tests for code_graph_builder."""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_import():
    """Check that the package's public names import and are not None."""
    from code_graph_builder import CodeGraphBuilder, BuildResult
    from code_graph_builder.constants import SupportedLanguage, NodeLabel
    from code_graph_builder.types import GraphData, GraphSummary

    imported = (
        CodeGraphBuilder,
        BuildResult,
        SupportedLanguage,
        NodeLabel,
        GraphData,
        GraphSummary,
    )
    for obj in imported:
        assert obj is not None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_constants():
    """Check the string values of selected enum members."""
    from code_graph_builder.constants import SupportedLanguage, NodeLabel, RelationshipType

    expected_values = (
        # SupportedLanguage enum
        (SupportedLanguage.PYTHON, "python"),
        (SupportedLanguage.JS, "javascript"),
        # NodeLabel enum
        (NodeLabel.FUNCTION, "Function"),
        (NodeLabel.CLASS, "Class"),
        # RelationshipType enum
        (RelationshipType.CALLS, "CALLS"),
        (RelationshipType.DEFINES, "DEFINES"),
    )
    for member, value in expected_values:
        assert member.value == value
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_types():
    """Check BuildResult construction and two NodeType values."""
    from code_graph_builder.types import BuildResult, NodeType

    # BuildResult keeps the values it was constructed with.
    build_kwargs = {
        "project_name": "test",
        "nodes_created": 10,
        "relationships_created": 5,
        "functions_found": 3,
        "classes_found": 2,
        "files_processed": 1,
        "errors": [],
    }
    result = BuildResult(**build_kwargs)
    assert result.project_name == "test"
    assert result.nodes_created == 10

    # NodeType members map to their label strings.
    for member, label in ((NodeType.FUNCTION, "Function"), (NodeType.METHOD, "Method")):
        assert member.value == label
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def test_models():
    """Check that LanguageSpec and Dependency expose their constructor values."""
    from code_graph_builder.models import LanguageSpec, Dependency
    from code_graph_builder.constants import SupportedLanguage

    # LanguageSpec
    python = SupportedLanguage.PYTHON
    spec = LanguageSpec(
        language=python,
        file_extensions=(".py",),
        function_node_types=("function_definition",),
        class_node_types=("class_definition",),
        module_node_types=("module",),
    )
    assert spec.language == python
    assert ".py" in spec.file_extensions

    # Dependency
    dep_name, dep_spec = "requests", ">=2.0.0"
    dep = Dependency(name=dep_name, spec=dep_spec)
    assert dep.name == dep_name
    assert dep.spec == dep_spec
|
|
@@ -0,0 +1,388 @@
|
|
|
1
|
+
"""Tests for C language API interface extraction.
|
|
2
|
+
|
|
3
|
+
Tests cover:
|
|
4
|
+
- Function extraction with visibility (public/static/extern)
|
|
5
|
+
- Struct/union/enum member extraction
|
|
6
|
+
- Typedef extraction
|
|
7
|
+
- Macro extraction
|
|
8
|
+
- Header declaration tracking for visibility resolution
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
import pytest
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _make_builder(project_path: Path):
    """Build a CodeGraphBuilder whose database file lives inside the project.

    Args:
        project_path: Project directory; the database path passed to the
            backend is ``project_path / "test_graph.db"``.

    Returns:
        The constructed ``CodeGraphBuilder``.
    """
    from code_graph_builder.builder import CodeGraphBuilder

    backend_config = {"db_path": str(project_path / "test_graph.db")}
    return CodeGraphBuilder(str(project_path), backend_config=backend_config)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@pytest.fixture
def c_project_with_header(tmp_path: Path) -> Path:
    """Write a small C project (Makefile, api.h, api.c) and return its path.

    ``api.h`` declares three functions plus typedefs, a struct, an enum, a
    union and two macros. ``api.c`` defines those three functions, a static
    helper, and one non-static function that the header does not declare.
    """
    project_path = tmp_path / "c_api_project"
    project_path.mkdir()

    # Header file declaring public API
    api_header = """\
#ifndef API_H
#define API_H

typedef int error_code;
typedef struct point Point;

struct point {
    int x;
    int y;
};

enum color {
    RED,
    GREEN,
    BLUE
};

union value {
    int i;
    float f;
    char c;
};

#define MAX_SIZE 1024
#define VERSION "1.0.0"

int api_init(void);
void api_cleanup(void);
int api_process(const char *input, int len);

#endif
"""

    # Source file with implementations
    api_source = """\
#include "api.h"

static int _internal_helper(int x) {
    return x * 2;
}

int api_init(void) {
    return _internal_helper(0);
}

void api_cleanup(void) {
    // cleanup
}

int api_process(const char *input, int len) {
    return _internal_helper(len);
}

void undeclared_extern_func(void) {
    // This function has external linkage but is not in a header
}
"""

    files = {
        # A Makefile so the directory is recognized as a C package
        "Makefile": "all:\n\tgcc -o main main.c\n",
        "api.h": api_header,
        "api.c": api_source,
    }
    for filename, content in files.items():
        (project_path / filename).write_text(content)

    return project_path
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@pytest.fixture
def c_struct_project(tmp_path: Path) -> Path:
    """Write a header-only C project with type definitions and return its path.

    ``types.h`` contains two typedefs, a struct, an enum, a union, and one
    object-like and one function-like macro.
    """
    project_path = tmp_path / "c_struct_project"
    project_path.mkdir()

    types_header = """\
#ifndef TYPES_H
#define TYPES_H

typedef unsigned long size_t_alias;
typedef int (*callback_fn)(int, int);

struct config {
    int width;
    int height;
    char *name;
    float ratio;
};

enum log_level {
    LOG_DEBUG,
    LOG_INFO,
    LOG_WARN,
    LOG_ERROR
};

union data {
    int integer;
    double floating;
    char string[32];
};

#define MAX_BUFSIZE 4096
#define MIN(a, b) ((a) < (b) ? (a) : (b))

#endif
"""
    header_file = project_path / "types.h"
    header_file.write_text(types_header)

    return project_path
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def test_c_function_visibility_header(c_project_with_header: Path) -> None:
    """Header-declared functions are 'public'; the static helper is 'static'."""
    builder = _make_builder(c_project_with_header)
    result = builder.build_graph(clean=True)

    assert result.nodes_created > 0, "No nodes were created"

    # Query functions and their visibility
    func_query = """
    MATCH (f:Function)
    RETURN f.name AS name, f.visibility AS visibility, f.signature AS signature
    """

    # Rows may carry their payload under "result" and may be positional
    # (list/tuple) or keyed (dict); both shapes are normalised here.
    func_map = {}
    for row in builder.query(func_query):
        raw = row.get("result", row)
        if isinstance(raw, (list, tuple)):
            name, visibility, signature = raw[0], raw[1], raw[2]
        elif isinstance(raw, dict):
            name = raw.get("name", "")
            visibility = raw.get("visibility")
            signature = raw.get("signature")
        else:
            continue
        func_map[name] = {"visibility": visibility, "signature": signature}

    # Functions declared in api.h should be "public"
    assert "api_init" in func_map, f"api_init not found. Available: {list(func_map.keys())}"
    assert func_map["api_init"]["visibility"] == "public", (
        f"api_init should be 'public', got '{func_map['api_init']['visibility']}'"
    )

    # Static function should be "static"
    assert "_internal_helper" in func_map, (
        f"_internal_helper not found. Available: {list(func_map.keys())}"
    )
    assert func_map["_internal_helper"]["visibility"] == "static", (
        f"_internal_helper should be 'static', got '{func_map['_internal_helper']['visibility']}'"
    )
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def test_c_function_visibility_extern(c_project_with_header: Path) -> None:
    """A non-static function missing from the header is reported as 'extern'."""
    builder = _make_builder(c_project_with_header)
    builder.build_graph(clean=True)

    func_query = """
    MATCH (f:Function)
    WHERE f.name = 'undeclared_extern_func'
    RETURN f.name AS name, f.visibility AS visibility
    """
    functions = builder.query(func_query)

    assert len(functions) > 0, "undeclared_extern_func not found"

    # The first row may be positional (list/tuple) or keyed (dict).
    first = functions[0]
    raw = first.get("result", first)
    visibility = raw[1] if isinstance(raw, (list, tuple)) else raw.get("visibility")

    assert visibility == "extern", (
        f"undeclared_extern_func should be 'extern', got '{visibility}'"
    )
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def test_c_struct_member_extraction(c_struct_project: Path) -> None:
    """Struct, enum and union nodes carry the expected kind and members."""
    builder = _make_builder(c_struct_project)
    builder.build_graph(clean=True)

    class_query = """
    MATCH (c:Class)
    RETURN c.name AS name, c.kind AS kind, c.parameters AS members, c.signature AS signature
    """

    # Normalise positional (list/tuple) and keyed (dict) rows into one map.
    class_map = {}
    for row in builder.query(class_query):
        raw = row.get("result", row)
        if isinstance(raw, (list, tuple)):
            name, kind, members, signature = raw[0], raw[1], raw[2], raw[3]
        elif isinstance(raw, dict):
            name = raw.get("name", "")
            kind = raw.get("kind")
            members = raw.get("members")
            signature = raw.get("signature")
        else:
            continue
        class_map[name] = {"kind": kind, "members": members, "signature": signature}

    # Check struct
    assert "config" in class_map, f"config struct not found. Available: {list(class_map.keys())}"
    config = class_map["config"]
    assert config["kind"] == "struct", f"Expected kind 'struct', got '{config['kind']}'"
    assert config["members"] is not None, "config struct should have members"
    assert len(config["members"]) >= 3, (
        f"config struct should have at least 3 members, got {len(config['members'])}"
    )

    # Check enum
    assert "log_level" in class_map, f"log_level enum not found. Available: {list(class_map.keys())}"
    log_level = class_map["log_level"]
    assert log_level["kind"] == "enum", f"Expected kind 'enum', got '{log_level['kind']}'"
    assert log_level["members"] is not None, "log_level enum should have members"
    # Should contain LOG_DEBUG, LOG_INFO, LOG_WARN, LOG_ERROR
    assert len(log_level["members"]) == 4, (
        f"log_level enum should have 4 members, got {len(log_level['members'])}"
    )

    # Check union
    assert "data" in class_map, f"data union not found. Available: {list(class_map.keys())}"
    data = class_map["data"]
    assert data["kind"] == "union", f"Expected kind 'union', got '{data['kind']}'"
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def test_c_typedef_extraction(c_struct_project: Path) -> None:
    """The ``size_t_alias`` typedef appears as a Type node of kind 'typedef'."""
    builder = _make_builder(c_struct_project)
    builder.build_graph(clean=True)

    type_query = """
    MATCH (t:Type)
    RETURN t.name AS name, t.kind AS kind, t.signature AS signature
    """

    # Normalise positional (list/tuple) and keyed (dict) rows into one map.
    type_map = {}
    for row in builder.query(type_query):
        raw = row.get("result", row)
        if isinstance(raw, (list, tuple)):
            name, kind, signature = raw[0], raw[1], raw[2]
        elif isinstance(raw, dict):
            name = raw.get("name", "")
            kind = raw.get("kind")
            signature = raw.get("signature")
        else:
            continue
        type_map[name] = {"kind": kind, "signature": signature}

    assert "size_t_alias" in type_map, (
        f"size_t_alias typedef not found. Available: {list(type_map.keys())}"
    )
    assert type_map["size_t_alias"]["kind"] == "typedef"
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def test_c_macro_extraction(c_struct_project: Path) -> None:
    """Both ``MAX_BUFSIZE`` and ``MIN`` are returned by the macro query."""
    builder = _make_builder(c_struct_project)
    builder.build_graph(clean=True)

    # Macros are stored as Function nodes with kind='macro'
    macro_query = """
    MATCH (f:Function)
    WHERE f.kind = 'macro'
    RETURN f.name AS name, f.signature AS signature, f.visibility AS visibility
    """

    # Collect names from positional (list/tuple) and keyed (dict) rows.
    macro_names = set()
    for row in builder.query(macro_query):
        raw = row.get("result", row)
        if isinstance(raw, (list, tuple)):
            macro_names.add(raw[0])
        elif isinstance(raw, dict):
            macro_names.add(raw.get("name"))

    assert "MAX_BUFSIZE" in macro_names, (
        f"MAX_BUFSIZE macro not found. Available: {macro_names}"
    )
    assert "MIN" in macro_names, (
        f"MIN macro not found. Available: {macro_names}"
    )
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def test_c_function_signature_extraction(c_project_with_header: Path) -> None:
    """``api_process`` has a signature naming the function and a return type."""
    builder = _make_builder(c_project_with_header)
    builder.build_graph(clean=True)

    func_query = """
    MATCH (f:Function)
    WHERE f.name = 'api_process'
    RETURN f.name AS name, f.signature AS signature, f.return_type AS return_type,
           f.parameters AS parameters
    """
    functions = builder.query(func_query)

    assert len(functions) > 0, "api_process not found"

    # The first row may be positional (list/tuple) or keyed (dict).
    first = functions[0]
    raw = first.get("result", first)
    if isinstance(raw, (list, tuple)):
        signature, return_type, parameters = raw[1], raw[2], raw[3]
    else:
        signature = raw.get("signature")
        return_type = raw.get("return_type")
        parameters = raw.get("parameters")

    assert signature is not None, "api_process should have a signature"
    assert "api_process" in signature, f"Signature should contain function name: {signature}"
    assert return_type is not None, "api_process should have a return type"
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def test_c_header_declarations_tracked(c_project_with_header: Path) -> None:
    """Functions defined in a ``.c`` module and declared in api.h are 'public'.

    Only functions the query actually returns for a module whose name ends in
    ``.c`` are checked; names that are absent are skipped.
    """
    builder = _make_builder(c_project_with_header)
    builder.build_graph(clean=True)

    # Query all functions together with their defining module
    func_query = """
    MATCH (m:Module)-[:DEFINES]->(f:Function)
    RETURN m.name AS module, f.name AS name, f.visibility AS visibility
    """

    c_file_funcs = {}
    for row in builder.query(func_query):
        raw = row.get("result", row)
        if isinstance(raw, (list, tuple)):
            mod_name, func_name, vis = raw[0], raw[1], raw[2]
        elif isinstance(raw, dict):
            mod_name = raw.get("module", "")
            func_name = raw.get("name", "")
            vis = raw.get("visibility")
        else:
            continue

        # Keep only functions whose module name ends in ".c".
        if not mod_name or not mod_name.endswith(".c"):
            continue
        c_file_funcs[func_name] = vis

    # Functions also in header should be public
    for fname in ("api_init", "api_cleanup", "api_process"):
        if fname not in c_file_funcs:
            continue
        assert c_file_funcs[fname] == "public", (
            f"{fname} in .c file should be 'public' (declared in header), "
            f"got '{c_file_funcs[fname]}'"
        )
|