code-graph-builder 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. code_graph_builder/__init__.py +82 -0
  2. code_graph_builder/builder.py +366 -0
  3. code_graph_builder/cgb_cli.py +32 -0
  4. code_graph_builder/cli.py +564 -0
  5. code_graph_builder/commands_cli.py +1288 -0
  6. code_graph_builder/config.py +340 -0
  7. code_graph_builder/constants.py +708 -0
  8. code_graph_builder/embeddings/__init__.py +40 -0
  9. code_graph_builder/embeddings/qwen3_embedder.py +573 -0
  10. code_graph_builder/embeddings/vector_store.py +584 -0
  11. code_graph_builder/examples/__init__.py +0 -0
  12. code_graph_builder/examples/example_configuration.py +276 -0
  13. code_graph_builder/examples/example_kuzu_usage.py +109 -0
  14. code_graph_builder/examples/example_semantic_search_full.py +347 -0
  15. code_graph_builder/examples/generate_wiki.py +915 -0
  16. code_graph_builder/examples/graph_export_example.py +100 -0
  17. code_graph_builder/examples/rag_example.py +206 -0
  18. code_graph_builder/examples/test_cli_demo.py +129 -0
  19. code_graph_builder/examples/test_embedding_api.py +153 -0
  20. code_graph_builder/examples/test_kuzu_local.py +190 -0
  21. code_graph_builder/examples/test_rag_redis.py +390 -0
  22. code_graph_builder/graph_updater.py +605 -0
  23. code_graph_builder/guidance/__init__.py +1 -0
  24. code_graph_builder/guidance/agent.py +123 -0
  25. code_graph_builder/guidance/prompts.py +74 -0
  26. code_graph_builder/guidance/toolset.py +264 -0
  27. code_graph_builder/language_spec.py +536 -0
  28. code_graph_builder/mcp/__init__.py +21 -0
  29. code_graph_builder/mcp/api_doc_generator.py +764 -0
  30. code_graph_builder/mcp/file_editor.py +207 -0
  31. code_graph_builder/mcp/pipeline.py +777 -0
  32. code_graph_builder/mcp/server.py +161 -0
  33. code_graph_builder/mcp/tools.py +1800 -0
  34. code_graph_builder/models.py +115 -0
  35. code_graph_builder/parser_loader.py +344 -0
  36. code_graph_builder/parsers/__init__.py +7 -0
  37. code_graph_builder/parsers/call_processor.py +306 -0
  38. code_graph_builder/parsers/call_resolver.py +139 -0
  39. code_graph_builder/parsers/definition_processor.py +796 -0
  40. code_graph_builder/parsers/factory.py +119 -0
  41. code_graph_builder/parsers/import_processor.py +293 -0
  42. code_graph_builder/parsers/structure_processor.py +145 -0
  43. code_graph_builder/parsers/type_inference.py +143 -0
  44. code_graph_builder/parsers/utils.py +134 -0
  45. code_graph_builder/rag/__init__.py +68 -0
  46. code_graph_builder/rag/camel_agent.py +429 -0
  47. code_graph_builder/rag/client.py +298 -0
  48. code_graph_builder/rag/config.py +239 -0
  49. code_graph_builder/rag/cypher_generator.py +67 -0
  50. code_graph_builder/rag/llm_backend.py +210 -0
  51. code_graph_builder/rag/markdown_generator.py +352 -0
  52. code_graph_builder/rag/prompt_templates.py +440 -0
  53. code_graph_builder/rag/rag_engine.py +640 -0
  54. code_graph_builder/rag/review_report.md +172 -0
  55. code_graph_builder/rag/tests/__init__.py +3 -0
  56. code_graph_builder/rag/tests/test_camel_agent.py +313 -0
  57. code_graph_builder/rag/tests/test_client.py +221 -0
  58. code_graph_builder/rag/tests/test_config.py +177 -0
  59. code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
  60. code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
  61. code_graph_builder/services/__init__.py +39 -0
  62. code_graph_builder/services/graph_service.py +465 -0
  63. code_graph_builder/services/kuzu_service.py +665 -0
  64. code_graph_builder/services/memory_service.py +171 -0
  65. code_graph_builder/settings.py +75 -0
  66. code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
  67. code_graph_builder/tests/__init__.py +1 -0
  68. code_graph_builder/tests/run_acceptance_check.py +378 -0
  69. code_graph_builder/tests/test_api_find.py +231 -0
  70. code_graph_builder/tests/test_api_find_integration.py +226 -0
  71. code_graph_builder/tests/test_basic.py +78 -0
  72. code_graph_builder/tests/test_c_api_extraction.py +388 -0
  73. code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
  74. code_graph_builder/tests/test_embedder.py +411 -0
  75. code_graph_builder/tests/test_integration_semantic.py +434 -0
  76. code_graph_builder/tests/test_mcp_protocol.py +298 -0
  77. code_graph_builder/tests/test_mcp_user_flow.py +190 -0
  78. code_graph_builder/tests/test_rag.py +404 -0
  79. code_graph_builder/tests/test_settings.py +135 -0
  80. code_graph_builder/tests/test_step1_graph_build.py +264 -0
  81. code_graph_builder/tests/test_step2_api_docs.py +323 -0
  82. code_graph_builder/tests/test_step3_embedding.py +278 -0
  83. code_graph_builder/tests/test_vector_store.py +552 -0
  84. code_graph_builder/tools/__init__.py +40 -0
  85. code_graph_builder/tools/graph_query.py +495 -0
  86. code_graph_builder/tools/semantic_search.py +387 -0
  87. code_graph_builder/types.py +333 -0
  88. code_graph_builder/utils/__init__.py +0 -0
  89. code_graph_builder/utils/path_utils.py +30 -0
  90. code_graph_builder-0.2.0.dist-info/METADATA +321 -0
  91. code_graph_builder-0.2.0.dist-info/RECORD +93 -0
  92. code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
  93. code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,226 @@
1
+ """Integration test: api-find (semantic search + API doc attachment) on tinycc.
2
+
3
+ Tests the full find_api pipeline: query → embedding → vector search →
4
+ API doc lookup → combined result. Validates relevance, doc attachment,
5
+ and result structure.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import asyncio
11
+ import os
12
+ from pathlib import Path
13
+
14
+ import pytest
15
+
16
# Location of the tinycc checkout used as the indexing target: a "tinycc"
# directory inside the folder three levels above this file's own directory.
TINYCC_PATH = Path(__file__).resolve().parents[3].joinpath("tinycc")

# Module-wide marks: every test in this module is skipped unless the tinycc
# sources exist on disk and DASHSCOPE_API_KEY is set to a non-empty value.
pytestmark = [
    pytest.mark.skipif(
        not TINYCC_PATH.exists(),
        reason=f"tinycc source not found at {TINYCC_PATH}",
    ),
    pytest.mark.skipif(
        not os.getenv("DASHSCOPE_API_KEY"),
        reason="DASHSCOPE_API_KEY not set",
    ),
]
28
+
29
+
30
@pytest.fixture(scope="module")
def mcp_registry(tmp_path_factory):
    """Yield an MCPToolsRegistry whose workspace holds a freshly indexed tinycc.

    The registry is created in a temporary workspace and the
    ``initialize_repository`` handler is run once per module with
    ``rebuild=True``, ``skip_wiki=True`` and ``skip_embed=False``.

    The registry is always closed on teardown, including when initialization
    raises or reports a non-success status.
    """
    from code_graph_builder.mcp.tools import MCPToolsRegistry

    workspace = tmp_path_factory.mktemp("workspace")
    registry = MCPToolsRegistry(workspace=workspace)

    try:
        # asyncio.get_event_loop() raises RuntimeError on newer Python
        # versions when no loop has been set; create and install one then so
        # later synchronous callers can reuse it.
        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)

        # Run full pipeline via initialize_repository handler
        result = loop.run_until_complete(
            registry._handle_initialize_repository(
                repo_path=str(TINYCC_PATH),
                rebuild=True,
                skip_wiki=True,
                skip_embed=False,
            )
        )
        assert result.get("status") == "success", f"Init failed: {result}"

        yield registry
    finally:
        # Release the registry even if setup failed before the yield.
        registry.close()
51
+
52
+
53
def _find_api(registry, query: str, top_k: int = 5) -> dict:
    """Run the registry's async ``_handle_find_api`` handler synchronously.

    Args:
        registry: Object exposing an awaitable
            ``_handle_find_api(query=..., top_k=...)``.
        query: Search text forwarded to the handler.
        top_k: Result limit forwarded to the handler.

    Returns:
        Whatever the handler coroutine returns.
    """
    # asyncio.get_event_loop() raises RuntimeError on newer Python versions
    # when the thread has no current loop; fall back to creating one and
    # installing it so subsequent calls reuse the same loop.
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    return loop.run_until_complete(
        registry._handle_find_api(query=query, top_k=top_k)
    )
58
+
59
+
60
+ # ---------------------------------------------------------------------------
61
+ # Result structure
62
+ # ---------------------------------------------------------------------------
63
+
64
+
65
class TestResultStructure:
    """Shape checks for the dictionary returned by find_api."""

    def test_returns_dict(self, mcp_registry):
        """The response is a dict."""
        response = _find_api(mcp_registry, "compile")
        assert isinstance(response, dict)

    def test_has_required_keys(self, mcp_registry):
        """All four top-level keys are present."""
        response = _find_api(mcp_registry, "compile")
        for key in ("query", "result_count", "api_docs_available", "results"):
            assert key in response

    def test_query_echoed(self, mcp_registry):
        """The query text is returned unchanged under "query"."""
        text = "parse expression"
        response = _find_api(mcp_registry, text)
        assert response["query"] == text

    def test_result_count_matches(self, mcp_registry):
        """"result_count" equals the number of entries and honours top_k."""
        limit = 3
        response = _find_api(mcp_registry, "compile", top_k=limit)
        reported = response["result_count"]
        assert reported == len(response["results"])
        assert response["result_count"] <= limit

    def test_api_docs_available(self, mcp_registry):
        """The response flags API docs as available (exactly ``True``)."""
        response = _find_api(mcp_registry, "compile")
        assert response["api_docs_available"] is True

    def test_result_entry_keys(self, mcp_registry):
        """The first result entry carries every expected field."""
        response = _find_api(mcp_registry, "compile")
        assert len(response["results"]) > 0
        first = response["results"][0]
        required = {
            "qualified_name", "name", "type", "score",
            "file_path", "start_line", "end_line",
            "source_code", "api_doc",
        }
        missing = required.difference(first.keys())
        assert not missing
102
+
103
+
104
+ # ---------------------------------------------------------------------------
105
+ # Search relevance
106
+ # ---------------------------------------------------------------------------
107
+
108
+
109
class TestSearchRelevance:
    """Keyword-based relevance checks on the names find_api returns."""

    def test_search_compile(self, mcp_registry):
        """A compile query yields a name containing "compile" or "tcc"."""
        response = _find_api(mcp_registry, "compile source code")
        qns = [hit["qualified_name"] for hit in response["results"]]
        lowered = (qn.lower() for qn in qns)
        found = any("compile" in text or "tcc" in text for text in lowered)
        assert found, f"Expected compile-related results, got: {qns}"

    def test_search_parse(self, mcp_registry):
        """A parse query yields a name containing "parse" or "expr"."""
        response = _find_api(mcp_registry, "parse C expression")
        qns = [hit["qualified_name"] for hit in response["results"]]
        lowered = (qn.lower() for qn in qns)
        found = any("parse" in text or "expr" in text for text in lowered)
        assert found, f"Expected parse-related results, got: {qns}"

    def test_search_memory(self, mcp_registry):
        """A memory query yields a name containing an allocation keyword."""
        response = _find_api(mcp_registry, "allocate memory")
        qns = [hit["qualified_name"] for hit in response["results"]]
        keywords = ("alloc", "malloc", "mem")
        lowered = (qn.lower() for qn in qns)
        found = any(
            any(word in text for word in keywords)
            for text in lowered
        )
        assert found, f"Expected memory-related results, got: {qns}"

    def test_scores_are_valid(self, mcp_registry):
        """Every score is a float within the closed interval [0, 1]."""
        response = _find_api(mcp_registry, "generate assembly code")
        for hit in response["results"]:
            score = hit["score"]
            assert isinstance(score, float)
            assert 0.0 <= hit["score"] <= 1.0

    def test_scores_descending(self, mcp_registry):
        """Results arrive ordered from highest to lowest score."""
        response = _find_api(mcp_registry, "output binary")
        scores = [hit["score"] for hit in response["results"]]
        ranked = sorted(scores, reverse=True)
        assert scores == ranked
143
+
144
+
145
+ # ---------------------------------------------------------------------------
146
+ # API doc attachment
147
+ # ---------------------------------------------------------------------------
148
+
149
+
150
class TestApiDocAttachment:
    """Verify L3 API docs are attached to search results."""

    def test_some_results_have_api_doc(self, mcp_registry):
        """At least one of the top results carries a non-empty ``api_doc``."""
        result = _find_api(mcp_registry, "compile source file", top_k=10)
        with_doc = sum(1 for r in result["results"] if r["api_doc"])
        assert with_doc > 0, "Some results should have API docs attached"

    def test_api_doc_is_markdown(self, mcp_registry):
        """The first attached API doc starts with a markdown H1 title."""
        result = _find_api(mcp_registry, "parse tokens", top_k=10)
        docs = [r["api_doc"] for r in result["results"] if r["api_doc"]]
        if not docs:
            # Previously this case passed silently without checking anything;
            # report it as a skip so a missing doc is visible in the summary.
            pytest.skip("No API docs attached for this query; nothing to verify")
        first_doc = docs[0]
        assert first_doc.startswith("# "), (
            f"API doc should start with markdown title, got: {first_doc[:50]}"
        )

    def test_api_doc_has_signature(self, mcp_registry):
        """At least one result must have an API doc attached.

        A doc containing the "签名:" label (Chinese for "signature:") is the
        preferred evidence, but docs without it are accepted because the
        signature format may vary (e.g., macros). The test only fails when no
        doc is attached at all.
        """
        result = _find_api(mcp_registry, "lexer tokenizer", top_k=10)
        docs = [r["api_doc"] for r in result["results"] if r["api_doc"]]
        if any("签名:" in doc for doc in docs):
            return  # Found
        # It's ok if some results don't have signatures (e.g., macros)
        # Just check at least one doc was attached
        if docs:
            return  # Docs attached, signature format may vary
        pytest.fail("No API docs attached to any result")

    def test_api_doc_has_call_info(self, mcp_registry):
        """Some attached API doc contains the "被调用" label.

        "被调用" is Chinese for "called by"; presumably the label used for
        caller information in the generated docs -- confirm against the API
        doc generator.
        """
        result = _find_api(mcp_registry, "compile", top_k=10)
        if any(r["api_doc"] and "被调用" in r["api_doc"] for r in result["results"]):
            return
        pytest.fail("No API doc has call relationship info")
187
+
188
+
189
+ # ---------------------------------------------------------------------------
190
+ # Edge cases
191
+ # ---------------------------------------------------------------------------
192
+
193
+
194
class TestEdgeCases:
    """Test edge cases and boundary conditions."""

    def test_empty_query(self, mcp_registry):
        """Empty query should still return results (or handle gracefully).

        Either outcome is accepted: the call raises, or it returns a dict.
        """
        try:
            result = _find_api(mcp_registry, "")
        except Exception:
            # Raising an error is also acceptable; the exception type is not
            # specified, hence the broad catch around the call only.
            return
        # Kept outside the try block: AssertionError is an Exception subclass,
        # so asserting inside it would be swallowed and the test could never
        # fail.
        assert isinstance(result, dict)

    def test_top_k_1(self, mcp_registry):
        """``top_k=1`` yields at most one result."""
        result = _find_api(mcp_registry, "main", top_k=1)
        assert len(result["results"]) <= 1

    def test_top_k_large(self, mcp_registry):
        """``top_k=50`` yields between 1 and 50 results."""
        result = _find_api(mcp_registry, "function", top_k=50)
        assert len(result["results"]) <= 50
        assert len(result["results"]) > 0

    def test_chinese_query(self, mcp_registry):
        """Chinese natural language query should work."""
        result = _find_api(mcp_registry, "编译源代码")
        assert isinstance(result, dict)
        assert result["result_count"] >= 0

    def test_specific_function_name(self, mcp_registry):
        """Querying an exact function name should return at least one result."""
        result = _find_api(mcp_registry, "tcc_compile", top_k=10)
        qns = [r["qualified_name"] for r in result["results"]]
        # Should find the function or something very related
        assert len(qns) > 0
@@ -0,0 +1,78 @@
1
+ """Basic tests for code_graph_builder."""
2
+
3
+ import pytest
4
+
5
+
6
def test_import():
    """Smoke-test that the package's main names can be imported."""
    from code_graph_builder import CodeGraphBuilder, BuildResult
    from code_graph_builder.constants import SupportedLanguage, NodeLabel
    from code_graph_builder.types import GraphData, GraphSummary

    imported = (
        CodeGraphBuilder,
        BuildResult,
        SupportedLanguage,
        NodeLabel,
        GraphData,
        GraphSummary,
    )
    for obj in imported:
        assert obj is not None
18
+
19
+
20
def test_constants():
    """Check the string values behind a sample of enum members."""
    from code_graph_builder.constants import SupportedLanguage, NodeLabel, RelationshipType

    # SupportedLanguage members
    python_lang = SupportedLanguage.PYTHON
    assert python_lang.value == "python"
    js_lang = SupportedLanguage.JS
    assert js_lang.value == "javascript"

    # NodeLabel members
    function_label = NodeLabel.FUNCTION
    assert function_label.value == "Function"
    class_label = NodeLabel.CLASS
    assert class_label.value == "Class"

    # RelationshipType members
    calls_rel = RelationshipType.CALLS
    assert calls_rel.value == "CALLS"
    defines_rel = RelationshipType.DEFINES
    assert defines_rel.value == "DEFINES"
35
+
36
+
37
def test_types():
    """Construct a BuildResult and check a couple of NodeType values."""
    from code_graph_builder.types import BuildResult, NodeType

    # BuildResult accepts these keyword fields and exposes them as attributes.
    fields = {
        "project_name": "test",
        "nodes_created": 10,
        "relationships_created": 5,
        "functions_found": 3,
        "classes_found": 2,
        "files_processed": 1,
        "errors": [],
    }
    built = BuildResult(**fields)
    assert built.project_name == "test"
    assert built.nodes_created == 10

    # NodeType members
    function_type = NodeType.FUNCTION
    assert function_type.value == "Function"
    method_type = NodeType.METHOD
    assert method_type.value == "Method"
57
+
58
+
59
def test_models():
    """Construct LanguageSpec and Dependency and read their fields back."""
    from code_graph_builder.models import LanguageSpec, Dependency
    from code_graph_builder.constants import SupportedLanguage

    # LanguageSpec
    python_lang = SupportedLanguage.PYTHON
    spec_fields = {
        "language": python_lang,
        "file_extensions": (".py",),
        "function_node_types": ("function_definition",),
        "class_node_types": ("class_definition",),
        "module_node_types": ("module",),
    }
    language_spec = LanguageSpec(**spec_fields)
    assert language_spec.language == SupportedLanguage.PYTHON
    assert ".py" in language_spec.file_extensions

    # Dependency
    requirement = Dependency(name="requests", spec=">=2.0.0")
    assert requirement.name == "requests"
    assert requirement.spec == ">=2.0.0"
@@ -0,0 +1,388 @@
1
+ """Tests for C language API interface extraction.
2
+
3
+ Tests cover:
4
+ - Function extraction with visibility (public/static/extern)
5
+ - Struct/union/enum member extraction
6
+ - Typedef extraction
7
+ - Macro extraction
8
+ - Header declaration tracking for visibility resolution
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from pathlib import Path
14
+
15
+ import pytest
16
+
17
+
18
def _make_builder(project_path: Path):
    """Build a CodeGraphBuilder whose database file lives inside the project.

    The database path is ``<project_path>/test_graph.db`` and is handed to the
    builder through ``backend_config``.
    """
    from code_graph_builder.builder import CodeGraphBuilder

    backend = {"db_path": str(project_path.joinpath("test_graph.db"))}
    return CodeGraphBuilder(str(project_path), backend_config=backend)
27
+
28
+
29
@pytest.fixture
def c_project_with_header(tmp_path: Path) -> Path:
    """Write a small C project (Makefile, api.h, api.c) and return its root.

    ``api.h`` declares three functions plus typedefs, a struct, an enum, a
    union and two macros. ``api.c`` defines those three functions, one static
    helper, and one non-static function that the header does not declare.
    """
    root = tmp_path / "c_api_project"
    root.mkdir()

    # A Makefile so the directory is recognized as a C package.
    makefile_text = "all:\n\tgcc -o main main.c\n"

    # Header declaring the public API.
    header_text = """\
#ifndef API_H
#define API_H

typedef int error_code;
typedef struct point Point;

struct point {
int x;
int y;
};

enum color {
RED,
GREEN,
BLUE
};

union value {
int i;
float f;
char c;
};

#define MAX_SIZE 1024
#define VERSION "1.0.0"

int api_init(void);
void api_cleanup(void);
int api_process(const char *input, int len);

#endif
"""

    # Source file with the implementations.
    source_text = """\
#include "api.h"

static int _internal_helper(int x) {
return x * 2;
}

int api_init(void) {
return _internal_helper(0);
}

void api_cleanup(void) {
// cleanup
}

int api_process(const char *input, int len) {
return _internal_helper(len);
}

void undeclared_extern_func(void) {
// This function has external linkage but is not in a header
}
"""

    files = (
        ("Makefile", makefile_text),
        ("api.h", header_text),
        ("api.c", source_text),
    )
    for filename, content in files:
        (root / filename).write_text(content)

    return root
103
+
104
+
105
@pytest.fixture
def c_struct_project(tmp_path: Path) -> Path:
    """Write a header-only C project of type definitions and return its root.

    ``types.h`` contains two typedefs, a struct, an enum, a union, one
    object-like macro and one function-like macro.
    """
    root = tmp_path / "c_struct_project"
    root.mkdir()

    header_text = """\
#ifndef TYPES_H
#define TYPES_H

typedef unsigned long size_t_alias;
typedef int (*callback_fn)(int, int);

struct config {
int width;
int height;
char *name;
float ratio;
};

enum log_level {
LOG_DEBUG,
LOG_INFO,
LOG_WARN,
LOG_ERROR
};

union data {
int integer;
double floating;
char string[32];
};

#define MAX_BUFSIZE 4096
#define MIN(a, b) ((a) < (b) ? (a) : (b))

#endif
"""
    header_file = root / "types.h"
    header_file.write_text(header_text)

    return root
147
+
148
+
149
def test_c_function_visibility_header(c_project_with_header: Path) -> None:
    """Header-declared functions are 'public'; the static helper is 'static'."""
    builder = _make_builder(c_project_with_header)
    build = builder.build_graph(clean=True)

    assert build.nodes_created > 0, "No nodes were created"

    # Fetch every Function node with its visibility and signature.
    rows = builder.query(
        """
    MATCH (f:Function)
    RETURN f.name AS name, f.visibility AS visibility, f.signature AS signature
    """
    )

    # Rows may wrap their payload under "result" and may be positional or
    # keyed; normalize both shapes into name -> {visibility, signature}.
    func_map = {}
    for row in rows:
        payload = row.get("result", row)
        if isinstance(payload, (list, tuple)):
            key = payload[0]
            info = {"visibility": payload[1], "signature": payload[2]}
        elif isinstance(payload, dict):
            key = payload.get("name", "")
            info = {
                "visibility": payload.get("visibility"),
                "signature": payload.get("signature"),
            }
        else:
            continue
        func_map[key] = info

    # Functions declared in api.h should be "public"
    assert "api_init" in func_map, f"api_init not found. Available: {list(func_map.keys())}"
    init_visibility = func_map["api_init"]["visibility"]
    assert init_visibility == "public", (
        f"api_init should be 'public', got '{func_map['api_init']['visibility']}'"
    )

    # Static function should be "static"
    assert "_internal_helper" in func_map, (
        f"_internal_helper not found. Available: {list(func_map.keys())}"
    )
    helper_visibility = func_map["_internal_helper"]["visibility"]
    assert helper_visibility == "static", (
        f"_internal_helper should be 'static', got '{func_map['_internal_helper']['visibility']}'"
    )
187
+
188
+
189
def test_c_function_visibility_extern(c_project_with_header: Path) -> None:
    """Test that non-static functions not in headers get 'extern' visibility."""
    builder = _make_builder(c_project_with_header)
    # The build result itself is not inspected; only the resulting graph is.
    builder.build_graph(clean=True)

    func_query = """
    MATCH (f:Function)
    WHERE f.name = 'undeclared_extern_func'
    RETURN f.name AS name, f.visibility AS visibility
    """
    functions = builder.query(func_query)

    assert len(functions) > 0, "undeclared_extern_func not found"

    # A row may wrap its payload under "result"; the payload is read either
    # positionally (name, visibility) or by key.
    raw = functions[0].get("result", functions[0])
    if isinstance(raw, (list, tuple)):
        visibility = raw[1]
    else:
        visibility = raw.get("visibility")

    assert visibility == "extern", (
        f"undeclared_extern_func should be 'extern', got '{visibility}'"
    )
212
+
213
+
214
def test_c_struct_member_extraction(c_struct_project: Path) -> None:
    """Test that struct, enum and union definitions are extracted as Class nodes.

    Checks the ``kind`` of each node and, for the struct and enum, the number
    of members stored in the node's ``parameters`` property.
    """
    builder = _make_builder(c_struct_project)
    # The build result itself is not inspected; only the resulting graph is.
    builder.build_graph(clean=True)

    class_query = """
    MATCH (c:Class)
    RETURN c.name AS name, c.kind AS kind, c.parameters AS members, c.signature AS signature
    """
    classes = builder.query(class_query)

    # Rows may wrap their payload under "result" and may be positional or
    # keyed; normalize both shapes into name -> {kind, members, signature}.
    class_map = {}
    for row in classes:
        raw = row.get("result", row)
        if isinstance(raw, (list, tuple)):
            class_map[raw[0]] = {
                "kind": raw[1],
                "members": raw[2],
                "signature": raw[3],
            }
        elif isinstance(raw, dict):
            class_map[raw.get("name", "")] = {
                "kind": raw.get("kind"),
                "members": raw.get("members"),
                "signature": raw.get("signature"),
            }

    # Check struct
    assert "config" in class_map, f"config struct not found. Available: {list(class_map.keys())}"
    config = class_map["config"]
    assert config["kind"] == "struct", f"Expected kind 'struct', got '{config['kind']}'"
    assert config["members"] is not None, "config struct should have members"
    assert len(config["members"]) >= 3, (
        f"config struct should have at least 3 members, got {len(config['members'])}"
    )

    # Check enum
    assert "log_level" in class_map, f"log_level enum not found. Available: {list(class_map.keys())}"
    log_level = class_map["log_level"]
    assert log_level["kind"] == "enum", f"Expected kind 'enum', got '{log_level['kind']}'"
    assert log_level["members"] is not None, "log_level enum should have members"
    # Should contain LOG_DEBUG, LOG_INFO, LOG_WARN, LOG_ERROR
    assert len(log_level["members"]) == 4, (
        f"log_level enum should have 4 members, got {len(log_level['members'])}"
    )

    # Check union
    assert "data" in class_map, f"data union not found. Available: {list(class_map.keys())}"
    data = class_map["data"]
    assert data["kind"] == "union", f"Expected kind 'union', got '{data['kind']}'"
264
+
265
+
266
def test_c_typedef_extraction(c_struct_project: Path) -> None:
    """Test that typedef declarations are extracted as Type nodes."""
    builder = _make_builder(c_struct_project)
    # The build result itself is not inspected; only the resulting graph is.
    builder.build_graph(clean=True)

    type_query = """
    MATCH (t:Type)
    RETURN t.name AS name, t.kind AS kind, t.signature AS signature
    """
    types = builder.query(type_query)

    # Rows may wrap their payload under "result" and may be positional or
    # keyed; normalize both shapes into name -> {kind, signature}.
    type_map = {}
    for row in types:
        raw = row.get("result", row)
        if isinstance(raw, (list, tuple)):
            type_map[raw[0]] = {"kind": raw[1], "signature": raw[2]}
        elif isinstance(raw, dict):
            type_map[raw.get("name", "")] = {
                "kind": raw.get("kind"),
                "signature": raw.get("signature"),
            }

    assert "size_t_alias" in type_map, (
        f"size_t_alias typedef not found. Available: {list(type_map.keys())}"
    )
    assert type_map["size_t_alias"]["kind"] == "typedef"
292
+
293
+
294
def test_c_macro_extraction(c_struct_project: Path) -> None:
    """Test that #define macros are extracted."""
    builder = _make_builder(c_struct_project)
    # The build result itself is not inspected; only the resulting graph is.
    builder.build_graph(clean=True)

    # Macros are stored as Function nodes with kind='macro'
    macro_query = """
    MATCH (f:Function)
    WHERE f.kind = 'macro'
    RETURN f.name AS name, f.signature AS signature, f.visibility AS visibility
    """
    macros = builder.query(macro_query)

    # Collect macro names from rows that may be positional or keyed, with the
    # payload optionally wrapped under "result".
    macro_names = set()
    for row in macros:
        raw = row.get("result", row)
        if isinstance(raw, (list, tuple)):
            macro_names.add(raw[0])
        elif isinstance(raw, dict):
            macro_names.add(raw.get("name"))

    assert "MAX_BUFSIZE" in macro_names, (
        f"MAX_BUFSIZE macro not found. Available: {macro_names}"
    )
    assert "MIN" in macro_names, (
        f"MIN macro not found. Available: {macro_names}"
    )
321
+
322
+
323
def test_c_function_signature_extraction(c_project_with_header: Path) -> None:
    """Test that ``api_process`` has a signature naming it and a return type.

    The query also selects ``f.parameters``, but no assertion is made on it.
    """
    builder = _make_builder(c_project_with_header)
    # The build result itself is not inspected; only the resulting graph is.
    builder.build_graph(clean=True)

    func_query = """
    MATCH (f:Function)
    WHERE f.name = 'api_process'
    RETURN f.name AS name, f.signature AS signature, f.return_type AS return_type,
    f.parameters AS parameters
    """
    functions = builder.query(func_query)

    assert len(functions) > 0, "api_process not found"

    # A row may wrap its payload under "result"; the payload is read either
    # positionally (name, signature, return_type, parameters) or by key.
    raw = functions[0].get("result", functions[0])
    if isinstance(raw, (list, tuple)):
        signature = raw[1]
        return_type = raw[2]
    else:
        signature = raw.get("signature")
        return_type = raw.get("return_type")

    assert signature is not None, "api_process should have a signature"
    assert "api_process" in signature, f"Signature should contain function name: {signature}"
    assert return_type is not None, "api_process should have a return type"
351
+
352
+
353
def test_c_header_declarations_tracked(c_project_with_header: Path) -> None:
    """Test that header declarations are tracked for visibility resolution.

    Functions defined by a module whose name ends in ".c" and that are also
    declared in the header must have 'public' visibility.
    """
    builder = _make_builder(c_project_with_header)
    # The build result itself is not inspected; only the resulting graph is.
    builder.build_graph(clean=True)

    # Query all functions together with the module that defines them
    func_query = """
    MATCH (m:Module)-[:DEFINES]->(f:Function)
    RETURN m.name AS module, f.name AS name, f.visibility AS visibility
    """
    functions = builder.query(func_query)

    # Keep only functions defined by ".c" modules: name -> visibility.
    c_file_funcs = {}
    for row in functions:
        raw = row.get("result", row)
        if isinstance(raw, (list, tuple)):
            mod_name, func_name, vis = raw[0], raw[1], raw[2]
        elif isinstance(raw, dict):
            mod_name = raw.get("module", "")
            func_name = raw.get("name", "")
            vis = raw.get("visibility")
        else:
            continue

        if mod_name and mod_name.endswith(".c"):
            c_file_funcs[func_name] = vis

    # Functions also in header should be public
    # NOTE(review): names missing from c_file_funcs are skipped, so this loop
    # asserts nothing if no ".c" module rows were returned -- confirm whether
    # presence should be required here.
    for fname in ("api_init", "api_cleanup", "api_process"):
        if fname in c_file_funcs:
            assert c_file_funcs[fname] == "public", (
                f"{fname} in .c file should be 'public' (declared in header), "
                f"got '{c_file_funcs[fname]}'"
            )