code-graph-builder 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. code_graph_builder/__init__.py +82 -0
  2. code_graph_builder/builder.py +366 -0
  3. code_graph_builder/cgb_cli.py +32 -0
  4. code_graph_builder/cli.py +564 -0
  5. code_graph_builder/commands_cli.py +1288 -0
  6. code_graph_builder/config.py +340 -0
  7. code_graph_builder/constants.py +708 -0
  8. code_graph_builder/embeddings/__init__.py +40 -0
  9. code_graph_builder/embeddings/qwen3_embedder.py +573 -0
  10. code_graph_builder/embeddings/vector_store.py +584 -0
  11. code_graph_builder/examples/__init__.py +0 -0
  12. code_graph_builder/examples/example_configuration.py +276 -0
  13. code_graph_builder/examples/example_kuzu_usage.py +109 -0
  14. code_graph_builder/examples/example_semantic_search_full.py +347 -0
  15. code_graph_builder/examples/generate_wiki.py +915 -0
  16. code_graph_builder/examples/graph_export_example.py +100 -0
  17. code_graph_builder/examples/rag_example.py +206 -0
  18. code_graph_builder/examples/test_cli_demo.py +129 -0
  19. code_graph_builder/examples/test_embedding_api.py +153 -0
  20. code_graph_builder/examples/test_kuzu_local.py +190 -0
  21. code_graph_builder/examples/test_rag_redis.py +390 -0
  22. code_graph_builder/graph_updater.py +605 -0
  23. code_graph_builder/guidance/__init__.py +1 -0
  24. code_graph_builder/guidance/agent.py +123 -0
  25. code_graph_builder/guidance/prompts.py +74 -0
  26. code_graph_builder/guidance/toolset.py +264 -0
  27. code_graph_builder/language_spec.py +536 -0
  28. code_graph_builder/mcp/__init__.py +21 -0
  29. code_graph_builder/mcp/api_doc_generator.py +764 -0
  30. code_graph_builder/mcp/file_editor.py +207 -0
  31. code_graph_builder/mcp/pipeline.py +777 -0
  32. code_graph_builder/mcp/server.py +161 -0
  33. code_graph_builder/mcp/tools.py +1800 -0
  34. code_graph_builder/models.py +115 -0
  35. code_graph_builder/parser_loader.py +344 -0
  36. code_graph_builder/parsers/__init__.py +7 -0
  37. code_graph_builder/parsers/call_processor.py +306 -0
  38. code_graph_builder/parsers/call_resolver.py +139 -0
  39. code_graph_builder/parsers/definition_processor.py +796 -0
  40. code_graph_builder/parsers/factory.py +119 -0
  41. code_graph_builder/parsers/import_processor.py +293 -0
  42. code_graph_builder/parsers/structure_processor.py +145 -0
  43. code_graph_builder/parsers/type_inference.py +143 -0
  44. code_graph_builder/parsers/utils.py +134 -0
  45. code_graph_builder/rag/__init__.py +68 -0
  46. code_graph_builder/rag/camel_agent.py +429 -0
  47. code_graph_builder/rag/client.py +298 -0
  48. code_graph_builder/rag/config.py +239 -0
  49. code_graph_builder/rag/cypher_generator.py +67 -0
  50. code_graph_builder/rag/llm_backend.py +210 -0
  51. code_graph_builder/rag/markdown_generator.py +352 -0
  52. code_graph_builder/rag/prompt_templates.py +440 -0
  53. code_graph_builder/rag/rag_engine.py +640 -0
  54. code_graph_builder/rag/review_report.md +172 -0
  55. code_graph_builder/rag/tests/__init__.py +3 -0
  56. code_graph_builder/rag/tests/test_camel_agent.py +313 -0
  57. code_graph_builder/rag/tests/test_client.py +221 -0
  58. code_graph_builder/rag/tests/test_config.py +177 -0
  59. code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
  60. code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
  61. code_graph_builder/services/__init__.py +39 -0
  62. code_graph_builder/services/graph_service.py +465 -0
  63. code_graph_builder/services/kuzu_service.py +665 -0
  64. code_graph_builder/services/memory_service.py +171 -0
  65. code_graph_builder/settings.py +75 -0
  66. code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
  67. code_graph_builder/tests/__init__.py +1 -0
  68. code_graph_builder/tests/run_acceptance_check.py +378 -0
  69. code_graph_builder/tests/test_api_find.py +231 -0
  70. code_graph_builder/tests/test_api_find_integration.py +226 -0
  71. code_graph_builder/tests/test_basic.py +78 -0
  72. code_graph_builder/tests/test_c_api_extraction.py +388 -0
  73. code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
  74. code_graph_builder/tests/test_embedder.py +411 -0
  75. code_graph_builder/tests/test_integration_semantic.py +434 -0
  76. code_graph_builder/tests/test_mcp_protocol.py +298 -0
  77. code_graph_builder/tests/test_mcp_user_flow.py +190 -0
  78. code_graph_builder/tests/test_rag.py +404 -0
  79. code_graph_builder/tests/test_settings.py +135 -0
  80. code_graph_builder/tests/test_step1_graph_build.py +264 -0
  81. code_graph_builder/tests/test_step2_api_docs.py +323 -0
  82. code_graph_builder/tests/test_step3_embedding.py +278 -0
  83. code_graph_builder/tests/test_vector_store.py +552 -0
  84. code_graph_builder/tools/__init__.py +40 -0
  85. code_graph_builder/tools/graph_query.py +495 -0
  86. code_graph_builder/tools/semantic_search.py +387 -0
  87. code_graph_builder/types.py +333 -0
  88. code_graph_builder/utils/__init__.py +0 -0
  89. code_graph_builder/utils/path_utils.py +30 -0
  90. code_graph_builder-0.2.0.dist-info/METADATA +321 -0
  91. code_graph_builder-0.2.0.dist-info/RECORD +93 -0
  92. code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
  93. code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,264 @@
1
+ """Step 1 integration test: graph-build on tinycc repository.
2
+
3
+ Builds a knowledge graph from the real tinycc C compiler source code,
4
+ then validates that nodes, relationships, and C-specific properties
5
+ (signatures, visibility, docstrings, macros, structs) are correctly extracted.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from pathlib import Path
11
+
12
+ import pytest
13
+
14
# Test corpus location: the "tinycc" directory under parents[3] of this
# file's resolved path.
TINYCC_PATH = Path(__file__).resolve().parents[3].joinpath("tinycc")

# Module-wide marker: every test in this file is skipped when the tinycc
# directory does not exist.
pytestmark = pytest.mark.skipif(
    not TINYCC_PATH.exists(),
    reason=f"tinycc source not found at {TINYCC_PATH}",
)
21
+
22
+
23
@pytest.fixture(scope="module")
def builder(tmp_path_factory):
    """Provide one tinycc graph builder shared by all tests in this module.

    The graph is rebuilt from ``TINYCC_PATH`` with the ``kuzu`` backend, using
    a database path inside a fresh temporary directory. On teardown the
    builder's ``close()`` is called when it has one.
    """
    from code_graph_builder.mcp.pipeline import build_graph

    graph_dir = tmp_path_factory.mktemp("graph")
    graph = build_graph(
        repo_path=TINYCC_PATH,
        db_path=graph_dir / "graph.db",
        rebuild=True,
        backend="kuzu",
    )
    yield graph
    # Teardown: not every builder object is guaranteed to expose close().
    if hasattr(graph, "close"):
        graph.close()
38
+
39
+
40
+ # ---------------------------------------------------------------------------
41
+ # Basic graph structure
42
+ # ---------------------------------------------------------------------------
43
+
44
+
45
+ class TestGraphStructure:
46
+ """Verify the graph has expected node and relationship counts."""
47
+
48
+ def test_has_modules(self, builder):
49
+ rows = builder.query("MATCH (m:Module) RETURN count(m) AS cnt")
50
+ cnt = list(rows[0].values())[0] if rows else 0
51
+ assert cnt > 0, "Graph should have Module nodes"
52
+
53
+ def test_has_functions(self, builder):
54
+ rows = builder.query("MATCH (f:Function) RETURN count(f) AS cnt")
55
+ cnt = list(rows[0].values())[0] if rows else 0
56
+ assert cnt > 50, f"Expected many functions in tinycc, got {cnt}"
57
+
58
+ def test_has_calls(self, builder):
59
+ rows = builder.query("MATCH ()-[r:CALLS]->() RETURN count(r) AS cnt")
60
+ cnt = list(rows[0].values())[0] if rows else 0
61
+ assert cnt > 50, f"Expected many CALLS relationships, got {cnt}"
62
+
63
+ def test_has_defines(self, builder):
64
+ rows = builder.query("MATCH ()-[r:DEFINES]->() RETURN count(r) AS cnt")
65
+ cnt = list(rows[0].values())[0] if rows else 0
66
+ assert cnt > 0, "Graph should have DEFINES relationships"
67
+
68
+ def test_has_classes_or_types(self, builder):
69
+ """tinycc has structs, enums — should appear as Class or Type nodes."""
70
+ rows = builder.query(
71
+ "MATCH (c:Class) RETURN count(c) AS cnt"
72
+ )
73
+ cnt = list(rows[0].values())[0] if rows else 0
74
+ assert cnt > 0, "tinycc should have struct/enum/union Class nodes"
75
+
76
+
77
+ # ---------------------------------------------------------------------------
78
+ # C-specific property extraction
79
+ # ---------------------------------------------------------------------------
80
+
81
+
82
+ class TestCProperties:
83
+ """Verify C-specific properties are extracted correctly."""
84
+
85
+ def test_function_has_signature(self, builder):
86
+ """At least some functions should have non-empty signatures."""
87
+ rows = builder.query(
88
+ "MATCH (f:Function) WHERE f.signature IS NOT NULL AND f.signature <> '' "
89
+ "RETURN count(f) AS cnt"
90
+ )
91
+ cnt = list(rows[0].values())[0] if rows else 0
92
+ assert cnt > 10, f"Expected functions with signatures, got {cnt}"
93
+
94
+ def test_function_has_return_type(self, builder):
95
+ rows = builder.query(
96
+ "MATCH (f:Function) WHERE f.return_type IS NOT NULL AND f.return_type <> '' "
97
+ "RETURN count(f) AS cnt"
98
+ )
99
+ cnt = list(rows[0].values())[0] if rows else 0
100
+ assert cnt > 10, f"Expected functions with return types, got {cnt}"
101
+
102
+ def test_function_has_visibility(self, builder):
103
+ """Functions should have public/static/extern visibility."""
104
+ rows = builder.query(
105
+ "MATCH (f:Function) WHERE f.visibility IN ['public', 'static', 'extern'] "
106
+ "RETURN f.visibility AS vis, count(f) AS cnt "
107
+ "ORDER BY cnt DESC"
108
+ )
109
+ assert len(rows) > 0, "Expected functions with visibility"
110
+ vis_types = {r["vis"] for r in rows}
111
+ assert "static" in vis_types, "tinycc should have static functions"
112
+
113
+ def test_static_functions_exist(self, builder):
114
+ """tinycc has many static helper functions."""
115
+ rows = builder.query(
116
+ "MATCH (f:Function) WHERE f.visibility = 'static' RETURN count(f) AS cnt"
117
+ )
118
+ cnt = list(rows[0].values())[0] if rows else 0
119
+ assert cnt > 20, f"Expected many static functions, got {cnt}"
120
+
121
+ def test_public_functions_exist(self, builder):
122
+ """Functions declared in .h files should be public."""
123
+ rows = builder.query(
124
+ "MATCH (f:Function) WHERE f.visibility = 'public' RETURN count(f) AS cnt"
125
+ )
126
+ cnt = list(rows[0].values())[0] if rows else 0
127
+ assert cnt > 0, f"Expected public functions from headers, got {cnt}"
128
+
129
+
130
+ # ---------------------------------------------------------------------------
131
+ # Comment/docstring extraction (P0 feature)
132
+ # ---------------------------------------------------------------------------
133
+
134
+
135
+ class TestDocstringExtraction:
136
+ """Verify C comments above functions are extracted as docstrings."""
137
+
138
+ def test_some_functions_have_docstrings(self, builder):
139
+ """tinycc has comments above many functions — some should be captured."""
140
+ rows = builder.query(
141
+ "MATCH (f:Function) WHERE f.docstring IS NOT NULL AND f.docstring <> '' "
142
+ "RETURN count(f) AS cnt"
143
+ )
144
+ cnt = list(rows[0].values())[0] if rows else 0
145
+ assert cnt > 0, "Expected some functions with extracted C comments as docstrings"
146
+
147
+ def test_docstring_not_decorative(self, builder):
148
+ """Extracted docstrings should not be purely decorative (e.g., '---')."""
149
+ rows = builder.query(
150
+ "MATCH (f:Function) WHERE f.docstring IS NOT NULL AND f.docstring <> '' "
151
+ "RETURN f.docstring AS doc LIMIT 20"
152
+ )
153
+ for r in rows:
154
+ doc = r["doc"]
155
+ # Should contain actual words, not just dashes/stars
156
+ assert any(c.isalpha() for c in doc), f"Decorative docstring leaked: {doc!r}"
157
+
158
+
159
+ # ---------------------------------------------------------------------------
160
+ # Macro extraction
161
+ # ---------------------------------------------------------------------------
162
+
163
+
164
+ class TestMacroExtraction:
165
+ """Verify #define macros are extracted as Function nodes with kind='macro'."""
166
+
167
+ def test_macros_exist(self, builder):
168
+ rows = builder.query(
169
+ "MATCH (f:Function) WHERE f.kind = 'macro' RETURN count(f) AS cnt"
170
+ )
171
+ cnt = list(rows[0].values())[0] if rows else 0
172
+ assert cnt > 0, "tinycc should have macro definitions"
173
+
174
+ def test_macro_has_signature(self, builder):
175
+ """Macro signature should contain the #define text."""
176
+ rows = builder.query(
177
+ "MATCH (f:Function) WHERE f.kind = 'macro' AND f.signature IS NOT NULL "
178
+ "RETURN f.name AS name, f.signature AS sig LIMIT 5"
179
+ )
180
+ assert len(rows) > 0
181
+ for r in rows:
182
+ assert r["name"], "Macro should have a name"
183
+
184
+
185
+ # ---------------------------------------------------------------------------
186
+ # Struct/Enum extraction
187
+ # ---------------------------------------------------------------------------
188
+
189
+
190
+ class TestTypeExtraction:
191
+ """Verify struct/enum/union are extracted as Class nodes."""
192
+
193
+ def test_structs_exist(self, builder):
194
+ rows = builder.query(
195
+ "MATCH (c:Class) WHERE c.kind = 'struct' RETURN count(c) AS cnt"
196
+ )
197
+ cnt = list(rows[0].values())[0] if rows else 0
198
+ assert cnt > 0, "tinycc should have struct definitions"
199
+
200
+ def test_enums_exist(self, builder):
201
+ rows = builder.query(
202
+ "MATCH (c:Class) WHERE c.kind = 'enum' RETURN count(c) AS cnt"
203
+ )
204
+ cnt = list(rows[0].values())[0] if rows else 0
205
+ # tinycc may or may not have named enums, so just check >= 0
206
+ assert cnt >= 0
207
+
208
+ def test_class_has_kind(self, builder):
209
+ """All Class nodes should have a kind property (struct/enum/union)."""
210
+ rows = builder.query(
211
+ "MATCH (c:Class) WHERE c.kind IS NOT NULL RETURN DISTINCT c.kind AS kind"
212
+ )
213
+ kinds = {r["kind"] for r in rows}
214
+ assert len(kinds) > 0, "Class nodes should have kind property"
215
+
216
+
217
+ # ---------------------------------------------------------------------------
218
+ # Module-Function relationships
219
+ # ---------------------------------------------------------------------------
220
+
221
+
222
+ class TestRelationships:
223
+ """Verify graph relationships are correct."""
224
+
225
+ def test_most_functions_have_module(self, builder):
226
+ """Most functions extracted from source should have a parent module.
227
+
228
+ Note: CALLS edges can create Function stubs without DEFINES.
229
+ We check that the ratio of defined functions is high.
230
+ """
231
+ total = builder.query("MATCH (f:Function) RETURN count(f) AS cnt")
232
+ defined = builder.query(
233
+ "MATCH (m:Module)-[:DEFINES]->(f:Function) RETURN count(f) AS cnt"
234
+ )
235
+ total_cnt = list(total[0].values())[0] if total else 0
236
+ defined_cnt = list(defined[0].values())[0] if defined else 0
237
+ assert defined_cnt > 100, f"Expected many defined functions, got {defined_cnt}"
238
+ ratio = defined_cnt / total_cnt if total_cnt > 0 else 0
239
+ assert ratio > 0.05, f"Only {ratio:.1%} functions have parent module"
240
+
241
+ def test_calls_have_valid_endpoints(self, builder):
242
+ """CALLS relationships should connect existing functions."""
243
+ rows = builder.query(
244
+ "MATCH (a:Function)-[:CALLS]->(b:Function) "
245
+ "RETURN a.qualified_name AS caller, b.qualified_name AS callee "
246
+ "LIMIT 5"
247
+ )
248
+ assert len(rows) > 0
249
+ for r in rows:
250
+ assert r["caller"], "Caller should have qualified_name"
251
+ assert r["callee"], "Callee should have qualified_name"
252
+
253
+ def test_known_function_exists(self, builder):
254
+ """tinycc's main entry point 'tcc_main' should exist."""
255
+ rows = builder.query(
256
+ "MATCH (f:Function) WHERE f.name = 'tcc_main' RETURN f.qualified_name AS qn"
257
+ )
258
+ # tcc_main might be named differently, so just check it's queryable
259
+ # If not found, check for 'main' instead
260
+ if not rows:
261
+ rows = builder.query(
262
+ "MATCH (f:Function) WHERE f.name = 'main' RETURN f.qualified_name AS qn"
263
+ )
264
+ assert len(rows) > 0, "Should find tcc_main or main function"
@@ -0,0 +1,323 @@
1
+ """Step 2 integration test: API docs generation from tinycc graph.
2
+
3
+ Reuses the graph built in Step 1, generates L1/L2/L3 API documentation,
4
+ and validates file structure, content format, and C-specific features.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from pathlib import Path
10
+
11
+ import pytest
12
+
13
# Test corpus location: the "tinycc" directory under parents[3] of this
# file's resolved path.
TINYCC_PATH = Path(__file__).resolve().parents[3].joinpath("tinycc")

# Module-wide marker: every test in this file is skipped when the tinycc
# directory does not exist.
pytestmark = pytest.mark.skipif(
    not TINYCC_PATH.exists(),
    reason=f"tinycc source not found at {TINYCC_PATH}",
)
19
+
20
+
21
@pytest.fixture(scope="module")
def builder(tmp_path_factory):
    """Provide one tinycc graph builder shared by all tests in this module.

    The graph is rebuilt from ``TINYCC_PATH`` with the ``kuzu`` backend, using
    a database path inside a fresh temporary directory. On teardown the
    builder's ``close()`` is called when it has one.
    """
    from code_graph_builder.mcp.pipeline import build_graph

    graph_dir = tmp_path_factory.mktemp("graph")
    graph = build_graph(
        repo_path=TINYCC_PATH,
        db_path=graph_dir / "graph.db",
        rebuild=True,
        backend="kuzu",
    )
    yield graph
    # Teardown: not every builder object is guaranteed to expose close().
    if hasattr(graph, "close"):
        graph.close()
36
+
37
+
38
@pytest.fixture(scope="module")
def api_docs_dir(builder, tmp_path_factory):
    """Generate the API docs once and hand out ``(docs_dir, result)``.

    ``docs_dir`` is the ``api_docs`` directory under a fresh temporary
    artifact directory; ``result`` is the mapping returned by
    ``generate_api_docs_step``. The fixture fails when the result's
    ``status`` is not ``"success"``.
    """
    from code_graph_builder.mcp.pipeline import generate_api_docs_step

    out_root = tmp_path_factory.mktemp("artifacts")
    outcome = generate_api_docs_step(
        builder=builder,
        artifact_dir=out_root,
        rebuild=True,
    )
    assert outcome["status"] == "success", f"API doc generation failed: {outcome}"
    docs_path = out_root / "api_docs"
    return docs_path, outcome
51
+
52
+
53
+ # ---------------------------------------------------------------------------
54
+ # File structure
55
+ # ---------------------------------------------------------------------------
56
+
57
+
58
+ class TestFileStructure:
59
+ """Verify the three-level doc hierarchy is generated."""
60
+
61
+ def test_index_exists(self, api_docs_dir):
62
+ docs_dir, _ = api_docs_dir
63
+ assert (docs_dir / "index.md").exists()
64
+
65
+ def test_modules_dir_exists(self, api_docs_dir):
66
+ docs_dir, _ = api_docs_dir
67
+ assert (docs_dir / "modules").is_dir()
68
+
69
+ def test_funcs_dir_exists(self, api_docs_dir):
70
+ docs_dir, _ = api_docs_dir
71
+ assert (docs_dir / "funcs").is_dir()
72
+
73
+ def test_module_files_generated(self, api_docs_dir):
74
+ docs_dir, _ = api_docs_dir
75
+ module_files = list((docs_dir / "modules").glob("*.md"))
76
+ assert len(module_files) > 0, "Should generate module pages"
77
+
78
+ def test_func_files_generated(self, api_docs_dir):
79
+ docs_dir, _ = api_docs_dir
80
+ func_files = list((docs_dir / "funcs").glob("*.md"))
81
+ assert len(func_files) > 50, f"Expected many func pages, got {len(func_files)}"
82
+
83
+ def test_result_counts(self, api_docs_dir):
84
+ _, result = api_docs_dir
85
+ assert result["module_count"] > 0
86
+ assert result["func_count"] > 50
87
+ assert result["type_count"] >= 0
88
+
89
+
90
+ # ---------------------------------------------------------------------------
91
+ # L1 index content
92
+ # ---------------------------------------------------------------------------
93
+
94
+
95
+ class TestL1Index:
96
+ """Verify the global index page content."""
97
+
98
+ def test_has_title(self, api_docs_dir):
99
+ docs_dir, _ = api_docs_dir
100
+ content = (docs_dir / "index.md").read_text(encoding="utf-8")
101
+ assert "# API Documentation Index" in content
102
+
103
+ def test_has_module_table(self, api_docs_dir):
104
+ docs_dir, _ = api_docs_dir
105
+ content = (docs_dir / "index.md").read_text(encoding="utf-8")
106
+ assert "| 模块" in content or "| Module" in content
107
+
108
+ def test_has_total_counts(self, api_docs_dir):
109
+ docs_dir, _ = api_docs_dir
110
+ content = (docs_dir / "index.md").read_text(encoding="utf-8")
111
+ assert "modules" in content.lower() or "模块" in content
112
+
113
+ def test_module_links(self, api_docs_dir):
114
+ docs_dir, _ = api_docs_dir
115
+ content = (docs_dir / "index.md").read_text(encoding="utf-8")
116
+ assert "modules/" in content, "Index should link to module pages"
117
+
118
+
119
+ # ---------------------------------------------------------------------------
120
+ # L2 module page content
121
+ # ---------------------------------------------------------------------------
122
+
123
+
124
+ class TestL2ModulePage:
125
+ """Verify module-level documentation pages."""
126
+
127
+ def _get_any_module_page(self, api_docs_dir):
128
+ docs_dir, _ = api_docs_dir
129
+ pages = list((docs_dir / "modules").glob("*.md"))
130
+ assert len(pages) > 0
131
+ return pages[0].read_text(encoding="utf-8"), pages[0].name
132
+
133
+ def test_has_title(self, api_docs_dir):
134
+ content, _ = self._get_any_module_page(api_docs_dir)
135
+ assert content.startswith("# ")
136
+
137
+ def test_has_file_info(self, api_docs_dir):
138
+ content, _ = self._get_any_module_page(api_docs_dir)
139
+ # Should mention header or implementation files
140
+ assert "文件" in content or "头文件" in content or "Files" in content or ".c" in content
141
+
142
+ def test_has_function_table(self, api_docs_dir):
143
+ content, _ = self._get_any_module_page(api_docs_dir)
144
+ # Should have a table with function signatures
145
+ assert "|" in content, "Module page should have tables"
146
+
147
+ def test_links_to_func_pages(self, api_docs_dir):
148
+ """At least some module pages should link to function detail pages."""
149
+ docs_dir, _ = api_docs_dir
150
+ for page in (docs_dir / "modules").glob("*.md"):
151
+ content = page.read_text(encoding="utf-8")
152
+ if "../funcs/" in content:
153
+ return
154
+ pytest.fail("No module page links to function detail pages")
155
+
156
+ def test_visibility_sections(self, api_docs_dir):
157
+ """At least some module pages should have visibility-related content."""
158
+ docs_dir, _ = api_docs_dir
159
+ found = False
160
+ for page in (docs_dir / "modules").glob("*.md"):
161
+ content = page.read_text(encoding="utf-8")
162
+ if any(kw in content for kw in [
163
+ "公开接口", "内部函数", "外部声明", "其他",
164
+ "Public", "Static", "Extern",
165
+ "## 宏", # macro section is also a visibility grouping
166
+ ]):
167
+ found = True
168
+ break
169
+ assert found, "At least one module page should have visibility/type sections"
170
+
171
+
172
+ # ---------------------------------------------------------------------------
173
+ # L3 function detail page content
174
+ # ---------------------------------------------------------------------------
175
+
176
+
177
+ class TestL3FuncDetail:
178
+ """Verify function detail documentation pages."""
179
+
180
+ def _get_func_page_with_content(self, api_docs_dir):
181
+ """Find a function page that has substantial content."""
182
+ docs_dir, _ = api_docs_dir
183
+ for page in sorted((docs_dir / "funcs").glob("*.md")):
184
+ content = page.read_text(encoding="utf-8")
185
+ if len(content) > 200: # Skip near-empty pages
186
+ return content, page.name
187
+ pytest.fail("No substantial function pages found")
188
+
189
+ def test_has_title(self, api_docs_dir):
190
+ content, _ = self._get_func_page_with_content(api_docs_dir)
191
+ assert content.startswith("# ")
192
+
193
+ def test_has_signature(self, api_docs_dir):
194
+ content, _ = self._get_func_page_with_content(api_docs_dir)
195
+ assert "签名" in content or "定义" in content or "Signature" in content
196
+
197
+ def test_has_visibility(self, api_docs_dir):
198
+ content, _ = self._get_func_page_with_content(api_docs_dir)
199
+ assert "可见性" in content or "Visibility" in content
200
+
201
+ def test_has_location(self, api_docs_dir):
202
+ content, _ = self._get_func_page_with_content(api_docs_dir)
203
+ assert "位置" in content or "Location" in content
204
+
205
+ def test_has_module_reference(self, api_docs_dir):
206
+ content, _ = self._get_func_page_with_content(api_docs_dir)
207
+ assert "模块" in content or "Module" in content
208
+
209
+ def test_has_called_by_section(self, api_docs_dir):
210
+ content, _ = self._get_func_page_with_content(api_docs_dir)
211
+ assert "被调用" in content or "Called by" in content
212
+
213
+ def test_has_description_or_todo(self, api_docs_dir):
214
+ """Function should have either a docstring description or TODO placeholder."""
215
+ content, _ = self._get_func_page_with_content(api_docs_dir)
216
+ has_desc = ">" in content # blockquote description line
217
+ assert has_desc, "Function page should have > description line"
218
+
219
+
220
+ # ---------------------------------------------------------------------------
221
+ # C-specific doc features
222
+ # ---------------------------------------------------------------------------
223
+
224
+
225
+ class TestCSpecificDocs:
226
+ """Verify C/C++ specific documentation features."""
227
+
228
+ def test_macro_docs_generated(self, api_docs_dir):
229
+ """Macros should have their own function doc pages."""
230
+ docs_dir, _ = api_docs_dir
231
+ all_pages = list((docs_dir / "funcs").glob("*.md"))
232
+ macro_pages = []
233
+ for page in all_pages:
234
+ content = page.read_text(encoding="utf-8")
235
+ if "宏定义" in content or "macro" in content.lower():
236
+ macro_pages.append(page.name)
237
+ assert len(macro_pages) > 0, "Should have macro documentation pages"
238
+
239
+ def test_struct_docs_in_module_page(self, api_docs_dir):
240
+ """Module pages should document structs."""
241
+ docs_dir, _ = api_docs_dir
242
+ for page in (docs_dir / "modules").glob("*.md"):
243
+ content = page.read_text(encoding="utf-8")
244
+ if "struct" in content.lower() or "结构体" in content:
245
+ return # Found struct documentation
246
+ # Not all modules have structs, but at least some should
247
+ # Check if any types were generated at all
248
+ _, result = api_docs_dir
249
+ if result["type_count"] > 0:
250
+ pytest.fail("Types exist but no struct documentation found in module pages")
251
+
252
+ def test_signature_has_c_syntax(self, api_docs_dir):
253
+ """Some function pages should contain C-style signatures."""
254
+ docs_dir, _ = api_docs_dir
255
+ # Search for pages whose filename suggests a real C function (contains a dot separator)
256
+ # e.g., tinycc.tcc.tcc_compile.md — not just macro names
257
+ c_keywords = ["int ", "void ", "char ", "unsigned ", "long ", "struct ", "static "]
258
+ for page in (docs_dir / "funcs").glob("*.md"):
259
+ content = page.read_text(encoding="utf-8")
260
+ if "(" in content and any(kw in content for kw in c_keywords):
261
+ return # Found at least one
262
+ pytest.fail("Should find at least one C-style function signature in func docs")
263
+
264
+ def test_visibility_field_present(self, api_docs_dir):
265
+ """Function pages should have a visibility field."""
266
+ docs_dir, _ = api_docs_dir
267
+ checked = 0
268
+ has_visibility = 0
269
+ for page in list((docs_dir / "funcs").glob("*.md"))[:100]:
270
+ content = page.read_text(encoding="utf-8")
271
+ checked += 1
272
+ if "可见性:" in content or "Visibility:" in content:
273
+ has_visibility += 1
274
+ assert has_visibility > 0, "Function pages should have visibility field"
275
+
276
+ def test_docstring_in_description(self, api_docs_dir):
277
+ """Functions with extracted C comments should show them in description."""
278
+ docs_dir, _ = api_docs_dir
279
+ with_desc = 0
280
+ for page in (docs_dir / "funcs").glob("*.md"):
281
+ content = page.read_text(encoding="utf-8")
282
+ # Has a blockquote description that is NOT a TODO placeholder
283
+ for line in content.splitlines():
284
+ if line.startswith("> ") and "<!-- TODO" not in line:
285
+ with_desc += 1
286
+ break
287
+ assert with_desc > 0, "Some functions should have real descriptions from C comments"
288
+
289
+
290
+ # ---------------------------------------------------------------------------
291
+ # Consistency checks
292
+ # ---------------------------------------------------------------------------
293
+
294
+
295
+ class TestConsistency:
296
+ """Verify consistency between L1/L2/L3 levels."""
297
+
298
+ def test_module_count_matches_files(self, api_docs_dir):
299
+ docs_dir, result = api_docs_dir
300
+ module_files = list((docs_dir / "modules").glob("*.md"))
301
+ assert len(module_files) == result["module_count"], (
302
+ f"Module file count ({len(module_files)}) != result count ({result['module_count']})"
303
+ )
304
+
305
+ def test_func_count_matches_files(self, api_docs_dir):
306
+ """File count should closely match result count (small diff from filename collisions)."""
307
+ docs_dir, result = api_docs_dir
308
+ func_files = list((docs_dir / "funcs").glob("*.md"))
309
+ diff = abs(len(func_files) - result["func_count"])
310
+ assert diff <= 10, (
311
+ f"Func file count ({len(func_files)}) differs too much "
312
+ f"from result count ({result['func_count']}), diff={diff}"
313
+ )
314
+
315
+ def test_index_lists_all_modules(self, api_docs_dir):
316
+ """Index page should reference every module page."""
317
+ docs_dir, _ = api_docs_dir
318
+ index_content = (docs_dir / "index.md").read_text(encoding="utf-8")
319
+ module_files = list((docs_dir / "modules").glob("*.md"))
320
+ # At least 80% of module files should be referenced in index
321
+ referenced = sum(1 for f in module_files if f.stem in index_content)
322
+ ratio = referenced / len(module_files) if module_files else 1
323
+ assert ratio > 0.8, f"Only {ratio:.0%} modules referenced in index"