code-graph-builder 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_graph_builder/__init__.py +82 -0
- code_graph_builder/builder.py +366 -0
- code_graph_builder/cgb_cli.py +32 -0
- code_graph_builder/cli.py +564 -0
- code_graph_builder/commands_cli.py +1288 -0
- code_graph_builder/config.py +340 -0
- code_graph_builder/constants.py +708 -0
- code_graph_builder/embeddings/__init__.py +40 -0
- code_graph_builder/embeddings/qwen3_embedder.py +573 -0
- code_graph_builder/embeddings/vector_store.py +584 -0
- code_graph_builder/examples/__init__.py +0 -0
- code_graph_builder/examples/example_configuration.py +276 -0
- code_graph_builder/examples/example_kuzu_usage.py +109 -0
- code_graph_builder/examples/example_semantic_search_full.py +347 -0
- code_graph_builder/examples/generate_wiki.py +915 -0
- code_graph_builder/examples/graph_export_example.py +100 -0
- code_graph_builder/examples/rag_example.py +206 -0
- code_graph_builder/examples/test_cli_demo.py +129 -0
- code_graph_builder/examples/test_embedding_api.py +153 -0
- code_graph_builder/examples/test_kuzu_local.py +190 -0
- code_graph_builder/examples/test_rag_redis.py +390 -0
- code_graph_builder/graph_updater.py +605 -0
- code_graph_builder/guidance/__init__.py +1 -0
- code_graph_builder/guidance/agent.py +123 -0
- code_graph_builder/guidance/prompts.py +74 -0
- code_graph_builder/guidance/toolset.py +264 -0
- code_graph_builder/language_spec.py +536 -0
- code_graph_builder/mcp/__init__.py +21 -0
- code_graph_builder/mcp/api_doc_generator.py +764 -0
- code_graph_builder/mcp/file_editor.py +207 -0
- code_graph_builder/mcp/pipeline.py +777 -0
- code_graph_builder/mcp/server.py +161 -0
- code_graph_builder/mcp/tools.py +1800 -0
- code_graph_builder/models.py +115 -0
- code_graph_builder/parser_loader.py +344 -0
- code_graph_builder/parsers/__init__.py +7 -0
- code_graph_builder/parsers/call_processor.py +306 -0
- code_graph_builder/parsers/call_resolver.py +139 -0
- code_graph_builder/parsers/definition_processor.py +796 -0
- code_graph_builder/parsers/factory.py +119 -0
- code_graph_builder/parsers/import_processor.py +293 -0
- code_graph_builder/parsers/structure_processor.py +145 -0
- code_graph_builder/parsers/type_inference.py +143 -0
- code_graph_builder/parsers/utils.py +134 -0
- code_graph_builder/rag/__init__.py +68 -0
- code_graph_builder/rag/camel_agent.py +429 -0
- code_graph_builder/rag/client.py +298 -0
- code_graph_builder/rag/config.py +239 -0
- code_graph_builder/rag/cypher_generator.py +67 -0
- code_graph_builder/rag/llm_backend.py +210 -0
- code_graph_builder/rag/markdown_generator.py +352 -0
- code_graph_builder/rag/prompt_templates.py +440 -0
- code_graph_builder/rag/rag_engine.py +640 -0
- code_graph_builder/rag/review_report.md +172 -0
- code_graph_builder/rag/tests/__init__.py +3 -0
- code_graph_builder/rag/tests/test_camel_agent.py +313 -0
- code_graph_builder/rag/tests/test_client.py +221 -0
- code_graph_builder/rag/tests/test_config.py +177 -0
- code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
- code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
- code_graph_builder/services/__init__.py +39 -0
- code_graph_builder/services/graph_service.py +465 -0
- code_graph_builder/services/kuzu_service.py +665 -0
- code_graph_builder/services/memory_service.py +171 -0
- code_graph_builder/settings.py +75 -0
- code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
- code_graph_builder/tests/__init__.py +1 -0
- code_graph_builder/tests/run_acceptance_check.py +378 -0
- code_graph_builder/tests/test_api_find.py +231 -0
- code_graph_builder/tests/test_api_find_integration.py +226 -0
- code_graph_builder/tests/test_basic.py +78 -0
- code_graph_builder/tests/test_c_api_extraction.py +388 -0
- code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
- code_graph_builder/tests/test_embedder.py +411 -0
- code_graph_builder/tests/test_integration_semantic.py +434 -0
- code_graph_builder/tests/test_mcp_protocol.py +298 -0
- code_graph_builder/tests/test_mcp_user_flow.py +190 -0
- code_graph_builder/tests/test_rag.py +404 -0
- code_graph_builder/tests/test_settings.py +135 -0
- code_graph_builder/tests/test_step1_graph_build.py +264 -0
- code_graph_builder/tests/test_step2_api_docs.py +323 -0
- code_graph_builder/tests/test_step3_embedding.py +278 -0
- code_graph_builder/tests/test_vector_store.py +552 -0
- code_graph_builder/tools/__init__.py +40 -0
- code_graph_builder/tools/graph_query.py +495 -0
- code_graph_builder/tools/semantic_search.py +387 -0
- code_graph_builder/types.py +333 -0
- code_graph_builder/utils/__init__.py +0 -0
- code_graph_builder/utils/path_utils.py +30 -0
- code_graph_builder-0.2.0.dist-info/METADATA +321 -0
- code_graph_builder-0.2.0.dist-info/RECORD +93 -0
- code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
- code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,764 @@
|
|
|
1
|
+
"""Generate per-module and per-function API documentation from the knowledge graph.
|
|
2
|
+
|
|
3
|
+
Output layout (all under ``{artifact_dir}/api_docs/``):
|
|
4
|
+
|
|
5
|
+
index.md — L1 index: one row per module with summary counts
|
|
6
|
+
modules/
|
|
7
|
+
{module_qn}.md — L2 index: all interfaces in one module
|
|
8
|
+
funcs/
|
|
9
|
+
{func_qn}.md — L3 detail: signature, docstring, call graph
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from collections import defaultdict
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
from loguru import logger
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# ---------------------------------------------------------------------------
|
|
22
|
+
# Data extraction helpers
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
|
|
25
|
+
def _unpack_row(row: dict[str, Any]) -> list[Any]:
|
|
26
|
+
"""Normalise a Kùzu result row to a flat list.
|
|
27
|
+
|
|
28
|
+
Handles two formats:
|
|
29
|
+
- Legacy: {"result": [v1, v2, ...]}
|
|
30
|
+
- Named-column: {"col1": v1, "col2": v2, ...} (from KuzuIngestor.query())
|
|
31
|
+
"""
|
|
32
|
+
if "result" in row:
|
|
33
|
+
raw = row["result"]
|
|
34
|
+
return list(raw) if isinstance(raw, (list, tuple)) else [raw]
|
|
35
|
+
return list(row.values())
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _build_call_graph(
    call_rows: list[dict[str, Any]],
) -> tuple[dict[str, list[dict]], dict[str, list[dict]]]:
    """Index call edges in both directions.

    Each row is unpacked to ``[caller_qn, callee_qn, callee_path?,
    callee_start?]``. Rows with fewer than two values are ignored and a
    repeated ``(caller, callee)`` pair is recorded only once.

    Args:
        call_rows: Raw call-edge rows.

    Returns:
        ``(callers_of, callees_of)``. Both are ``defaultdict(list)`` objects
        mapping a qualified name to dicts with the keys ``qn``, ``path`` and
        ``start_line``. Entries in ``callees_of`` carry the callee location
        when the row provides it; entries in ``callers_of`` always have
        ``path`` and ``start_line`` set to ``None`` because the rows do not
        carry the caller's location.
    """
    callers_of: dict[str, list[dict]] = defaultdict(list)
    callees_of: dict[str, list[dict]] = defaultdict(list)
    recorded: set[tuple[str, str]] = set()

    for raw_row in call_rows:
        fields = _unpack_row(raw_row)
        if len(fields) < 2:
            continue

        caller_qn, callee_qn = fields[0], fields[1]
        pair = (caller_qn, callee_qn)
        if pair in recorded:
            continue
        recorded.add(pair)

        # Forward edge: location columns describe the callee.
        callees_of[caller_qn].append({
            "qn": callee_qn,
            "path": fields[2] if len(fields) > 2 else None,
            "start_line": fields[3] if len(fields) > 3 else None,
        })
        # Reverse edge: the caller's own location is not available here.
        callers_of[callee_qn].append({
            "qn": caller_qn,
            "path": None,
            "start_line": None,
        })

    return callers_of, callees_of
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# ---------------------------------------------------------------------------
|
|
81
|
+
# Source code & call tree helpers
|
|
82
|
+
# ---------------------------------------------------------------------------
|
|
83
|
+
|
|
84
|
+
def _read_source_snippet(
|
|
85
|
+
path: str | None,
|
|
86
|
+
start_line: int | None,
|
|
87
|
+
end_line: int | None,
|
|
88
|
+
repo_path: Path | None = None,
|
|
89
|
+
) -> str | None:
|
|
90
|
+
"""Read function source code from the file system.
|
|
91
|
+
|
|
92
|
+
Returns the source code string or None if file cannot be read.
|
|
93
|
+
"""
|
|
94
|
+
if not path or not start_line or not end_line:
|
|
95
|
+
return None
|
|
96
|
+
|
|
97
|
+
# Try absolute path first, then relative to repo_path
|
|
98
|
+
file_path = Path(path)
|
|
99
|
+
if not file_path.is_absolute() and repo_path:
|
|
100
|
+
file_path = repo_path / path
|
|
101
|
+
|
|
102
|
+
if not file_path.exists():
|
|
103
|
+
return None
|
|
104
|
+
|
|
105
|
+
try:
|
|
106
|
+
lines = file_path.read_text(encoding="utf-8", errors="replace").splitlines()
|
|
107
|
+
# start_line and end_line are 1-based
|
|
108
|
+
start = max(0, start_line - 1)
|
|
109
|
+
end = min(len(lines), end_line)
|
|
110
|
+
snippet = "\n".join(lines[start:end])
|
|
111
|
+
# Truncate very long functions
|
|
112
|
+
if len(snippet) > 3000:
|
|
113
|
+
snippet = snippet[:3000] + "\n /* ... truncated ... */"
|
|
114
|
+
return snippet
|
|
115
|
+
except (OSError, UnicodeDecodeError):
|
|
116
|
+
return None
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _build_call_tree(
|
|
120
|
+
qn: str,
|
|
121
|
+
callees_of: dict[str, list[dict]],
|
|
122
|
+
func_lookup: dict[str, dict],
|
|
123
|
+
depth: int = 2,
|
|
124
|
+
_visited: set | None = None,
|
|
125
|
+
) -> list[str]:
|
|
126
|
+
"""Build ASCII call tree lines for a function, up to `depth` levels.
|
|
127
|
+
|
|
128
|
+
Returns list of strings like:
|
|
129
|
+
["├── func_b [static]", "│ └── func_c", "└── func_d"]
|
|
130
|
+
"""
|
|
131
|
+
if _visited is None:
|
|
132
|
+
_visited = set()
|
|
133
|
+
|
|
134
|
+
_visited.add(qn)
|
|
135
|
+
callees = callees_of.get(qn, [])
|
|
136
|
+
lines: list[str] = []
|
|
137
|
+
|
|
138
|
+
for i, callee in enumerate(callees):
|
|
139
|
+
callee_qn = callee["qn"]
|
|
140
|
+
callee_func = func_lookup.get(callee_qn, {})
|
|
141
|
+
name = callee_func.get("name", callee_qn.rsplit(".", 1)[-1])
|
|
142
|
+
vis = callee_func.get("visibility", "")
|
|
143
|
+
vis_tag = f" [{vis}]" if vis and vis != "public" else ""
|
|
144
|
+
|
|
145
|
+
is_last = (i == len(callees) - 1)
|
|
146
|
+
prefix = "└── " if is_last else "├── "
|
|
147
|
+
lines.append(f"{prefix}{name}{vis_tag}")
|
|
148
|
+
|
|
149
|
+
# Recurse if not visited and within depth
|
|
150
|
+
if depth > 1 and callee_qn not in _visited:
|
|
151
|
+
sub_lines = _build_call_tree(
|
|
152
|
+
callee_qn, callees_of, func_lookup, depth - 1, _visited
|
|
153
|
+
)
|
|
154
|
+
child_prefix = " " if is_last else "│ "
|
|
155
|
+
for sub_line in sub_lines:
|
|
156
|
+
lines.append(f"{child_prefix}{sub_line}")
|
|
157
|
+
|
|
158
|
+
return lines
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _infer_ownership(func: dict[str, Any]) -> list[str]:
|
|
162
|
+
"""Infer memory ownership hints from function signature using heuristics.
|
|
163
|
+
|
|
164
|
+
Returns list of strings describing ownership for each parameter and return value.
|
|
165
|
+
"""
|
|
166
|
+
hints: list[str] = []
|
|
167
|
+
name = func.get("name") or ""
|
|
168
|
+
return_type = func.get("return_type") or ""
|
|
169
|
+
|
|
170
|
+
# Return type ownership
|
|
171
|
+
if "*" in return_type:
|
|
172
|
+
if any(kw in name.lower() for kw in ("init", "create", "alloc", "new", "open", "dup", "clone")):
|
|
173
|
+
hints.append(f"返回 `{return_type}`: 调用方拥有,需释放")
|
|
174
|
+
elif any(kw in name.lower() for kw in ("get", "find", "lookup", "peek", "current")):
|
|
175
|
+
hints.append(f"返回 `{return_type}`: 借用,不可释放")
|
|
176
|
+
|
|
177
|
+
# Free/destroy patterns
|
|
178
|
+
if any(kw in name.lower() for kw in ("free", "destroy", "release", "close", "cleanup", "deinit")):
|
|
179
|
+
hints.append("释放函数:调用后指针失效")
|
|
180
|
+
|
|
181
|
+
return hints
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
# ---------------------------------------------------------------------------
|
|
185
|
+
# Markdown rendering
|
|
186
|
+
# ---------------------------------------------------------------------------
|
|
187
|
+
|
|
188
|
+
def _sanitise_filename(qn: str) -> str:
|
|
189
|
+
"""Convert a qualified name to a safe filename (no path separators).
|
|
190
|
+
|
|
191
|
+
macOS / Linux limit filenames to 255 bytes. For long C signatures that
|
|
192
|
+
include the full parameter list we truncate to 180 chars and append an
|
|
193
|
+
8-char hash so the name stays unique.
|
|
194
|
+
"""
|
|
195
|
+
import hashlib
|
|
196
|
+
safe = qn.replace("/", "_").replace("\\", "_").replace("\n", " ").replace("\r", "")
|
|
197
|
+
# Encode to bytes to measure the real byte length (UTF-8)
|
|
198
|
+
encoded = safe.encode("utf-8")
|
|
199
|
+
if len(encoded) <= 200:
|
|
200
|
+
return safe
|
|
201
|
+
# Truncate to 180 bytes (safe UTF-8 boundary) + 8-char hex hash
|
|
202
|
+
truncated = encoded[:180].decode("utf-8", errors="ignore").rstrip()
|
|
203
|
+
suffix = hashlib.md5(qn.encode("utf-8")).hexdigest()[:8]
|
|
204
|
+
return f"{truncated}_{suffix}"
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def _render_func_detail(
    func: dict[str, Any],
    callers: list[dict],
    callees: list[dict],
    callees_of: dict[str, list[dict]] | None = None,
    func_lookup: dict[str, dict] | None = None,
    module_desc: str = "",
    repo_path: Path | None = None,
) -> str:
    """Render the L3 Markdown detail page for one function or macro.

    Sections are emitted in this order: title, one-line summary, metadata
    bullets, full description (docstrings longer than 80 characters), a
    2-level call tree, the list of callers, a parameters/ownership section,
    and the source snippet (skipped for macros).

    Args:
        func: Function metadata; ``qn`` is required, all other keys are
            optional.
        callers: Caller dicts (``qn``, optional ``path`` / ``start_line``).
        callees: Direct callees; accepted for interface compatibility but
            not read here (the tree is built from ``callees_of``).
        callees_of: Full callee mapping used to build the call tree.
        func_lookup: Qualified name to function metadata, used for callee
            names and caller modules.
        module_desc: Optional description appended to the module bullet.
        repo_path: Base directory for reading the source snippet.

    Returns:
        The page as a single Markdown string.
    """
    callees_of = {} if callees_of is None else callees_of
    func_lookup = {} if func_lookup is None else func_lookup

    qn = func["qn"]
    name = func.get("name") or qn.rsplit(".", 1)[-1]
    module_qn = func.get("module_qn", "")
    is_macro = (func.get("kind") or "") == "macro"
    doc = (func.get("docstring") or "").strip()

    # --- Title and summary line (the key line for embedding retrieval) ---
    out: list[str] = [f"# {name}", ""]
    if not doc:
        out.append(f"> <!-- TODO: LLM generate description for {name} -->")
    elif "." in doc:
        first_sentence = doc.split(".")[0].strip() + "."
        out.append(f"> {first_sentence}")
    else:
        out.append(f"> {doc}")
    out.append("")

    # --- Metadata bullets ---
    sig = func.get("signature") or name
    if is_macro:
        out += ["- 类型: 宏定义", f"- 定义: `{sig}`"]
    else:
        out.append(f"- 签名: `{sig}`")
        return_type = func.get("return_type")
        if return_type:
            out.append(f"- 返回: `{return_type}`")

    vis = func.get("visibility") or "unknown"
    loc_path = func.get("path") or ""
    start = func.get("start_line") or "?"
    end = func.get("end_line") or "?"

    # For public symbols, name a header derived from the file's stem.
    if vis == "public" and loc_path:
        header_note = f" | 头文件: {Path(loc_path).stem + '.h'}"
    else:
        header_note = ""
    out.append(f"- 可见性: {vis}{header_note}")
    out.append(f"- 位置: {loc_path}:{start}-{end}")

    # Module bullet, with the description inlined for embedding context.
    if module_desc:
        out.append(f"- 模块: {module_qn} — {module_desc}")
    else:
        out.append(f"- 模块: {module_qn}")
    out.append("")

    # --- Full docstring, only when it is longer than a summary line ---
    if len(doc) > 80:
        out += ["## 描述", "", doc, ""]

    # --- Call tree (two levels) ---
    tree_lines = _build_call_tree(qn, callees_of, func_lookup, depth=2)
    if tree_lines:
        out += ["## 调用树", "", f"{name}"]
        out += tree_lines
        out.append("")

    # --- Callers ---
    out += [f"## 被调用 ({len(callers)})", ""]
    if not callers:
        out.append("*(无调用者)*")
    else:
        for caller in callers:
            caller_module = func_lookup.get(caller["qn"], {}).get("module_qn", "")
            # Only tag callers that live in a different module.
            if caller_module and caller_module != module_qn:
                module_tag = f" ({caller_module})"
            else:
                module_tag = ""
            if caller.get("path") and caller.get("start_line"):
                loc = f" → {caller['path']}:{caller['start_line']}"
            else:
                loc = ""
            out.append(f"- {caller['qn']}{module_tag}{loc}")
    out.append("")

    # --- Parameters and memory ownership (C/C++ oriented heuristics) ---
    params = func.get("parameters")
    ownership_hints = _infer_ownership(func)
    has_param_list = bool(params) and isinstance(params, list)
    if (has_param_list and any(params)) or ownership_hints:
        out += ["## 参数与内存", ""]
        if has_param_list:
            out += ["| 参数 | 方向 | 所有权 |", "|------|------|--------|"]
            for param in params:
                if not param:
                    continue
                text = str(param)
                # const pointer = input/borrowed; other pointer = in/out.
                if "*" not in text:
                    direction, ownership = "in", ""
                elif "const" in text:
                    direction, ownership = "in", "借用"
                else:
                    direction, ownership = "in/out", "借用,可修改"
                out.append(f"| `{text}` | {direction} | {ownership} |")
            out.append("")
        if ownership_hints:
            out.extend(f"- {hint}" for hint in ownership_hints)
            out.append("")

    # --- Source code (macros already show their definition above) ---
    if not is_macro:
        source = _read_source_snippet(
            func.get("path"), func.get("start_line"), func.get("end_line"), repo_path
        )
        if source:
            # Pick the fence language from the file extension.
            ext = Path(loc_path).suffix if loc_path else ""
            lang = "cpp" if ext in (".cpp", ".cc", ".cxx", ".hpp") else "c"
            out += ["## 实现", "", f"```{lang}", source, "```", ""]

    return "\n".join(out)
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def _render_module_page(
|
|
354
|
+
module_qn: str,
|
|
355
|
+
files: list[str],
|
|
356
|
+
funcs: list[dict[str, Any]],
|
|
357
|
+
types: list[dict[str, Any]],
|
|
358
|
+
callees_of: dict[str, list[dict]] | None = None,
|
|
359
|
+
func_lookup: dict[str, dict] | None = None,
|
|
360
|
+
module_desc: str = "",
|
|
361
|
+
) -> str:
|
|
362
|
+
"""Render L2 module index page."""
|
|
363
|
+
if callees_of is None:
|
|
364
|
+
callees_of = {}
|
|
365
|
+
if func_lookup is None:
|
|
366
|
+
func_lookup = {}
|
|
367
|
+
|
|
368
|
+
lines: list[str] = []
|
|
369
|
+
lines.append(f"# {module_qn}")
|
|
370
|
+
if module_desc:
|
|
371
|
+
lines.append("")
|
|
372
|
+
lines.append(f"> {module_desc}")
|
|
373
|
+
lines.append("")
|
|
374
|
+
|
|
375
|
+
# Header/implementation split
|
|
376
|
+
headers = [f for f in files if f.endswith((".h", ".hpp", ".hxx"))]
|
|
377
|
+
sources = [f for f in files if not f.endswith((".h", ".hpp", ".hxx"))]
|
|
378
|
+
if headers:
|
|
379
|
+
lines.append(f"**头文件**: {', '.join(headers)} | **实现**: {', '.join(sources) if sources else '—'}")
|
|
380
|
+
else:
|
|
381
|
+
lines.append(f"**文件**: {', '.join(files)}")
|
|
382
|
+
lines.append("")
|
|
383
|
+
|
|
384
|
+
# Separate macros from regular functions
|
|
385
|
+
regular_funcs = [f for f in funcs if f.get("kind") != "macro"]
|
|
386
|
+
macros = [f for f in funcs if f.get("kind") == "macro"]
|
|
387
|
+
|
|
388
|
+
# Call tree for public entry points
|
|
389
|
+
public_funcs = [f for f in regular_funcs if f.get("visibility") == "public"]
|
|
390
|
+
if public_funcs:
|
|
391
|
+
lines.append("## 调用树")
|
|
392
|
+
lines.append("")
|
|
393
|
+
for pf in public_funcs:
|
|
394
|
+
ret = f" → {pf['return_type']}" if pf.get("return_type") else ""
|
|
395
|
+
lines.append(f"{pf['name']}{ret}")
|
|
396
|
+
tree_lines = _build_call_tree(pf["qn"], callees_of, func_lookup, depth=2)
|
|
397
|
+
lines.extend(tree_lines)
|
|
398
|
+
lines.append("")
|
|
399
|
+
|
|
400
|
+
# Group functions by visibility
|
|
401
|
+
by_vis: dict[str, list[dict]] = defaultdict(list)
|
|
402
|
+
for f in regular_funcs:
|
|
403
|
+
by_vis[f.get("visibility") or "unknown"].append(f)
|
|
404
|
+
|
|
405
|
+
vis_order = ["public", "extern", "static", "unknown"]
|
|
406
|
+
vis_labels = {
|
|
407
|
+
"public": "公开接口",
|
|
408
|
+
"extern": "外部声明",
|
|
409
|
+
"static": "内部函数",
|
|
410
|
+
"unknown": "其他",
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
for vis in vis_order:
|
|
414
|
+
group = by_vis.get(vis)
|
|
415
|
+
if not group:
|
|
416
|
+
continue
|
|
417
|
+
lines.append(f"## {vis_labels.get(vis, vis)} ({len(group)})")
|
|
418
|
+
lines.append("")
|
|
419
|
+
lines.append("| 函数 | 签名 | 一句话 |")
|
|
420
|
+
lines.append("|------|------|--------|")
|
|
421
|
+
for f in group:
|
|
422
|
+
safe = _sanitise_filename(f["qn"])
|
|
423
|
+
sig = f.get("signature") or f["name"]
|
|
424
|
+
doc = (f.get("docstring") or "").strip()
|
|
425
|
+
brief = (doc.split(".")[0].strip() + ".") if doc and "." in doc else (doc or "—")
|
|
426
|
+
if len(brief) > 60:
|
|
427
|
+
brief = brief[:57] + "..."
|
|
428
|
+
lines.append(f"| [{f['name']}](../funcs/{safe}.md) | `{sig}` | {brief} |")
|
|
429
|
+
lines.append("")
|
|
430
|
+
|
|
431
|
+
# Types: structs, unions, enums with member info
|
|
432
|
+
if types:
|
|
433
|
+
# Group by kind
|
|
434
|
+
structs = [t for t in types if t.get("kind") in ("struct", None, "")]
|
|
435
|
+
unions = [t for t in types if t.get("kind") == "union"]
|
|
436
|
+
enums = [t for t in types if t.get("kind") == "enum"]
|
|
437
|
+
typedefs = [t for t in types if t.get("kind") == "typedef"]
|
|
438
|
+
|
|
439
|
+
if structs or unions:
|
|
440
|
+
lines.append(f"## 结构体 ({len(structs) + len(unions)})")
|
|
441
|
+
lines.append("")
|
|
442
|
+
for t in structs + unions:
|
|
443
|
+
kind_label = "union" if t.get("kind") == "union" else "struct"
|
|
444
|
+
lines.append(f"### {t.get('name', '?')} ({kind_label})")
|
|
445
|
+
lines.append("")
|
|
446
|
+
members = t.get("members") or t.get("parameters")
|
|
447
|
+
if members and isinstance(members, list):
|
|
448
|
+
for m in members:
|
|
449
|
+
if m:
|
|
450
|
+
lines.append(f"- `{m}`")
|
|
451
|
+
else:
|
|
452
|
+
sig = t.get("signature", "")
|
|
453
|
+
if sig:
|
|
454
|
+
lines.append(f"```c\n{sig}\n```")
|
|
455
|
+
lines.append("")
|
|
456
|
+
|
|
457
|
+
if enums:
|
|
458
|
+
lines.append(f"## 枚举 ({len(enums)})")
|
|
459
|
+
lines.append("")
|
|
460
|
+
for t in enums:
|
|
461
|
+
lines.append(f"### {t.get('name', '?')}")
|
|
462
|
+
lines.append("")
|
|
463
|
+
members = t.get("members") or t.get("parameters")
|
|
464
|
+
if members and isinstance(members, list):
|
|
465
|
+
lines.append(f"值: `{' | '.join(str(m) for m in members if m)}`")
|
|
466
|
+
else:
|
|
467
|
+
sig = t.get("signature", "")
|
|
468
|
+
if sig:
|
|
469
|
+
lines.append(f"```c\n{sig}\n```")
|
|
470
|
+
lines.append("")
|
|
471
|
+
|
|
472
|
+
if typedefs:
|
|
473
|
+
lines.append(f"## 类型别名 ({len(typedefs)})")
|
|
474
|
+
lines.append("")
|
|
475
|
+
lines.append("| 名称 | 定义 |")
|
|
476
|
+
lines.append("|------|------|")
|
|
477
|
+
for t in typedefs:
|
|
478
|
+
lines.append(f"| {t.get('name', '?')} | `{t.get('signature', '')}` |")
|
|
479
|
+
lines.append("")
|
|
480
|
+
|
|
481
|
+
# Macros
|
|
482
|
+
if macros:
|
|
483
|
+
lines.append(f"## 宏 ({len(macros)})")
|
|
484
|
+
lines.append("")
|
|
485
|
+
lines.append("| 宏 | 定义 |")
|
|
486
|
+
lines.append("|----|------|")
|
|
487
|
+
for m in macros:
|
|
488
|
+
sig = m.get("signature") or f"#define {m['name']}"
|
|
489
|
+
# Truncate long macro definitions
|
|
490
|
+
if len(sig) > 80:
|
|
491
|
+
sig = sig[:77] + "..."
|
|
492
|
+
lines.append(f"| {m['name']} | `{sig}` |")
|
|
493
|
+
lines.append("")
|
|
494
|
+
|
|
495
|
+
return "\n".join(lines)
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
def _render_index(
|
|
499
|
+
module_summaries: list[dict[str, Any]],
|
|
500
|
+
total_funcs: int,
|
|
501
|
+
total_types: int,
|
|
502
|
+
import_graph: dict[str, list[str]] | None = None,
|
|
503
|
+
) -> str:
|
|
504
|
+
"""Render L1 global index page."""
|
|
505
|
+
lines: list[str] = []
|
|
506
|
+
lines.append("# API Documentation Index")
|
|
507
|
+
lines.append("")
|
|
508
|
+
lines.append(f"Total: {len(module_summaries)} modules, "
|
|
509
|
+
f"{total_funcs} functions, {total_types} types")
|
|
510
|
+
lines.append("")
|
|
511
|
+
|
|
512
|
+
# Module table with description column
|
|
513
|
+
lines.append("| 模块 | 职责 | 头文件 | 函数 | 类型 | 宏 |")
|
|
514
|
+
lines.append("|------|------|--------|------|------|----|")
|
|
515
|
+
|
|
516
|
+
for m in module_summaries:
|
|
517
|
+
safe = _sanitise_filename(m["qn"])
|
|
518
|
+
# Find header files
|
|
519
|
+
headers = [f for f in m["files"] if f.endswith((".h", ".hpp", ".hxx"))]
|
|
520
|
+
header_str = ", ".join(headers) if headers else "—"
|
|
521
|
+
desc = m.get("desc", "—")
|
|
522
|
+
macro_count = m.get("macros", 0)
|
|
523
|
+
func_count = m["public"] + m["static"] + m["extern"]
|
|
524
|
+
type_count = m["types"]
|
|
525
|
+
lines.append(
|
|
526
|
+
f"| [{m['qn']}](modules/{safe}.md) | {desc} "
|
|
527
|
+
f"| {header_str} | {func_count} | {type_count} | {macro_count} |"
|
|
528
|
+
)
|
|
529
|
+
lines.append("")
|
|
530
|
+
|
|
531
|
+
# Include dependency tree
|
|
532
|
+
if import_graph:
|
|
533
|
+
lines.append("## #include 依赖")
|
|
534
|
+
lines.append("")
|
|
535
|
+
# Find root modules (not imported by anyone)
|
|
536
|
+
all_imported: set[str] = set()
|
|
537
|
+
for targets in import_graph.values():
|
|
538
|
+
all_imported.update(targets)
|
|
539
|
+
roots = [m for m in import_graph if m not in all_imported]
|
|
540
|
+
if not roots:
|
|
541
|
+
roots = sorted(import_graph.keys())[:5]
|
|
542
|
+
|
|
543
|
+
visited: set[str] = set()
|
|
544
|
+
|
|
545
|
+
def _render_tree(mod: str, prefix: str = "", is_last: bool = True) -> None:
|
|
546
|
+
if mod in visited:
|
|
547
|
+
connector = "└── " if is_last else "├── "
|
|
548
|
+
lines.append(f"{prefix}{connector}{mod} (已展开)")
|
|
549
|
+
return
|
|
550
|
+
visited.add(mod)
|
|
551
|
+
connector = "└── " if is_last else "├── "
|
|
552
|
+
lines.append(f"{prefix}{connector}{mod}")
|
|
553
|
+
children = import_graph.get(mod, [])
|
|
554
|
+
for j, child in enumerate(children):
|
|
555
|
+
child_is_last = (j == len(children) - 1)
|
|
556
|
+
child_prefix = prefix + (" " if is_last else "│ ")
|
|
557
|
+
_render_tree(child, child_prefix, child_is_last)
|
|
558
|
+
|
|
559
|
+
for i, root in enumerate(sorted(roots)):
|
|
560
|
+
if i > 0:
|
|
561
|
+
lines.append("")
|
|
562
|
+
lines.append(root)
|
|
563
|
+
children = import_graph.get(root, [])
|
|
564
|
+
for j, child in enumerate(children):
|
|
565
|
+
_render_tree(child, "", j == len(children) - 1)
|
|
566
|
+
lines.append("")
|
|
567
|
+
|
|
568
|
+
return "\n".join(lines)
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
# ---------------------------------------------------------------------------
|
|
572
|
+
# Public entry point
|
|
573
|
+
# ---------------------------------------------------------------------------
|
|
574
|
+
|
|
575
|
+
def generate_api_docs(
    func_rows: list[dict[str, Any]],
    type_rows: list[dict[str, Any]],
    call_rows: list[dict[str, Any]],
    output_dir: Path,
    import_rows: list[dict[str, Any]] | None = None,
    repo_path: Path | None = None,
) -> dict[str, Any]:
    """Write the three-level API documentation tree from pre-fetched rows.

    Creates ``{output_dir}/api_docs/`` containing ``index.md`` (L1), one
    page per module under ``modules/`` (L2) and one page per function under
    ``funcs/`` (L3).

    Args:
        func_rows: Rows from the fetch_all_functions_for_docs query. After
            unpacking, a row needs at least 11 values: module qn, module
            path, function qn, name, signature, return type, visibility,
            parameters, docstring, start line, end line; optionally the
            function path (index 11) and kind (index 12).
        type_rows: Rows from the fetch_all_types_for_docs query (at least 6
            values; a 7-value row carries members at index 4).
        call_rows: Rows from the fetch_all_calls query.
        output_dir: Directory to write ``api_docs/`` into.
        import_rows: Rows from the fetch_all_imports query (optional).
        repo_path: Repository root used to read source snippets (optional).

    Returns:
        Summary dict with ``module_count``, ``func_count`` and
        ``type_count``.
    """
    api_dir = output_dir / "api_docs"
    modules_dir = api_dir / "modules"
    funcs_dir = api_dir / "funcs"
    for directory in (modules_dir, funcs_dir):
        directory.mkdir(parents=True, exist_ok=True)

    callers_of, callees_of = _build_call_graph(call_rows)

    # ---- Group functions and types by module ----
    # module_qn -> {files: set, funcs: list, types: list}
    modules: dict[str, dict[str, Any]] = defaultdict(
        lambda: {"files": set(), "funcs": [], "types": []}
    )
    seen_funcs: set[str] = set()

    for row in func_rows:
        cols = _unpack_row(row)
        if len(cols) < 11:
            continue
        module_qn = cols[0] or "unknown"
        module_path = cols[1] or ""
        func_qn = cols[2] or ""
        # Keep only the first row seen for each qualified name.
        if func_qn in seen_funcs:
            continue
        seen_funcs.add(func_qn)

        func: dict[str, Any] = {
            "module_qn": module_qn,
            "qn": func_qn,
            "name": cols[3] or "",
            "signature": cols[4],
            "return_type": cols[5],
            "visibility": cols[6],
            "parameters": cols[7],
            "docstring": cols[8],
            "start_line": cols[9],
            "end_line": cols[10],
            # Fall back to the module path when the row has no function path.
            "path": cols[11] if len(cols) > 11 else module_path,
        }
        # Optional 13th column: kind (e.g. "macro").
        if len(cols) > 12:
            func["kind"] = cols[12]

        bucket = modules[module_qn]
        bucket["files"].add(module_path)
        bucket["funcs"].append(func)

    for row in type_rows:
        cols = _unpack_row(row)
        if len(cols) < 6:
            continue
        # The first column may be the type's qualified name
        # (e.g. "mod.StructName") or already the module qn.
        first_col = cols[0] or "unknown"
        type_name = cols[1] or ""
        dotted_suffix = f".{type_name}"
        if type_name and first_col.endswith(dotted_suffix):
            module_qn = first_col[: -len(dotted_suffix)]
        else:
            module_qn = first_col

        type_info: dict[str, Any] = {
            "name": type_name,
            "kind": cols[2],
            "signature": cols[3],
        }
        # 7-value rows carry a members/enum-values column before the lines;
        # 6-value rows go straight to the line numbers.
        if len(cols) >= 7:
            type_info["members"] = cols[4]
            line_span = (cols[5], cols[6])
        else:
            line_span = (cols[4], cols[5])
        type_info["start_line"], type_info["end_line"] = line_span
        modules[module_qn]["types"].append(type_info)

    # ---- Lookup table used for call trees and caller enrichment ----
    func_lookup: dict[str, dict] = {
        fn["qn"]: fn
        for data in modules.values()
        for fn in data["funcs"]
        if fn["qn"]
    }

    # ---- Import graph ----
    import_graph: dict[str, list[str]] = defaultdict(list)
    for row in import_rows or ():
        cols = _unpack_row(row)
        if len(cols) >= 2:
            import_graph[cols[0]].append(cols[1])

    # ---- Collect every known file per module ----
    # A .c file and its .h can share one module_qn, so the base names of the
    # individual function paths are added next to the module path.
    for data in modules.values():
        basenames = {Path(fn["path"]).name for fn in data["funcs"] if fn.get("path")}
        data["files"].update(basenames)
        data["files"].discard("")

    # ---- L3: one detail page per function ----
    total_funcs = 0
    for data in modules.values():
        for func in data["funcs"]:
            qn = func["qn"]
            if not qn:
                continue
            page = _render_func_detail(
                func,
                callers=callers_of.get(qn, []),
                callees=callees_of.get(qn, []),
                callees_of=callees_of,
                func_lookup=func_lookup,
                repo_path=repo_path,
            )
            target = funcs_dir / f"{_sanitise_filename(qn)}.md"
            target.write_text(page, encoding="utf-8")
            total_funcs += 1

    # ---- L2: one page per module, plus the summary row for the index ----
    module_summaries: list[dict[str, Any]] = []
    for module_qn in sorted(modules):
        data = modules[module_qn]
        funcs, types = data["funcs"], data["types"]
        files = sorted(data["files"])

        page = _render_module_page(
            module_qn, files, funcs, types,
            callees_of=callees_of,
            func_lookup=func_lookup,
        )
        target = modules_dir / f"{_sanitise_filename(module_qn)}.md"
        target.write_text(page, encoding="utf-8")

        vis_counts: dict[str, int] = defaultdict(int)
        for fn in funcs:
            vis_counts[fn.get("visibility") or "unknown"] += 1
        macro_count = sum(1 for fn in funcs if fn.get("kind") == "macro")

        module_summaries.append({
            "qn": module_qn,
            "files": files,
            "public": vis_counts.get("public", 0),
            "static": vis_counts.get("static", 0),
            "extern": vis_counts.get("extern", 0),
            "types": len(types),
            "total": len(funcs) + len(types),
            "macros": macro_count,
        })

    # ---- L1: global index ----
    total_types = sum(len(data["types"]) for data in modules.values())
    index_page = _render_index(
        module_summaries, total_funcs, total_types,
        import_graph=dict(import_graph) if import_graph else None,
    )
    (api_dir / "index.md").write_text(index_page, encoding="utf-8")

    logger.info(
        f"API docs generated: {len(modules)} modules, "
        f"{total_funcs} functions, {total_types} types"
    )
    return {
        "module_count": len(modules),
        "func_count": total_funcs,
        "type_count": total_types,
    }
|