code-graph-builder 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. code_graph_builder/__init__.py +82 -0
  2. code_graph_builder/builder.py +366 -0
  3. code_graph_builder/cgb_cli.py +32 -0
  4. code_graph_builder/cli.py +564 -0
  5. code_graph_builder/commands_cli.py +1288 -0
  6. code_graph_builder/config.py +340 -0
  7. code_graph_builder/constants.py +708 -0
  8. code_graph_builder/embeddings/__init__.py +40 -0
  9. code_graph_builder/embeddings/qwen3_embedder.py +573 -0
  10. code_graph_builder/embeddings/vector_store.py +584 -0
  11. code_graph_builder/examples/__init__.py +0 -0
  12. code_graph_builder/examples/example_configuration.py +276 -0
  13. code_graph_builder/examples/example_kuzu_usage.py +109 -0
  14. code_graph_builder/examples/example_semantic_search_full.py +347 -0
  15. code_graph_builder/examples/generate_wiki.py +915 -0
  16. code_graph_builder/examples/graph_export_example.py +100 -0
  17. code_graph_builder/examples/rag_example.py +206 -0
  18. code_graph_builder/examples/test_cli_demo.py +129 -0
  19. code_graph_builder/examples/test_embedding_api.py +153 -0
  20. code_graph_builder/examples/test_kuzu_local.py +190 -0
  21. code_graph_builder/examples/test_rag_redis.py +390 -0
  22. code_graph_builder/graph_updater.py +605 -0
  23. code_graph_builder/guidance/__init__.py +1 -0
  24. code_graph_builder/guidance/agent.py +123 -0
  25. code_graph_builder/guidance/prompts.py +74 -0
  26. code_graph_builder/guidance/toolset.py +264 -0
  27. code_graph_builder/language_spec.py +536 -0
  28. code_graph_builder/mcp/__init__.py +21 -0
  29. code_graph_builder/mcp/api_doc_generator.py +764 -0
  30. code_graph_builder/mcp/file_editor.py +207 -0
  31. code_graph_builder/mcp/pipeline.py +777 -0
  32. code_graph_builder/mcp/server.py +161 -0
  33. code_graph_builder/mcp/tools.py +1800 -0
  34. code_graph_builder/models.py +115 -0
  35. code_graph_builder/parser_loader.py +344 -0
  36. code_graph_builder/parsers/__init__.py +7 -0
  37. code_graph_builder/parsers/call_processor.py +306 -0
  38. code_graph_builder/parsers/call_resolver.py +139 -0
  39. code_graph_builder/parsers/definition_processor.py +796 -0
  40. code_graph_builder/parsers/factory.py +119 -0
  41. code_graph_builder/parsers/import_processor.py +293 -0
  42. code_graph_builder/parsers/structure_processor.py +145 -0
  43. code_graph_builder/parsers/type_inference.py +143 -0
  44. code_graph_builder/parsers/utils.py +134 -0
  45. code_graph_builder/rag/__init__.py +68 -0
  46. code_graph_builder/rag/camel_agent.py +429 -0
  47. code_graph_builder/rag/client.py +298 -0
  48. code_graph_builder/rag/config.py +239 -0
  49. code_graph_builder/rag/cypher_generator.py +67 -0
  50. code_graph_builder/rag/llm_backend.py +210 -0
  51. code_graph_builder/rag/markdown_generator.py +352 -0
  52. code_graph_builder/rag/prompt_templates.py +440 -0
  53. code_graph_builder/rag/rag_engine.py +640 -0
  54. code_graph_builder/rag/review_report.md +172 -0
  55. code_graph_builder/rag/tests/__init__.py +3 -0
  56. code_graph_builder/rag/tests/test_camel_agent.py +313 -0
  57. code_graph_builder/rag/tests/test_client.py +221 -0
  58. code_graph_builder/rag/tests/test_config.py +177 -0
  59. code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
  60. code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
  61. code_graph_builder/services/__init__.py +39 -0
  62. code_graph_builder/services/graph_service.py +465 -0
  63. code_graph_builder/services/kuzu_service.py +665 -0
  64. code_graph_builder/services/memory_service.py +171 -0
  65. code_graph_builder/settings.py +75 -0
  66. code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
  67. code_graph_builder/tests/__init__.py +1 -0
  68. code_graph_builder/tests/run_acceptance_check.py +378 -0
  69. code_graph_builder/tests/test_api_find.py +231 -0
  70. code_graph_builder/tests/test_api_find_integration.py +226 -0
  71. code_graph_builder/tests/test_basic.py +78 -0
  72. code_graph_builder/tests/test_c_api_extraction.py +388 -0
  73. code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
  74. code_graph_builder/tests/test_embedder.py +411 -0
  75. code_graph_builder/tests/test_integration_semantic.py +434 -0
  76. code_graph_builder/tests/test_mcp_protocol.py +298 -0
  77. code_graph_builder/tests/test_mcp_user_flow.py +190 -0
  78. code_graph_builder/tests/test_rag.py +404 -0
  79. code_graph_builder/tests/test_settings.py +135 -0
  80. code_graph_builder/tests/test_step1_graph_build.py +264 -0
  81. code_graph_builder/tests/test_step2_api_docs.py +323 -0
  82. code_graph_builder/tests/test_step3_embedding.py +278 -0
  83. code_graph_builder/tests/test_vector_store.py +552 -0
  84. code_graph_builder/tools/__init__.py +40 -0
  85. code_graph_builder/tools/graph_query.py +495 -0
  86. code_graph_builder/tools/semantic_search.py +387 -0
  87. code_graph_builder/types.py +333 -0
  88. code_graph_builder/utils/__init__.py +0 -0
  89. code_graph_builder/utils/path_utils.py +30 -0
  90. code_graph_builder-0.2.0.dist-info/METADATA +321 -0
  91. code_graph_builder-0.2.0.dist-info/RECORD +93 -0
  92. code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
  93. code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,764 @@
1
+ """Generate per-module and per-function API documentation from the knowledge graph.
2
+
3
+ Output layout (all under ``{artifact_dir}/api_docs/``):
4
+
5
+ index.md — L1 index: one row per module with summary counts
6
+ modules/
7
+ {module_qn}.md — L2 index: all interfaces in one module
8
+ funcs/
9
+ {func_qn}.md — L3 detail: signature, docstring, call graph
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from collections import defaultdict
15
+ from pathlib import Path
16
+ from typing import Any
17
+
18
+ from loguru import logger
19
+
20
+
21
+ # ---------------------------------------------------------------------------
22
+ # Data extraction helpers
23
+ # ---------------------------------------------------------------------------
24
+
25
def _unpack_row(row: dict[str, Any]) -> list[Any]:
    """Flatten a query result row into a plain list of column values.

    Two row shapes are accepted:

    - Legacy rows carrying a ``"result"`` key whose value is either a
      list/tuple of column values or a single scalar.
    - Named-column rows, whose values are returned in dict order.
    """
    try:
        payload = row["result"]
    except KeyError:
        # Named-column format: every value is a column.
        return [*row.values()]

    if isinstance(payload, (list, tuple)):
        return [*payload]
    # A scalar legacy result is wrapped so callers can always index.
    return [payload]
36
+
37
+
38
def _build_call_graph(
    call_rows: list[dict[str, Any]],
) -> tuple[dict[str, list[dict]], dict[str, list[dict]]]:
    """Index call edges in both directions.

    Each row is unpacked to ``[caller_qn, callee_qn, callee_path?,
    callee_start_line?]``. Rows with fewer than two columns are skipped and
    repeated (caller, callee) pairs are recorded only once.

    Returns:
        ``(callers_of, callees_of)``; both map a qualified name to a list of
        ``{"qn", "path", "start_line"}`` dicts.
    """
    incoming: dict[str, list[dict]] = defaultdict(list)
    outgoing: dict[str, list[dict]] = defaultdict(list)
    recorded: set[tuple[str, str]] = set()

    for raw_row in call_rows:
        fields = _unpack_row(raw_row)
        if len(fields) < 2:
            continue

        caller_qn, callee_qn = fields[0], fields[1]
        edge = (caller_qn, callee_qn)
        if edge in recorded:
            continue
        recorded.add(edge)

        # Optional trailing columns describe where the callee is defined.
        callee_path, callee_start = (fields[2:4] + [None, None])[:2]

        outgoing[caller_qn].append(
            {"qn": callee_qn, "path": callee_path, "start_line": callee_start}
        )
        # The row carries no location for the caller, so the reverse entry
        # leaves path/start_line unset rather than reusing the callee's.
        incoming[callee_qn].append(
            {"qn": caller_qn, "path": None, "start_line": None}
        )

    return incoming, outgoing
78
+
79
+
80
+ # ---------------------------------------------------------------------------
81
+ # Source code & call tree helpers
82
+ # ---------------------------------------------------------------------------
83
+
84
+ def _read_source_snippet(
85
+ path: str | None,
86
+ start_line: int | None,
87
+ end_line: int | None,
88
+ repo_path: Path | None = None,
89
+ ) -> str | None:
90
+ """Read function source code from the file system.
91
+
92
+ Returns the source code string or None if file cannot be read.
93
+ """
94
+ if not path or not start_line or not end_line:
95
+ return None
96
+
97
+ # Try absolute path first, then relative to repo_path
98
+ file_path = Path(path)
99
+ if not file_path.is_absolute() and repo_path:
100
+ file_path = repo_path / path
101
+
102
+ if not file_path.exists():
103
+ return None
104
+
105
+ try:
106
+ lines = file_path.read_text(encoding="utf-8", errors="replace").splitlines()
107
+ # start_line and end_line are 1-based
108
+ start = max(0, start_line - 1)
109
+ end = min(len(lines), end_line)
110
+ snippet = "\n".join(lines[start:end])
111
+ # Truncate very long functions
112
+ if len(snippet) > 3000:
113
+ snippet = snippet[:3000] + "\n /* ... truncated ... */"
114
+ return snippet
115
+ except (OSError, UnicodeDecodeError):
116
+ return None
117
+
118
+
119
+ def _build_call_tree(
120
+ qn: str,
121
+ callees_of: dict[str, list[dict]],
122
+ func_lookup: dict[str, dict],
123
+ depth: int = 2,
124
+ _visited: set | None = None,
125
+ ) -> list[str]:
126
+ """Build ASCII call tree lines for a function, up to `depth` levels.
127
+
128
+ Returns list of strings like:
129
+ ["├── func_b [static]", "│ └── func_c", "└── func_d"]
130
+ """
131
+ if _visited is None:
132
+ _visited = set()
133
+
134
+ _visited.add(qn)
135
+ callees = callees_of.get(qn, [])
136
+ lines: list[str] = []
137
+
138
+ for i, callee in enumerate(callees):
139
+ callee_qn = callee["qn"]
140
+ callee_func = func_lookup.get(callee_qn, {})
141
+ name = callee_func.get("name", callee_qn.rsplit(".", 1)[-1])
142
+ vis = callee_func.get("visibility", "")
143
+ vis_tag = f" [{vis}]" if vis and vis != "public" else ""
144
+
145
+ is_last = (i == len(callees) - 1)
146
+ prefix = "└── " if is_last else "├── "
147
+ lines.append(f"{prefix}{name}{vis_tag}")
148
+
149
+ # Recurse if not visited and within depth
150
+ if depth > 1 and callee_qn not in _visited:
151
+ sub_lines = _build_call_tree(
152
+ callee_qn, callees_of, func_lookup, depth - 1, _visited
153
+ )
154
+ child_prefix = " " if is_last else "│ "
155
+ for sub_line in sub_lines:
156
+ lines.append(f"{child_prefix}{sub_line}")
157
+
158
+ return lines
159
+
160
+
161
def _infer_ownership(func: dict[str, Any]) -> list[str]:
    """Guess memory-ownership hints from a function's name and return type.

    The heuristics are substring matches on the lower-cased function name:

    - A return type containing ``*`` plus an allocation-style name yields a
      "caller owns" hint; a lookup-style name yields a "borrowed" hint.
    - A release-style name yields a "pointer invalid after call" hint.

    Returns:
        The hint strings, possibly empty.
    """
    lowered = (func.get("name") or "").lower()
    return_type = func.get("return_type") or ""

    def mentions(*keywords: str) -> bool:
        return any(keyword in lowered for keyword in keywords)

    hints: list[str] = []

    # Ownership of a returned pointer.
    if "*" in return_type:
        if mentions("init", "create", "alloc", "new", "open", "dup", "clone"):
            hints.append(f"返回 `{return_type}`: 调用方拥有,需释放")
        elif mentions("get", "find", "lookup", "peek", "current"):
            hints.append(f"返回 `{return_type}`: 借用,不可释放")

    # Free/destroy style functions.
    if mentions("free", "destroy", "release", "close", "cleanup", "deinit"):
        hints.append("释放函数:调用后指针失效")

    return hints
182
+
183
+
184
+ # ---------------------------------------------------------------------------
185
+ # Markdown rendering
186
+ # ---------------------------------------------------------------------------
187
+
188
def _sanitise_filename(qn: str) -> str:
    """Turn a qualified name into a file name without path separators.

    ``/`` and ``\\`` become ``_``, newlines become spaces and carriage
    returns are dropped. If the UTF-8 encoding of the result exceeds 200
    bytes, it is cut to at most 180 bytes (never splitting a multi-byte
    character), right-stripped, and suffixed with ``_`` plus the first 8 hex
    digits of the MD5 of the *original* name so long names stay distinct.
    """
    import hashlib

    cleaned = qn
    for old, new in (("/", "_"), ("\\", "_"), ("\n", " "), ("\r", "")):
        cleaned = cleaned.replace(old, new)

    # Measure in bytes: file-name limits apply to the encoded form.
    raw = cleaned.encode("utf-8")
    if len(raw) > 200:
        head = raw[:180].decode("utf-8", errors="ignore").rstrip()
        digest = hashlib.md5(qn.encode("utf-8")).hexdigest()
        return f"{head}_{digest[:8]}"
    return cleaned
205
+
206
+
207
def _render_func_detail(
    func: dict[str, Any],
    callers: list[dict],
    callees: list[dict],
    callees_of: dict[str, list[dict]] | None = None,
    func_lookup: dict[str, dict] | None = None,
    module_desc: str = "",
    repo_path: Path | None = None,
) -> str:
    """Render the L3 detail page (Markdown) for a single function.

    Args:
        func: Function metadata; ``"qn"`` is required, the other keys
            (name, signature, return_type, visibility, parameters,
            docstring, start_line, end_line, path, kind, module_qn) are
            optional.
        callers: Caller dicts (``"qn"``, optional ``"path"``/``"start_line"``)
            listed in the "called by" section.
        callees: Direct callees. Currently unused by the rendering (the call
            tree is derived from ``callees_of``); kept for interface
            compatibility.
        callees_of: Full callee map used to draw the two-level call tree.
        func_lookup: Qualified name to function metadata, used for display
            names and for tagging callers from other modules.
        module_desc: Optional one-line module description appended to the
            module entry.
        repo_path: Repository root used to locate the source file.

    Returns:
        The page content as one newline-joined string.
    """
    if callees_of is None:
        callees_of = {}
    if func_lookup is None:
        func_lookup = {}

    lines: list[str] = []
    qn = func["qn"]
    name = func.get("name") or qn.rsplit(".", 1)[-1]
    module_qn = func.get("module_qn", "")
    kind = func.get("kind") or ""

    # Title
    lines.append(f"# {name}")
    lines.append("")

    # One-line summary: the first sentence of the docstring, or a TODO marker.
    doc = (func.get("docstring") or "").strip()
    if doc:
        first_sentence = doc.split(".")[0].strip() + "." if "." in doc else doc
        lines.append(f"> {first_sentence}")
    else:
        lines.append(f"> <!-- TODO: LLM generate description for {name} -->")
    lines.append("")

    # Metadata block
    sig = func.get("signature") or name
    if kind == "macro":
        lines.append("- 类型: 宏定义")
        lines.append(f"- 定义: `{sig}`")
    else:
        lines.append(f"- 签名: `{sig}`")
        if func.get("return_type"):
            lines.append(f"- 返回: `{func['return_type']}`")

    vis = func.get("visibility") or "unknown"
    loc_path = func.get("path") or ""
    start = func.get("start_line") or "?"
    end = func.get("end_line") or "?"

    # Heuristic: a public function is assumed to be declared in a header
    # named after the source file's stem.
    header_note = ""
    if vis == "public" and loc_path:
        header_name = Path(loc_path).stem + ".h"
        header_note = f" | 头文件: {header_name}"

    lines.append(f"- 可见性: {vis}{header_note}")
    lines.append(f"- 位置: {loc_path}:{start}-{end}")

    # Module, with its description inline when one is supplied.
    if module_desc:
        lines.append(f"- 模块: {module_qn} — {module_desc}")
    else:
        lines.append(f"- 模块: {module_qn}")
    lines.append("")

    # Full docstring, only when it is longer than a short summary.
    if doc and len(doc) > 80:
        lines.append("## 描述")
        lines.append("")
        lines.append(doc)
        lines.append("")

    # Call tree (two levels)
    tree_lines = _build_call_tree(qn, callees_of, func_lookup, depth=2)
    if tree_lines:
        lines.append("## 调用树")
        lines.append("")
        lines.append(f"{name}")
        lines.extend(tree_lines)
        lines.append("")

    # Called by
    lines.append(f"## 被调用 ({len(callers)})")
    lines.append("")
    if callers:
        for c in callers:
            caller_func = func_lookup.get(c["qn"], {})
            caller_module = caller_func.get("module_qn", "")
            # Tag the caller's module only when it differs from this one.
            module_tag = (
                f" ({caller_module})"
                if caller_module and caller_module != module_qn
                else ""
            )
            loc = ""
            if c.get("path") and c.get("start_line"):
                loc = f" → {c['path']}:{c['start_line']}"
            lines.append(f"- {c['qn']}{module_tag}{loc}")
    else:
        lines.append("*(无调用者)*")
    lines.append("")

    # Parameters & memory ownership (C/C++ oriented heuristics).
    params = func.get("parameters")
    # Keep only non-empty parameters so the table is never header-only.
    param_strs = [str(p) for p in params if p] if isinstance(params, list) else []
    ownership_hints = _infer_ownership(func)
    if param_strs or ownership_hints:
        lines.append("## 参数与内存")
        lines.append("")
        if param_strs:
            lines.append("| 参数 | 方向 | 所有权 |")
            lines.append("|------|------|--------|")
            for p_str in param_strs:
                # Heuristic: const pointer = input/borrow, pointer = in-out.
                direction = "in"
                ownership = ""
                if "*" in p_str:
                    if "const" in p_str:
                        ownership = "借用"
                    else:
                        direction = "in/out"
                        ownership = "借用,可修改"
                lines.append(f"| `{p_str}` | {direction} | {ownership} |")
            lines.append("")
        if ownership_hints:
            lines.extend(f"- {hint}" for hint in ownership_hints)
            lines.append("")

    # Source code (macros already show their definition in the metadata).
    if kind != "macro":
        source = _read_source_snippet(
            func.get("path"), func.get("start_line"), func.get("end_line"), repo_path
        )
        if source:
            lines.append("## 实现")
            lines.append("")
            # Pick the fence language from the extension; default to C.
            ext = Path(loc_path).suffix if loc_path else ""
            lang = "cpp" if ext in (".cpp", ".cc", ".cxx", ".hpp") else "c"
            lines.append(f"```{lang}")
            lines.append(source)
            lines.append("```")
            lines.append("")

    return "\n".join(lines)
351
+
352
+
353
+ def _render_module_page(
354
+ module_qn: str,
355
+ files: list[str],
356
+ funcs: list[dict[str, Any]],
357
+ types: list[dict[str, Any]],
358
+ callees_of: dict[str, list[dict]] | None = None,
359
+ func_lookup: dict[str, dict] | None = None,
360
+ module_desc: str = "",
361
+ ) -> str:
362
+ """Render L2 module index page."""
363
+ if callees_of is None:
364
+ callees_of = {}
365
+ if func_lookup is None:
366
+ func_lookup = {}
367
+
368
+ lines: list[str] = []
369
+ lines.append(f"# {module_qn}")
370
+ if module_desc:
371
+ lines.append("")
372
+ lines.append(f"> {module_desc}")
373
+ lines.append("")
374
+
375
+ # Header/implementation split
376
+ headers = [f for f in files if f.endswith((".h", ".hpp", ".hxx"))]
377
+ sources = [f for f in files if not f.endswith((".h", ".hpp", ".hxx"))]
378
+ if headers:
379
+ lines.append(f"**头文件**: {', '.join(headers)} | **实现**: {', '.join(sources) if sources else '—'}")
380
+ else:
381
+ lines.append(f"**文件**: {', '.join(files)}")
382
+ lines.append("")
383
+
384
+ # Separate macros from regular functions
385
+ regular_funcs = [f for f in funcs if f.get("kind") != "macro"]
386
+ macros = [f for f in funcs if f.get("kind") == "macro"]
387
+
388
+ # Call tree for public entry points
389
+ public_funcs = [f for f in regular_funcs if f.get("visibility") == "public"]
390
+ if public_funcs:
391
+ lines.append("## 调用树")
392
+ lines.append("")
393
+ for pf in public_funcs:
394
+ ret = f" → {pf['return_type']}" if pf.get("return_type") else ""
395
+ lines.append(f"{pf['name']}{ret}")
396
+ tree_lines = _build_call_tree(pf["qn"], callees_of, func_lookup, depth=2)
397
+ lines.extend(tree_lines)
398
+ lines.append("")
399
+
400
+ # Group functions by visibility
401
+ by_vis: dict[str, list[dict]] = defaultdict(list)
402
+ for f in regular_funcs:
403
+ by_vis[f.get("visibility") or "unknown"].append(f)
404
+
405
+ vis_order = ["public", "extern", "static", "unknown"]
406
+ vis_labels = {
407
+ "public": "公开接口",
408
+ "extern": "外部声明",
409
+ "static": "内部函数",
410
+ "unknown": "其他",
411
+ }
412
+
413
+ for vis in vis_order:
414
+ group = by_vis.get(vis)
415
+ if not group:
416
+ continue
417
+ lines.append(f"## {vis_labels.get(vis, vis)} ({len(group)})")
418
+ lines.append("")
419
+ lines.append("| 函数 | 签名 | 一句话 |")
420
+ lines.append("|------|------|--------|")
421
+ for f in group:
422
+ safe = _sanitise_filename(f["qn"])
423
+ sig = f.get("signature") or f["name"]
424
+ doc = (f.get("docstring") or "").strip()
425
+ brief = (doc.split(".")[0].strip() + ".") if doc and "." in doc else (doc or "—")
426
+ if len(brief) > 60:
427
+ brief = brief[:57] + "..."
428
+ lines.append(f"| [{f['name']}](../funcs/{safe}.md) | `{sig}` | {brief} |")
429
+ lines.append("")
430
+
431
+ # Types: structs, unions, enums with member info
432
+ if types:
433
+ # Group by kind
434
+ structs = [t for t in types if t.get("kind") in ("struct", None, "")]
435
+ unions = [t for t in types if t.get("kind") == "union"]
436
+ enums = [t for t in types if t.get("kind") == "enum"]
437
+ typedefs = [t for t in types if t.get("kind") == "typedef"]
438
+
439
+ if structs or unions:
440
+ lines.append(f"## 结构体 ({len(structs) + len(unions)})")
441
+ lines.append("")
442
+ for t in structs + unions:
443
+ kind_label = "union" if t.get("kind") == "union" else "struct"
444
+ lines.append(f"### {t.get('name', '?')} ({kind_label})")
445
+ lines.append("")
446
+ members = t.get("members") or t.get("parameters")
447
+ if members and isinstance(members, list):
448
+ for m in members:
449
+ if m:
450
+ lines.append(f"- `{m}`")
451
+ else:
452
+ sig = t.get("signature", "")
453
+ if sig:
454
+ lines.append(f"```c\n{sig}\n```")
455
+ lines.append("")
456
+
457
+ if enums:
458
+ lines.append(f"## 枚举 ({len(enums)})")
459
+ lines.append("")
460
+ for t in enums:
461
+ lines.append(f"### {t.get('name', '?')}")
462
+ lines.append("")
463
+ members = t.get("members") or t.get("parameters")
464
+ if members and isinstance(members, list):
465
+ lines.append(f"值: `{' | '.join(str(m) for m in members if m)}`")
466
+ else:
467
+ sig = t.get("signature", "")
468
+ if sig:
469
+ lines.append(f"```c\n{sig}\n```")
470
+ lines.append("")
471
+
472
+ if typedefs:
473
+ lines.append(f"## 类型别名 ({len(typedefs)})")
474
+ lines.append("")
475
+ lines.append("| 名称 | 定义 |")
476
+ lines.append("|------|------|")
477
+ for t in typedefs:
478
+ lines.append(f"| {t.get('name', '?')} | `{t.get('signature', '')}` |")
479
+ lines.append("")
480
+
481
+ # Macros
482
+ if macros:
483
+ lines.append(f"## 宏 ({len(macros)})")
484
+ lines.append("")
485
+ lines.append("| 宏 | 定义 |")
486
+ lines.append("|----|------|")
487
+ for m in macros:
488
+ sig = m.get("signature") or f"#define {m['name']}"
489
+ # Truncate long macro definitions
490
+ if len(sig) > 80:
491
+ sig = sig[:77] + "..."
492
+ lines.append(f"| {m['name']} | `{sig}` |")
493
+ lines.append("")
494
+
495
+ return "\n".join(lines)
496
+
497
+
498
+ def _render_index(
499
+ module_summaries: list[dict[str, Any]],
500
+ total_funcs: int,
501
+ total_types: int,
502
+ import_graph: dict[str, list[str]] | None = None,
503
+ ) -> str:
504
+ """Render L1 global index page."""
505
+ lines: list[str] = []
506
+ lines.append("# API Documentation Index")
507
+ lines.append("")
508
+ lines.append(f"Total: {len(module_summaries)} modules, "
509
+ f"{total_funcs} functions, {total_types} types")
510
+ lines.append("")
511
+
512
+ # Module table with description column
513
+ lines.append("| 模块 | 职责 | 头文件 | 函数 | 类型 | 宏 |")
514
+ lines.append("|------|------|--------|------|------|----|")
515
+
516
+ for m in module_summaries:
517
+ safe = _sanitise_filename(m["qn"])
518
+ # Find header files
519
+ headers = [f for f in m["files"] if f.endswith((".h", ".hpp", ".hxx"))]
520
+ header_str = ", ".join(headers) if headers else "—"
521
+ desc = m.get("desc", "—")
522
+ macro_count = m.get("macros", 0)
523
+ func_count = m["public"] + m["static"] + m["extern"]
524
+ type_count = m["types"]
525
+ lines.append(
526
+ f"| [{m['qn']}](modules/{safe}.md) | {desc} "
527
+ f"| {header_str} | {func_count} | {type_count} | {macro_count} |"
528
+ )
529
+ lines.append("")
530
+
531
+ # Include dependency tree
532
+ if import_graph:
533
+ lines.append("## #include 依赖")
534
+ lines.append("")
535
+ # Find root modules (not imported by anyone)
536
+ all_imported: set[str] = set()
537
+ for targets in import_graph.values():
538
+ all_imported.update(targets)
539
+ roots = [m for m in import_graph if m not in all_imported]
540
+ if not roots:
541
+ roots = sorted(import_graph.keys())[:5]
542
+
543
+ visited: set[str] = set()
544
+
545
+ def _render_tree(mod: str, prefix: str = "", is_last: bool = True) -> None:
546
+ if mod in visited:
547
+ connector = "└── " if is_last else "├── "
548
+ lines.append(f"{prefix}{connector}{mod} (已展开)")
549
+ return
550
+ visited.add(mod)
551
+ connector = "└── " if is_last else "├── "
552
+ lines.append(f"{prefix}{connector}{mod}")
553
+ children = import_graph.get(mod, [])
554
+ for j, child in enumerate(children):
555
+ child_is_last = (j == len(children) - 1)
556
+ child_prefix = prefix + (" " if is_last else "│ ")
557
+ _render_tree(child, child_prefix, child_is_last)
558
+
559
+ for i, root in enumerate(sorted(roots)):
560
+ if i > 0:
561
+ lines.append("")
562
+ lines.append(root)
563
+ children = import_graph.get(root, [])
564
+ for j, child in enumerate(children):
565
+ _render_tree(child, "", j == len(children) - 1)
566
+ lines.append("")
567
+
568
+ return "\n".join(lines)
569
+
570
+
571
+ # ---------------------------------------------------------------------------
572
+ # Public entry point
573
+ # ---------------------------------------------------------------------------
574
+
575
def generate_api_docs(
    func_rows: list[dict[str, Any]],
    type_rows: list[dict[str, Any]],
    call_rows: list[dict[str, Any]],
    output_dir: Path,
    import_rows: list[dict[str, Any]] | None = None,
    repo_path: Path | None = None,
) -> dict[str, Any]:
    """Write the three-level API documentation tree from pre-fetched rows.

    Creates ``{output_dir}/api_docs/`` containing ``index.md``, one page per
    module under ``modules/`` and one page per function under ``funcs/``.

    Args:
        func_rows: Function rows; each must unpack to at least 11 columns
            (module qn, module path, func qn, name, signature, return type,
            visibility, parameters, docstring, start line, end line), with
            optional path and kind columns after them. Shorter rows and
            repeated function qns are skipped.
        type_rows: Type rows; each must unpack to at least 6 columns.
        call_rows: Call-edge rows passed to the call-graph builder.
        output_dir: Directory in which ``api_docs/`` is created.
        import_rows: Optional (importer, imported) rows for the index tree.
        repo_path: Optional repository root used to read source snippets.

    Returns:
        Dict with ``module_count``, ``func_count`` and ``type_count``.
    """
    docs_root = output_dir / "api_docs"
    module_pages_dir = docs_root / "modules"
    func_pages_dir = docs_root / "funcs"
    for directory in (module_pages_dir, func_pages_dir):
        directory.mkdir(parents=True, exist_ok=True)

    callers_of, callees_of = _build_call_graph(call_rows)

    # ---- Group functions by module ----
    # module_qn → {files: set, funcs: list, types: list}
    modules: dict[str, dict[str, Any]] = defaultdict(
        lambda: {"files": set(), "funcs": [], "types": []}
    )
    known_qns: set[str] = set()

    for raw in func_rows:
        cols = _unpack_row(raw)
        if len(cols) < 11:
            continue
        module_qn = cols[0] or "unknown"
        module_path = cols[1] or ""
        func_qn = cols[2] or ""
        if func_qn in known_qns:
            continue
        known_qns.add(func_qn)

        record: dict[str, Any] = {
            "module_qn": module_qn,
            "qn": func_qn,
            "name": cols[3] or "",
            "signature": cols[4],
            "return_type": cols[5],
            "visibility": cols[6],
            "parameters": cols[7],
            "docstring": cols[8],
            "start_line": cols[9],
            "end_line": cols[10],
            # Column 11 is the function's own path when the query supplies it.
            "path": cols[11] if len(cols) > 11 else module_path,
        }
        # Column 12, when present, is the kind (e.g. "macro").
        if len(cols) > 12:
            record["kind"] = cols[12]

        bucket = modules[module_qn]
        bucket["files"].add(module_path)
        bucket["funcs"].append(record)

    for raw in type_rows:
        cols = _unpack_row(raw)
        if len(cols) < 6:
            continue
        # The first column is either the type's qualified name
        # ("mod.TypeName") or already the module qn.
        owner = cols[0] or "unknown"
        type_name = cols[1] or ""
        dotted_suffix = f".{type_name}"
        if type_name and owner.endswith(dotted_suffix):
            module_qn = owner[: -len(dotted_suffix)]
        else:
            module_qn = owner

        type_info: dict[str, Any] = {
            "name": type_name,
            "kind": cols[2],
            "signature": cols[3],
        }
        # 7+ columns: an extra members column precedes the line numbers.
        if len(cols) >= 7:
            type_info["members"] = cols[4]
            type_info["start_line"] = cols[5]
            type_info["end_line"] = cols[6]
        else:
            type_info["start_line"] = cols[4]
            type_info["end_line"] = cols[5]
        modules[module_qn]["types"].append(type_info)

    # ---- Build func_lookup for call tree and caller enrichment ----
    func_lookup: dict[str, dict] = {
        entry["qn"]: entry
        for bucket in modules.values()
        for entry in bucket["funcs"]
        if entry["qn"]
    }

    # ---- Build import graph ----
    import_graph: dict[str, list[str]] = defaultdict(list)
    for raw in import_rows or []:
        cols = _unpack_row(raw)
        if len(cols) >= 2:
            import_graph[cols[0]].append(cols[1])

    # ---- Collect all known files per module ----
    # Add the base name of every function's own path, then drop the empty
    # entry left behind by rows without a module path.
    for bucket in modules.values():
        basenames = {
            Path(entry["path"]).name
            for entry in bucket["funcs"]
            if entry.get("path")
        }
        bucket["files"].update(basenames)
        bucket["files"].discard("")

    # ---- Generate L3: per-function detail pages ----
    total_funcs = 0
    for bucket in modules.values():
        for entry in bucket["funcs"]:
            qn = entry["qn"]
            if not qn:
                continue
            page = _render_func_detail(
                entry,
                callers=callers_of.get(qn, []),
                callees=callees_of.get(qn, []),
                callees_of=callees_of,
                func_lookup=func_lookup,
                repo_path=repo_path,
            )
            target = func_pages_dir / f"{_sanitise_filename(qn)}.md"
            target.write_text(page, encoding="utf-8")
            total_funcs += 1

    # ---- Generate L2: per-module pages ----
    module_summaries: list[dict[str, Any]] = []
    for module_qn in sorted(modules):
        bucket = modules[module_qn]
        funcs = bucket["funcs"]
        types = bucket["types"]
        files = sorted(bucket["files"])

        page = _render_module_page(
            module_qn, files, funcs, types,
            callees_of=callees_of,
            func_lookup=func_lookup,
        )
        target = module_pages_dir / f"{_sanitise_filename(module_qn)}.md"
        target.write_text(page, encoding="utf-8")

        # Summary stats for the index row.
        vis_counts: dict[str, int] = defaultdict(int)
        macro_count = 0
        for entry in funcs:
            vis_counts[entry.get("visibility") or "unknown"] += 1
            if entry.get("kind") == "macro":
                macro_count += 1

        module_summaries.append({
            "qn": module_qn,
            "files": files,
            "public": vis_counts.get("public", 0),
            "static": vis_counts.get("static", 0),
            "extern": vis_counts.get("extern", 0),
            "types": len(types),
            "total": len(funcs) + len(types),
            "macros": macro_count,
        })

    # ---- Generate L1: global index ----
    total_types = sum(len(bucket["types"]) for bucket in modules.values())
    index_page = _render_index(
        module_summaries, total_funcs, total_types,
        import_graph=dict(import_graph) if import_graph else None,
    )
    (docs_root / "index.md").write_text(index_page, encoding="utf-8")

    logger.info(
        f"API docs generated: {len(modules)} modules, "
        f"{total_funcs} functions, {total_types} types"
    )
    return {
        "module_count": len(modules),
        "func_count": total_funcs,
        "type_count": total_types,
    }