code-graph-builder 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_graph_builder/__init__.py +82 -0
- code_graph_builder/builder.py +366 -0
- code_graph_builder/cgb_cli.py +32 -0
- code_graph_builder/cli.py +564 -0
- code_graph_builder/commands_cli.py +1288 -0
- code_graph_builder/config.py +340 -0
- code_graph_builder/constants.py +708 -0
- code_graph_builder/embeddings/__init__.py +40 -0
- code_graph_builder/embeddings/qwen3_embedder.py +573 -0
- code_graph_builder/embeddings/vector_store.py +584 -0
- code_graph_builder/examples/__init__.py +0 -0
- code_graph_builder/examples/example_configuration.py +276 -0
- code_graph_builder/examples/example_kuzu_usage.py +109 -0
- code_graph_builder/examples/example_semantic_search_full.py +347 -0
- code_graph_builder/examples/generate_wiki.py +915 -0
- code_graph_builder/examples/graph_export_example.py +100 -0
- code_graph_builder/examples/rag_example.py +206 -0
- code_graph_builder/examples/test_cli_demo.py +129 -0
- code_graph_builder/examples/test_embedding_api.py +153 -0
- code_graph_builder/examples/test_kuzu_local.py +190 -0
- code_graph_builder/examples/test_rag_redis.py +390 -0
- code_graph_builder/graph_updater.py +605 -0
- code_graph_builder/guidance/__init__.py +1 -0
- code_graph_builder/guidance/agent.py +123 -0
- code_graph_builder/guidance/prompts.py +74 -0
- code_graph_builder/guidance/toolset.py +264 -0
- code_graph_builder/language_spec.py +536 -0
- code_graph_builder/mcp/__init__.py +21 -0
- code_graph_builder/mcp/api_doc_generator.py +764 -0
- code_graph_builder/mcp/file_editor.py +207 -0
- code_graph_builder/mcp/pipeline.py +777 -0
- code_graph_builder/mcp/server.py +161 -0
- code_graph_builder/mcp/tools.py +1800 -0
- code_graph_builder/models.py +115 -0
- code_graph_builder/parser_loader.py +344 -0
- code_graph_builder/parsers/__init__.py +7 -0
- code_graph_builder/parsers/call_processor.py +306 -0
- code_graph_builder/parsers/call_resolver.py +139 -0
- code_graph_builder/parsers/definition_processor.py +796 -0
- code_graph_builder/parsers/factory.py +119 -0
- code_graph_builder/parsers/import_processor.py +293 -0
- code_graph_builder/parsers/structure_processor.py +145 -0
- code_graph_builder/parsers/type_inference.py +143 -0
- code_graph_builder/parsers/utils.py +134 -0
- code_graph_builder/rag/__init__.py +68 -0
- code_graph_builder/rag/camel_agent.py +429 -0
- code_graph_builder/rag/client.py +298 -0
- code_graph_builder/rag/config.py +239 -0
- code_graph_builder/rag/cypher_generator.py +67 -0
- code_graph_builder/rag/llm_backend.py +210 -0
- code_graph_builder/rag/markdown_generator.py +352 -0
- code_graph_builder/rag/prompt_templates.py +440 -0
- code_graph_builder/rag/rag_engine.py +640 -0
- code_graph_builder/rag/review_report.md +172 -0
- code_graph_builder/rag/tests/__init__.py +3 -0
- code_graph_builder/rag/tests/test_camel_agent.py +313 -0
- code_graph_builder/rag/tests/test_client.py +221 -0
- code_graph_builder/rag/tests/test_config.py +177 -0
- code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
- code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
- code_graph_builder/services/__init__.py +39 -0
- code_graph_builder/services/graph_service.py +465 -0
- code_graph_builder/services/kuzu_service.py +665 -0
- code_graph_builder/services/memory_service.py +171 -0
- code_graph_builder/settings.py +75 -0
- code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
- code_graph_builder/tests/__init__.py +1 -0
- code_graph_builder/tests/run_acceptance_check.py +378 -0
- code_graph_builder/tests/test_api_find.py +231 -0
- code_graph_builder/tests/test_api_find_integration.py +226 -0
- code_graph_builder/tests/test_basic.py +78 -0
- code_graph_builder/tests/test_c_api_extraction.py +388 -0
- code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
- code_graph_builder/tests/test_embedder.py +411 -0
- code_graph_builder/tests/test_integration_semantic.py +434 -0
- code_graph_builder/tests/test_mcp_protocol.py +298 -0
- code_graph_builder/tests/test_mcp_user_flow.py +190 -0
- code_graph_builder/tests/test_rag.py +404 -0
- code_graph_builder/tests/test_settings.py +135 -0
- code_graph_builder/tests/test_step1_graph_build.py +264 -0
- code_graph_builder/tests/test_step2_api_docs.py +323 -0
- code_graph_builder/tests/test_step3_embedding.py +278 -0
- code_graph_builder/tests/test_vector_store.py +552 -0
- code_graph_builder/tools/__init__.py +40 -0
- code_graph_builder/tools/graph_query.py +495 -0
- code_graph_builder/tools/semantic_search.py +387 -0
- code_graph_builder/types.py +333 -0
- code_graph_builder/utils/__init__.py +0 -0
- code_graph_builder/utils/path_utils.py +30 -0
- code_graph_builder-0.2.0.dist-info/METADATA +321 -0
- code_graph_builder-0.2.0.dist-info/RECORD +93 -0
- code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
- code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Test code_graph_builder with Kùzu embedded database (no Docker)."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import sys
|
|
7
|
+
import json
|
|
8
|
+
import time
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
PROJECT_ROOT = Path(__file__).parent.parent.parent
|
|
12
|
+
sys.path.insert(0, str(PROJECT_ROOT))
|
|
13
|
+
|
|
14
|
+
from code_graph_builder import CodeGraphBuilder
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_kuzu_backend(repo_path="/Users/jiaojeremy/CodeFile/tinycc"):
    """Build, inspect and export a code graph using the Kùzu backend.

    The function indexes ``repo_path`` into a Kùzu database stored under
    ``PROJECT_ROOT / "tinycc_kuzu"``, prints build statistics, runs one sample
    Cypher query and dumps the exported graph to ``export.json`` in the same
    directory.

    Args:
        repo_path: Repository to index. Defaults to the location the script
            was originally written against; pass another path to run the
            check on a different machine.

    Returns:
        ``0`` when every step succeeded, ``1`` when any step raised (the
        traceback is printed).
    """
    output_dir = PROJECT_ROOT / "tinycc_kuzu"
    output_dir.mkdir(exist_ok=True)

    print("=" * 80)
    print("Testing code_graph_builder with Kùzu (No Docker)")
    print("=" * 80)
    print(f"Repository path: {repo_path}")
    print("Backend: Kùzu embedded database")
    print()

    print("Initializing CodeGraphBuilder with Kùzu backend...")

    try:
        # Construction lives inside the ``try`` so that a bad repository path
        # or backend configuration is reported through the 0/1 return value
        # instead of aborting the caller before it can print its summary.
        # NOTE(review): the sibling example test_rag_redis.py passes
        # ``backend_config=`` rather than ``db_config=`` — confirm which
        # keyword CodeGraphBuilder actually accepts.
        builder = CodeGraphBuilder(
            repo_path=repo_path,
            backend="kuzu",  # No Docker required!
            db_config={
                "db_path": str(output_dir / "tinycc_graph.db"),
                "batch_size": 1000,
            },
            exclude_paths=frozenset({"tests", "win32", "examples"}),
        )

        print("Building code graph...")
        # perf_counter is monotonic, so the reported duration cannot be
        # distorted by wall-clock adjustments during the build.
        start_time = time.perf_counter()
        result = builder.build_graph(clean=True)
        duration = time.perf_counter() - start_time

        print()
        print("=" * 80)
        print("BUILD RESULTS")
        print("=" * 80)
        print(f"Duration: {duration:.2f} seconds")
        print(f"Files processed: {result.files_processed}")
        print(f"Nodes created: {result.nodes_created}")
        print(f"Relationships created: {result.relationships_created}")
        print(f"Functions found: {result.functions_found}")
        print(f"Classes found: {result.classes_found}")
        print()

        print("Getting statistics...")
        stats = builder.get_statistics()
        print(f"Total nodes: {stats.get('total_nodes', 0)}")
        print(f"Total relationships: {stats.get('total_relationships', 0)}")
        print()

        print("Testing Cypher query...")
        results = builder.query("MATCH (f:Function) RETURN f.name LIMIT 5")
        print(f"Query returned {len(results)} results")
        print()

        print("Exporting graph data...")
        graph_data = builder.export_graph()

        export_file = output_dir / "export.json"
        with open(export_file, "w", encoding="utf-8") as f:
            # default=str stringifies any value json cannot serialize natively.
            json.dump(graph_data, f, indent=2, default=str)
        print(f"Exported to: {export_file}")
        print()

        print("=" * 80)
        print("KÙZU BACKEND TEST COMPLETED SUCCESSFULLY")
        print("=" * 80)
        print()
        print(f"Database location: {output_dir / 'tinycc_graph.db'}")
        print("You can query this database directly using Kùzu CLI or Python API")

    except Exception as e:
        # Top-level boundary of a demo script: report, show the traceback and
        # signal failure through the return value.
        print(f"ERROR: {e}")
        import traceback

        traceback.print_exc()
        return 1

    return 0
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def test_memory_backend(repo_path="/Users/jiaojeremy/CodeFile/tinycc"):
    """Build and export a code graph using the in-memory backend.

    The function indexes ``repo_path`` with ``backend="memory"``, prints build
    statistics and writes the exported graph to
    ``PROJECT_ROOT / "tinycc_memory" / "graph.json"``.

    Args:
        repo_path: Repository to index. Defaults to the location the script
            was originally written against; pass another path to run the
            check on a different machine.

    Returns:
        ``0`` when every step succeeded, ``1`` when any step raised (the
        traceback is printed).
    """
    output_dir = PROJECT_ROOT / "tinycc_memory"
    output_dir.mkdir(exist_ok=True)

    print("\n" + "=" * 80)
    print("Testing code_graph_builder with Memory backend")
    print("=" * 80)

    print("Initializing CodeGraphBuilder with Memory backend...")

    try:
        # Construction lives inside the ``try`` so that a bad repository path
        # is reported through the 0/1 return value instead of aborting the
        # caller before it can print its summary.
        builder = CodeGraphBuilder(
            repo_path=repo_path,
            backend="memory",  # No database at all!
            exclude_paths=frozenset({"tests", "win32", "examples"}),
        )

        print("Building code graph...")
        # perf_counter is monotonic, so the reported duration cannot be
        # distorted by wall-clock adjustments during the build.
        start_time = time.perf_counter()
        result = builder.build_graph()
        duration = time.perf_counter() - start_time

        print()
        print("=" * 80)
        print("BUILD RESULTS")
        print("=" * 80)
        print(f"Duration: {duration:.2f} seconds")
        print(f"Nodes created: {result.nodes_created}")
        print(f"Relationships created: {result.relationships_created}")
        print()

        stats = builder.get_statistics()
        print(f"Total nodes: {stats.get('total_nodes', 0)}")
        print(f"Total relationships: {stats.get('total_relationships', 0)}")

        graph_data = builder.export_graph()
        export_file = output_dir / "graph.json"
        with open(export_file, "w", encoding="utf-8") as f:
            # default=str stringifies any value json cannot serialize natively.
            json.dump(graph_data, f, indent=2, default=str)
        print(f"Exported to: {export_file}")

        print()
        print("MEMORY BACKEND TEST COMPLETED SUCCESSFULLY")

    except Exception as e:
        # Top-level boundary of a demo script: report, show the traceback and
        # signal failure through the return value.
        print(f"ERROR: {e}")
        import traceback

        traceback.print_exc()
        return 1

    return 0
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def main():
    """Run the Kùzu and memory backend checks and print a summary.

    Returns:
        The larger of the two status codes returned by the checks, so the
        result is non-zero as soon as either check reports failure.
    """
    rule = "=" * 80

    def verdict(status):
        # A check signals success with status 0.
        if status == 0:
            return "✅ PASSED"
        return "❌ FAILED"

    print("Code Graph Builder - Local Deployment Test")
    print("No Docker required!")
    print()

    # Both checks always run; a failure in the first does not skip the second.
    kuzu_status = test_kuzu_backend()
    memory_status = test_memory_backend()

    print("\n" + rule)
    print("ALL TESTS COMPLETED")
    print(rule)
    print()
    print("Summary:")
    print(f" Kùzu backend: {verdict(kuzu_status)}")
    print(f" Memory backend: {verdict(memory_status)}")
    print()
    print("You can now use code_graph_builder without Docker!")
    print()
    print("Usage:")
    for backend_name in ("kuzu", "memory"):
        print(f' builder = CodeGraphBuilder(repo_path, backend="{backend_name}")')

    return max(kuzu_status, memory_status)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
if __name__ == "__main__":
|
|
190
|
+
sys.exit(main())
|
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""RAG全流程测试脚本 - 使用 Redis 代码仓数据,按模块生成多页 wiki。
|
|
3
|
+
|
|
4
|
+
源码上下文方案(参考 deepwiki):
|
|
5
|
+
- 通过 qualified_name 推导 .c 文件路径(redis.src.<module>.<func> -> src/<module>.c)
|
|
6
|
+
- 用 start_line/end_line 精确读取函数体
|
|
7
|
+
- 按 deepwiki 格式组装 <File Path> 上下文块传给 LLM
|
|
8
|
+
|
|
9
|
+
Usage:
|
|
10
|
+
python test_rag_redis.py
|
|
11
|
+
python test_rag_redis.py --max-pages 5
|
|
12
|
+
python test_rag_redis.py --repo-path /path/to/redis --max-pages 20
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import argparse
|
|
18
|
+
import os
|
|
19
|
+
import sys
|
|
20
|
+
from collections import Counter
|
|
21
|
+
from datetime import datetime
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
|
|
24
|
+
MAX_SOURCE_CHARS_PER_FUNC = 2000
|
|
25
|
+
MAX_FUNCS_IN_CONTEXT = 6
|
|
26
|
+
|
|
27
|
+
PROJECT_ROOT = Path(__file__).parent.parent.parent
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def setup_environment():
    """Prepare ``sys.path`` and environment variables for the script.

    Puts ``PROJECT_ROOT`` at the front of ``sys.path``, then loads
    ``KEY=VALUE`` pairs from ``PROJECT_ROOT / ".env"`` when that file exists.
    Variables already present in the process environment are never
    overwritten. Blank lines, ``#`` comments and lines without ``=`` are
    ignored, and one pair of matching surrounding quotes is removed from a
    value.

    Exits the process with status 1 when ``MOONSHOT_API_KEY`` is still unset
    or empty afterwards.
    """
    sys.path.insert(0, str(PROJECT_ROOT))
    env_file = PROJECT_ROOT / ".env"
    if env_file.exists():
        # Explicit encoding: the locale default may not be UTF-8.
        with open(env_file, encoding="utf-8") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue
                key, value = line.split("=", 1)
                key = key.strip()
                value = value.strip()
                if not key:
                    # A line such as "=value" has no variable name; an empty
                    # name is not a valid environment variable.
                    continue
                # KEY="value" / KEY='value' should yield the bare value,
                # otherwise the quotes end up inside e.g. the API key.
                if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                    value = value[1:-1]
                os.environ.setdefault(key, value)
    if not os.getenv("MOONSHOT_API_KEY"):
        print("错误: MOONSHOT_API_KEY 未设置")
        sys.exit(1)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def load_redis_graph(db_path: Path, repo_path: Path):
    """Construct a ``CodeGraphBuilder`` configured for the Kùzu backend.

    Args:
        db_path: Location of the Kùzu database, passed as ``db_path`` in the
            backend configuration.
        repo_path: Root of the source repository.

    Returns:
        The configured builder; scanning is restricted to the ``"c"`` language.
    """
    # Imported here rather than at module level, as in the original script.
    from code_graph_builder import CodeGraphBuilder

    backend_options = {"db_path": str(db_path)}
    scan_options = {"include_languages": {"c"}}
    return CodeGraphBuilder(
        repo_path=str(repo_path),
        backend="kuzu",
        backend_config=backend_options,
        scan_config=scan_options,
    )
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def qualified_name_to_file(qname: str, repo_path: Path) -> Path | None:
    """Map a qualified name to its C source file under ``src/``.

    Rule: ``<root>.src.<module>.<func>`` maps to ``<repo>/src/<module>.c``.
    The first segment is not inspected. Names with fewer than four segments,
    or whose second segment is not ``src``, are not handled.

    Args:
        qname: Dot-separated qualified name of a function.
        repo_path: Root of the source repository.

    Returns:
        The path of the source file if it exists on disk, otherwise ``None``.
    """
    segments = qname.split(".")
    is_src_symbol = len(segments) >= 4 and segments[1] == "src"
    if not is_src_symbol:
        return None

    candidate = repo_path / "src" / f"{segments[2]}.c"
    if candidate.exists():
        return candidate
    return None
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def read_function_source(func: dict, repo_path: Path) -> str | None:
    """Read a function's source text from disk using its recorded line span.

    Args:
        func: Function record; the keys ``qualified_name``, ``start_line`` and
            ``end_line`` are read, each with a fallback default.
        repo_path: Root of the source repository.

    Returns:
        The text of lines ``start_line``..``end_line`` (1-based, inclusive),
        cut to ``MAX_SOURCE_CHARS_PER_FUNC`` characters plus a truncation
        marker when longer. ``None`` when no start line is recorded, when the
        span is a single line (treated as a forward declaration), when the
        source file cannot be resolved, or when reading it fails.
    """
    first_line = func.get("start_line", 0)
    last_line = func.get("end_line", 0)
    if first_line == 0 or first_line == last_line:
        return None

    source_file = qualified_name_to_file(func.get("qualified_name", ""), repo_path)
    if source_file is None:
        return None

    try:
        # Undecodable bytes are replaced instead of failing the whole read.
        with open(source_file, encoding="utf-8", errors="replace") as handle:
            all_lines = handle.readlines()
    except OSError:
        return None

    snippet = "".join(all_lines[first_line - 1 : last_line])
    if len(snippet) <= MAX_SOURCE_CHARS_PER_FUNC:
        return snippet
    return snippet[:MAX_SOURCE_CHARS_PER_FUNC] + "\n /* ... truncated ... */"
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def build_source_context(functions: list[dict], repo_path: Path) -> str:
    """Assemble per-file source context blocks for the LLM prompt.

    Functions whose source cannot be read are skipped. The remaining snippets
    are grouped by ``src/<module>.c`` in first-seen order; each group is
    rendered as a ``## File Path:`` header followed by one fenced ``c`` block
    per function, and groups are separated by a dashed rule.

    Args:
        functions: Function records providing ``name``, ``qualified_name``,
            ``start_line`` and ``end_line``.
        repo_path: Root of the source repository.

    Returns:
        The assembled context, or an empty string when no snippet was read.
    """
    snippets_by_file: dict[str, list[str]] = {}

    for fn in functions:
        code = read_function_source(fn, repo_path)
        if not code:
            continue

        segments = fn.get("qualified_name", "").split(".")
        if len(segments) >= 4 and segments[1] == "src":
            module = segments[2]
        else:
            module = "unknown"
        label = f"src/{module}.c"

        snippet = f"// {fn['name']} (line {fn['start_line']}-{fn['end_line']})\n{code}"
        if label not in snippets_by_file:
            snippets_by_file[label] = []
        snippets_by_file[label].append(snippet)

    if not snippets_by_file:
        return ""

    sections = []
    for label, snippets in snippets_by_file.items():
        fenced = "\n\n".join(f"```c\n{snippet}\n```" for snippet in snippets)
        sections.append(f"## File Path: {label}\n\n{fenced}")

    return "\n\n----------\n\n".join(sections)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def build_module_index(builder) -> dict[str, list[dict]]:
    """Group ``Function`` nodes by module name (``src/`` files only).

    Runs one query for the name, qualified name and line span of every
    ``Function`` node, then keeps the rows whose qualified name has the shape
    ``<root>.src.<module>.<func>``.

    Args:
        builder: Object exposing ``query(cypher)``; each returned row is
            indexed with ``row["result"]`` to obtain the four selected values.

    Returns:
        Mapping of module name to a list of records with the keys ``name``,
        ``qualified_name``, ``start_line`` and ``end_line``. Missing line
        numbers are stored as ``0``.
    """
    rows = builder.query(
        "MATCH (f:Function) RETURN f.name, f.qualified_name, f.start_line, f.end_line"
    )
    modules: dict[str, list[dict]] = {}
    for row in rows:
        name, qname, start_line, end_line = row["result"]
        # A node without a qualified name cannot be attributed to a module;
        # skip it instead of letting one such row abort the whole index.
        if not qname:
            continue
        parts = qname.split(".")
        if len(parts) < 4 or parts[1] != "src":
            continue
        modules.setdefault(parts[2], []).append({
            "name": name,
            "qualified_name": qname,
            "start_line": start_line or 0,
            "end_line": end_line or 0,
        })
    return modules
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def select_representative_functions(functions: list[dict], n: int) -> list[dict]:
    """Pick the longest multi-line functions.

    Only functions whose ``end_line`` exceeds ``start_line`` by more than two
    are considered. They are ordered by line span, longest first; functions
    with equal spans keep their input order.

    Args:
        functions: Records providing ``start_line`` and ``end_line``.
        n: Slice bound applied to the ordered candidates.

    Returns:
        A new list with the first ``n`` candidates; the input is not modified.
    """
    def span(fn: dict) -> int:
        return fn["end_line"] - fn["start_line"]

    candidates = [fn for fn in functions if span(fn) > 2]
    return sorted(candidates, key=span, reverse=True)[:n]
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def get_module_call_stats(builder, module_name: str) -> dict:
    """Collect call-relationship statistics for one module.

    Two queries are issued over ``CALLS`` edges, each limited to the five
    highest counts: one restricted to callers inside the module (reporting
    callee names), one restricted to callees inside the module (reporting
    caller names). A function counts as inside the module when its qualified
    name contains ``.src.<module_name>.``.

    Args:
        builder: Object exposing ``query(cypher)``; each returned row is
            indexed with ``row["result"]``.
        module_name: Module name interpolated into the query text.

    Returns:
        ``{"top_called": [(name, count), ...], "top_callers": [(name, count), ...]}``.
    """
    # NOTE(review): module_name is interpolated into the query text unescaped;
    # a name containing a single quote would break the query.
    module_marker = f".src.{module_name}."

    def top_five(filter_alias: str, report_alias: str) -> list:
        # filter_alias is the side constrained to the module; report_alias is
        # the side whose names are counted.
        cypher = (
            "\n"
            "MATCH (caller:Function)-[:CALLS]->(callee:Function)\n"
            f"WHERE {filter_alias}.qualified_name CONTAINS '{module_marker}'\n"
            f"RETURN {report_alias}.name AS name, count(*) AS cnt\n"
            "ORDER BY cnt DESC LIMIT 5\n"
        )
        return [(rec["result"][0], rec["result"][1]) for rec in builder.query(cypher)]

    return {
        "top_called": top_five("caller", "callee"),
        "top_callers": top_five("callee", "caller"),
    }
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def analyze_module_page(
    module_name: str,
    functions: list[dict],
    agent,
    repo_path: Path,
    call_stats: dict,
) -> str:
    """Ask the agent to analyze one module, injecting real source context.

    The prompt contains the module name, up to fifteen functions ordered by
    start line, the call statistics (when present), source snippets of the
    representative functions (or a note that the source is unavailable) and
    the required Markdown outline.

    Args:
        module_name: Module name; also used to name ``src/<module_name>.c``.
        functions: Function records of the module.
        agent: Object exposing ``analyze(task=...)`` whose result has a
            ``content`` attribute.
        repo_path: Root of the source repository.
        call_stats: Mapping with ``top_called`` and ``top_callers`` lists of
            ``(name, count)`` pairs.

    Returns:
        The ``content`` of the agent's response.
    """
    picked = select_representative_functions(functions, MAX_FUNCS_IN_CONTEXT)
    source_context = build_source_context(picked, repo_path)

    ordered = sorted(functions, key=lambda fn: fn["start_line"])
    func_list = "\n".join(
        f"- `{fn['name']}` (line {fn['start_line']}-{fn['end_line']})"
        for fn in ordered[:15]
    )

    called_lines = [
        f" - `{name}`: 被调用 {cnt} 次" for name, cnt in call_stats["top_called"]
    ]
    caller_lines = [
        f" - `{name}`: 调用 {cnt} 次" for name, cnt in call_stats["top_callers"]
    ]
    top_called_str = "\n".join(called_lines)
    top_callers_str = "\n".join(caller_lines)

    if not source_context:
        context_section = "(源文件不可访问,仅凭函数名分析)"
    else:
        context_section = (
            f"以下是从源文件 `src/{module_name}.c` 中提取的代表性函数实现:\n\n"
            f"<START_OF_CONTEXT>\n{source_context}\n<END_OF_CONTEXT>"
        )

    # Each statistics paragraph is included only when it has entries.
    call_paragraphs = []
    if top_called_str:
        call_paragraphs.append(f"\n该模块最常调用的外部函数:\n{top_called_str}\n")
    if top_callers_str:
        call_paragraphs.append(f"\n最常调用该模块函数的外部函数:\n{top_callers_str}\n")
    call_info = "".join(call_paragraphs)

    prompt_parts = [
        f"请对 Redis 数据库的 `{module_name}` 模块(`src/{module_name}.c`)进行系统性分析。\n\n",
        f"该模块共 {len(functions)} 个函数,部分列表:\n{func_list}\n",
        f"{call_info}\n",
        f"{context_section}\n\n",
        "请按如下结构输出(Markdown 格式):\n",
        "## 模块概述\n",
        "## 核心函数分析(结合上方真实代码)\n",
        "## 模块间依赖关系\n",
        "## 关键实现细节\n",
    ]
    task = "".join(prompt_parts)

    return agent.analyze(task=task).content
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def generate_wiki(
    builder,
    output_dir: Path,
    max_pages: int,
    repo_path: Path,
) -> tuple[Path, int]:
    """Generate a multi-page Markdown wiki, one page per ``src/`` module.

    Modules are ranked by function count and the first ``max_pages`` are
    analyzed by the LLM agent. A module whose analysis raises still gets a
    page that records the error, so one failure does not abort the run. The
    report is written to ``output_dir / "redis_wiki_<timestamp>.md"``.

    Args:
        builder: Graph builder exposing ``query(cypher)``.
        output_dir: Directory for the report; created when missing.
        max_pages: Maximum number of module pages to generate.
        repo_path: Root of the source repository.

    Returns:
        ``(report_path, page_count)``; the count includes failed pages.
    """
    from code_graph_builder.rag.camel_agent import CamelAgent
    from code_graph_builder.rag.client import create_llm_client

    # Looked up once so the client and the report header name the same model.
    model_name = os.getenv("MOONSHOT_MODEL", "kimi-k2.5")

    llm_client = create_llm_client(
        api_key=os.getenv("MOONSHOT_API_KEY"),
        model=model_name,
        temperature=1.0,
    )
    agent = CamelAgent(
        role="Redis 数据库代码分析专家",
        goal="结合真实源码系统分析 Redis 各模块的功能、实现细节和架构关系",
        backstory="拥有20年 C 语言和数据库系统开发经验,深入理解 Redis 源码架构",
        llm_client=llm_client,
    )

    modules = build_module_index(builder)
    sorted_modules = sorted(modules.items(), key=lambda x: -len(x[1]))
    pages_to_generate = sorted_modules[:max_pages]

    print(f"\n共 {len(modules)} 个模块(src/),将生成 {len(pages_to_generate)} 个 wiki 页面")
    print(f"源码路径: {repo_path}")
    print("模块(按函数数量排序):")

    # Counting readable snippets reads source files from disk, so it is done
    # once per module here and reused by the generation loop below.
    snippet_counts: dict[str, int] = {}
    for name, funcs in pages_to_generate:
        rep = select_representative_functions(funcs, MAX_FUNCS_IN_CONTEXT)
        src_count = sum(1 for f in rep if read_function_source(f, repo_path))
        snippet_counts[name] = src_count
        print(f" {name}: {len(funcs)} 函数,{src_count}/{len(rep)} 个有源码")

    output_dir.mkdir(parents=True, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    report_path = output_dir / f"redis_wiki_{timestamp}.md"

    pages: list[dict] = []

    for i, (module_name, functions) in enumerate(pages_to_generate, 1):
        src_count = snippet_counts[module_name]
        print(f"\n[{i}/{len(pages_to_generate)}] {module_name} ({len(functions)} 函数, {src_count} 段真实源码)...")
        try:
            call_stats = get_module_call_stats(builder, module_name)
            content = analyze_module_page(module_name, functions, agent, repo_path, call_stats)
            pages.append({
                "id": module_name,
                "title": module_name,
                "function_count": len(functions),
                "source_snippets": src_count,
                "content": content,
            })
            print(f" 完成 ({len(content)} 字符)")
        except Exception as e:
            # Best effort: keep going and record the failure on the page.
            print(f" 失败: {e}")
            pages.append({
                "id": module_name,
                "title": module_name,
                "function_count": len(functions),
                "source_snippets": 0,
                "content": f"分析失败: {e}",
            })

    # Global statistics for the report header.
    total_funcs_row = builder.query("MATCH (f:Function) RETURN count(f) AS cnt")
    total_funcs = total_funcs_row[0]["result"][0] if total_funcs_row else 0
    total_calls_row = builder.query("MATCH ()-[r:CALLS]->() RETURN count(r) AS cnt")
    total_calls = total_calls_row[0]["result"][0] if total_calls_row else 0

    lines = [
        "# Redis 源码 Wiki",
        "",
        f"*生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*",
        f"*模型: {model_name}*",
        f"*页面数: {len(pages)} | 源码上下文: 已启用*",
        "",
        "## 图数据概览",
        "",
        "| 指标 | 数值 |",
        "|------|------|",
        f"| 总函数数 | {total_funcs:,} |",
        f"| 总调用关系 | {total_calls:,} |",
        f"| src/ 模块数 | {len(modules)} |",
        f"| 本次生成页面 | {len(pages)} |",
        "",
        "---",
        "",
        "## 目录",
        "",
    ]
    # Table of contents linking to the anchors emitted for each page below.
    for p in pages:
        lines.append(
            f"- [{p['title']}](#{p['id']}) "
            f"({p['function_count']} 函数, {p['source_snippets']} 段源码)"
        )
    lines += ["", "---", ""]

    for p in pages:
        lines += [
            f"<a id='{p['id']}'></a>",
            "",
            f"## {p['title']}",
            "",
            f"*函数数量: {p['function_count']} | 源码片段: {p['source_snippets']}*",
            "",
            p["content"],
            "",
            "---",
            "",
        ]

    report_path.write_text("\n".join(lines), encoding="utf-8")
    print(f"\nWiki 已保存: {report_path}")
    print(f"文件大小: {report_path.stat().st_size:,} 字节")
    print(f"总页面数: {len(pages)}")
    return report_path, len(pages)
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def main():
    """Command-line entry point of the Redis wiki generator.

    Parses the options, prepares the environment, constructs the graph
    builder and generates the wiki. Any exception raised during generation is
    printed with its traceback and the process exits with status 1.
    """
    cli = argparse.ArgumentParser(description="Redis Wiki 生成器(含真实源码上下文)")
    option_specs = (
        ("--max-pages", {"type": int, "default": 10, "help": "最多生成几个 wiki 页面 (默认: 10)"}),
        ("--output-dir", {"type": Path, "default": Path("./rag_output"), "help": "输出目录"}),
        (
            "--repo-path",
            {
                "type": Path,
                "default": Path("/Users/jiaojeremy/CodeFile/redis"),
                "help": "Redis 源码仓路径",
            },
        ),
        (
            "--db-path",
            {
                "type": Path,
                "default": Path("./redis_graph.db"),
                "help": "Kùzu 数据库路径(需已构建)",
            },
        ),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    args = cli.parse_args()

    # Runs after argument parsing, as in the original flow.
    setup_environment()

    rule = "=" * 60
    print(rule)
    print("Redis Wiki 生成器(含真实源码上下文)")
    print(rule)
    print(f"数据库: {args.db_path}")
    print(f"源码路径: {args.repo_path}")

    builder = load_redis_graph(args.db_path, args.repo_path)

    try:
        report_path, page_count = generate_wiki(
            builder=builder,
            output_dir=args.output_dir,
            max_pages=args.max_pages,
            repo_path=args.repo_path,
        )
    except Exception as e:
        print(f"\n错误: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)
    else:
        print(f"\n完成! 生成了 {page_count} 个页面")
        print(f"报告: {report_path}")
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
if __name__ == "__main__":
|
|
390
|
+
main()
|