code-graph-builder 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. code_graph_builder/__init__.py +82 -0
  2. code_graph_builder/builder.py +366 -0
  3. code_graph_builder/cgb_cli.py +32 -0
  4. code_graph_builder/cli.py +564 -0
  5. code_graph_builder/commands_cli.py +1288 -0
  6. code_graph_builder/config.py +340 -0
  7. code_graph_builder/constants.py +708 -0
  8. code_graph_builder/embeddings/__init__.py +40 -0
  9. code_graph_builder/embeddings/qwen3_embedder.py +573 -0
  10. code_graph_builder/embeddings/vector_store.py +584 -0
  11. code_graph_builder/examples/__init__.py +0 -0
  12. code_graph_builder/examples/example_configuration.py +276 -0
  13. code_graph_builder/examples/example_kuzu_usage.py +109 -0
  14. code_graph_builder/examples/example_semantic_search_full.py +347 -0
  15. code_graph_builder/examples/generate_wiki.py +915 -0
  16. code_graph_builder/examples/graph_export_example.py +100 -0
  17. code_graph_builder/examples/rag_example.py +206 -0
  18. code_graph_builder/examples/test_cli_demo.py +129 -0
  19. code_graph_builder/examples/test_embedding_api.py +153 -0
  20. code_graph_builder/examples/test_kuzu_local.py +190 -0
  21. code_graph_builder/examples/test_rag_redis.py +390 -0
  22. code_graph_builder/graph_updater.py +605 -0
  23. code_graph_builder/guidance/__init__.py +1 -0
  24. code_graph_builder/guidance/agent.py +123 -0
  25. code_graph_builder/guidance/prompts.py +74 -0
  26. code_graph_builder/guidance/toolset.py +264 -0
  27. code_graph_builder/language_spec.py +536 -0
  28. code_graph_builder/mcp/__init__.py +21 -0
  29. code_graph_builder/mcp/api_doc_generator.py +764 -0
  30. code_graph_builder/mcp/file_editor.py +207 -0
  31. code_graph_builder/mcp/pipeline.py +777 -0
  32. code_graph_builder/mcp/server.py +161 -0
  33. code_graph_builder/mcp/tools.py +1800 -0
  34. code_graph_builder/models.py +115 -0
  35. code_graph_builder/parser_loader.py +344 -0
  36. code_graph_builder/parsers/__init__.py +7 -0
  37. code_graph_builder/parsers/call_processor.py +306 -0
  38. code_graph_builder/parsers/call_resolver.py +139 -0
  39. code_graph_builder/parsers/definition_processor.py +796 -0
  40. code_graph_builder/parsers/factory.py +119 -0
  41. code_graph_builder/parsers/import_processor.py +293 -0
  42. code_graph_builder/parsers/structure_processor.py +145 -0
  43. code_graph_builder/parsers/type_inference.py +143 -0
  44. code_graph_builder/parsers/utils.py +134 -0
  45. code_graph_builder/rag/__init__.py +68 -0
  46. code_graph_builder/rag/camel_agent.py +429 -0
  47. code_graph_builder/rag/client.py +298 -0
  48. code_graph_builder/rag/config.py +239 -0
  49. code_graph_builder/rag/cypher_generator.py +67 -0
  50. code_graph_builder/rag/llm_backend.py +210 -0
  51. code_graph_builder/rag/markdown_generator.py +352 -0
  52. code_graph_builder/rag/prompt_templates.py +440 -0
  53. code_graph_builder/rag/rag_engine.py +640 -0
  54. code_graph_builder/rag/review_report.md +172 -0
  55. code_graph_builder/rag/tests/__init__.py +3 -0
  56. code_graph_builder/rag/tests/test_camel_agent.py +313 -0
  57. code_graph_builder/rag/tests/test_client.py +221 -0
  58. code_graph_builder/rag/tests/test_config.py +177 -0
  59. code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
  60. code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
  61. code_graph_builder/services/__init__.py +39 -0
  62. code_graph_builder/services/graph_service.py +465 -0
  63. code_graph_builder/services/kuzu_service.py +665 -0
  64. code_graph_builder/services/memory_service.py +171 -0
  65. code_graph_builder/settings.py +75 -0
  66. code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
  67. code_graph_builder/tests/__init__.py +1 -0
  68. code_graph_builder/tests/run_acceptance_check.py +378 -0
  69. code_graph_builder/tests/test_api_find.py +231 -0
  70. code_graph_builder/tests/test_api_find_integration.py +226 -0
  71. code_graph_builder/tests/test_basic.py +78 -0
  72. code_graph_builder/tests/test_c_api_extraction.py +388 -0
  73. code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
  74. code_graph_builder/tests/test_embedder.py +411 -0
  75. code_graph_builder/tests/test_integration_semantic.py +434 -0
  76. code_graph_builder/tests/test_mcp_protocol.py +298 -0
  77. code_graph_builder/tests/test_mcp_user_flow.py +190 -0
  78. code_graph_builder/tests/test_rag.py +404 -0
  79. code_graph_builder/tests/test_settings.py +135 -0
  80. code_graph_builder/tests/test_step1_graph_build.py +264 -0
  81. code_graph_builder/tests/test_step2_api_docs.py +323 -0
  82. code_graph_builder/tests/test_step3_embedding.py +278 -0
  83. code_graph_builder/tests/test_vector_store.py +552 -0
  84. code_graph_builder/tools/__init__.py +40 -0
  85. code_graph_builder/tools/graph_query.py +495 -0
  86. code_graph_builder/tools/semantic_search.py +387 -0
  87. code_graph_builder/types.py +333 -0
  88. code_graph_builder/utils/__init__.py +0 -0
  89. code_graph_builder/utils/path_utils.py +30 -0
  90. code_graph_builder-0.2.0.dist-info/METADATA +321 -0
  91. code_graph_builder-0.2.0.dist-info/RECORD +93 -0
  92. code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
  93. code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,171 @@
1
+ """Memory-only graph service - No database required."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import types
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ from loguru import logger
11
+
12
+ from ..types import GraphData, PropertyDict, PropertyValue, ResultRow
13
+
14
+
15
class MemoryIngestor:
    """Ingestor that stores graph data in memory only (no persistence).

    Nodes and relationships are first queued in buffers and moved into
    ``self.nodes`` / ``self.relationships`` when a buffer reaches
    ``_batch_size`` entries or when one of the ``flush_*`` methods is called.
    Leaving the ``with`` block flushes whatever is still queued.

    This is useful for testing and one-off analysis where database
    persistence is not needed.

    Example:
        >>> ingestor = MemoryIngestor()
        >>> with ingestor:
        ...     ingestor.ensure_node_batch("Function", {"name": "foo"})
        ...     ingestor.flush_all()
        >>> data = ingestor.export_graph()
    """

    def __init__(self):
        """Create an empty graph with empty node/relationship buffers."""
        # Flushed graph content.
        self.nodes: list[dict] = []
        self.relationships: list[dict] = []
        # Pending entries that have not been flushed yet.
        self._node_buffer: list[tuple[str, PropertyDict]] = []
        self._rel_buffer: list[tuple] = []
        # A buffer is flushed automatically once it holds this many entries.
        self._batch_size = 1000

    def __enter__(self) -> MemoryIngestor:
        """Enter the context manager and return this ingestor."""
        logger.info("Memory ingestor initialized (no persistence)")
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: types.TracebackType | None,
    ) -> None:
        """Flush pending data, then log the exception if one is propagating.

        Returns ``None``, so an exception raised inside the ``with`` block is
        not suppressed.
        """
        # Flush first so that data queued before a failure is still kept.
        self.flush_all()
        if exc_type:
            logger.exception(f"Exception during ingest: {exc_val}")

    def ensure_node_batch(self, label: str, properties: PropertyDict) -> None:
        """Queue a node; flush the node buffer once it reaches the batch size.

        Args:
            label: Node label stored under the ``"label"`` key.
            properties: Node properties. A shallow copy is queued, so later
                changes to the caller's dict do not affect the stored node.
        """
        self._node_buffer.append((label, properties.copy()))
        if len(self._node_buffer) >= self._batch_size:
            self.flush_nodes()

    def ensure_relationship_batch(
        self,
        source: tuple[str, str, PropertyValue],
        rel_type: str,
        target: tuple[str, str, PropertyValue],
        properties: PropertyDict | None = None,
    ) -> None:
        """Queue a relationship; flush the buffer once it reaches the batch size.

        Args:
            source: ``(label, key, value)`` triple identifying the source node.
            rel_type: Relationship type stored under the ``"type"`` key.
            target: ``(label, key, value)`` triple identifying the target node.
            properties: Optional relationship properties. A shallow copy is
                queued; ``None`` or an empty dict is stored as ``{}`` on flush.
        """
        # Copy for the same reason as ensure_node_batch: the queued entry must
        # not change if the caller reuses or mutates its dict afterwards.
        queued_props = properties.copy() if properties else None
        self._rel_buffer.append((source, rel_type, target, queued_props))
        if len(self._rel_buffer) >= self._batch_size:
            self.flush_relationships()

    def flush_nodes(self) -> None:
        """Move every queued node into ``self.nodes`` and empty the buffer."""
        for label, props in self._node_buffer:
            self.nodes.append(
                {
                    "label": label,
                    "properties": props,
                    # The id is the node's position in ``self.nodes``.
                    "id": len(self.nodes),
                }
            )
        logger.debug(f"Flushed {len(self._node_buffer)} nodes to memory")
        self._node_buffer = []

    def flush_relationships(self) -> None:
        """Move every queued relationship into ``self.relationships``."""
        for source, rel_type, target, props in self._rel_buffer:
            self.relationships.append(
                {
                    "source": {"label": source[0], "key": source[1], "value": source[2]},
                    "type": rel_type,
                    "target": {"label": target[0], "key": target[1], "value": target[2]},
                    "properties": props or {},
                }
            )
        logger.debug(f"Flushed {len(self._rel_buffer)} relationships to memory")
        self._rel_buffer = []

    def flush_all(self) -> None:
        """Flush the node buffer, then the relationship buffer."""
        self.flush_nodes()
        self.flush_relationships()

    def clean_database(self) -> None:
        """Discard all stored and queued nodes and relationships."""
        self.nodes = []
        self.relationships = []
        self._node_buffer = []
        self._rel_buffer = []
        logger.info("Memory database cleaned")

    def export_graph(self) -> GraphData:
        """Return the flushed graph together with node/relationship totals.

        Entries that are still queued in the buffers are not included; call
        :meth:`flush_all` first if they should be. The returned ``"nodes"``
        and ``"relationships"`` values are the ingestor's own lists, not
        copies.
        """
        return {
            "nodes": self.nodes,
            "relationships": self.relationships,
            "metadata": {
                "total_nodes": len(self.nodes),
                "total_relationships": len(self.relationships),
            },
        }

    def export_graph_to_dict(self) -> GraphData:
        """Export the graph data (alias for :meth:`export_graph`)."""
        return self.export_graph()

    def get_statistics(self) -> dict[str, Any]:
        """Return total counts plus per-label and per-type tallies.

        Returns:
            A dict with ``node_count``, ``relationship_count``,
            ``node_labels`` (label -> count) and ``relationship_types``
            (type -> count), computed from the flushed data only.
        """
        # Tally nodes by label; entries without a label count as "Unknown".
        node_labels: dict[str, int] = {}
        for node in self.nodes:
            label = node.get("label", "Unknown")
            node_labels[label] = node_labels.get(label, 0) + 1

        # Tally relationships by type; entries without one count as "UNKNOWN".
        rel_types: dict[str, int] = {}
        for rel in self.relationships:
            rel_type = rel.get("type", "UNKNOWN")
            rel_types[rel_type] = rel_types.get(rel_type, 0) + 1

        return {
            "node_count": len(self.nodes),
            "relationship_count": len(self.relationships),
            "node_labels": node_labels,
            "relationship_types": rel_types,
        }

    def query(self, cypher_query: str, params: PropertyDict | None = None) -> list[ResultRow]:
        """Execute a query against the in-memory graph.

        Note: This is a simplified implementation, not a Cypher engine. If
        the query text contains ``MATCH (n)`` and does not contain ``count``
        (case-insensitive), one ``{"n": node}`` row is returned per stored
        node. Every other query returns an empty list. ``params`` is accepted
        but not used.
        """
        results: list[ResultRow] = []

        # Very basic query parsing - just return all nodes for MATCH (n)
        if "MATCH (n)" in cypher_query and "count" not in cypher_query.lower():
            for node in self.nodes:
                results.append({"n": node})

        return results

    def save_to_file(self, filepath: str | Path) -> None:
        """Write the exported graph to ``filepath`` as indented JSON.

        Only flushed data is written (see :meth:`export_graph`). Values that
        ``json`` cannot serialize are converted with ``str``.
        """
        data = self.export_graph()
        # Explicit encoding keeps the file independent of the platform locale.
        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, default=str)
        logger.info(f"Graph saved to {filepath}")

    def load_from_file(self, filepath: str | Path) -> None:
        """Replace the stored graph with the contents of a JSON file.

        ``self.nodes`` and ``self.relationships`` are set from the file's
        ``"nodes"`` and ``"relationships"`` keys (an empty list if a key is
        missing). The pending buffers are left as they are.
        """
        # Explicit encoding keeps reading independent of the platform locale.
        with open(filepath, encoding="utf-8") as f:
            data = json.load(f)
        self.nodes = data.get("nodes", [])
        self.relationships = data.get("relationships", [])
        logger.info(f"Graph loaded from {filepath}")
@@ -0,0 +1,75 @@
1
+ """Load global configuration from ``~/.claude/settings.json``.
2
+
3
+ This module reads LLM and embedding API credentials stored in the Claude
4
+ Code settings file and injects them into ``os.environ`` via
5
+ :func:`os.environ.setdefault`. Because ``setdefault`` is used, any values
6
+ already present in the environment (from ``.env``, MCP ``env`` block, or
7
+ shell exports) take precedence — this file acts as a *fallback* layer.
8
+
9
+ Expected JSON structure::
10
+
11
+ {
12
+ "env": {
13
+ "LLM_API_KEY": "sk-...",
14
+ "LLM_BASE_URL": "https://api.openai.com/v1",
15
+ "LLM_MODEL": "gpt-4o",
16
+ "DASHSCOPE_API_KEY": "sk-...",
17
+ "DASHSCOPE_BASE_URL": "https://dashscope.aliyuncs.com/api/v1"
18
+ }
19
+ }
20
+
21
+ All keys inside the ``"env"`` object are injected into the process
22
+ environment. Unknown keys are silently accepted so the file can hold
23
+ additional settings for other tools.
24
+
25
+ When the file does not exist it is skipped silently; when it cannot be read
26
+ or parsed as a JSON object a warning is logged. In both cases ``{}`` is returned and the environment is left untouched.
27
+ """
28
+
29
+ from __future__ import annotations
30
+
31
+ import json
32
+ import os
33
+ from pathlib import Path
34
+
35
+ from loguru import logger
36
+
37
+ # Well-known settings file location
38
+ SETTINGS_PATH = Path.home() / ".claude" / "settings.json"
39
+
40
+
41
+ def load_settings(path: Path | None = None) -> dict:
42
+ """Read ``~/.claude/settings.json`` and inject ``env`` entries.
43
+
44
+ Args:
45
+ path: Override the default settings file location (useful for tests).
46
+
47
+ Returns:
48
+ The parsed JSON dict (or ``{}`` if the file does not exist).
49
+ """
50
+ settings_file = path or SETTINGS_PATH
51
+
52
+ if not settings_file.exists():
53
+ return {}
54
+
55
+ try:
56
+ data = json.loads(settings_file.read_text(encoding="utf-8"))
57
+ except (json.JSONDecodeError, OSError) as exc:
58
+ logger.warning(f"Failed to parse {settings_file}: {exc}")
59
+ return {}
60
+
61
+ if not isinstance(data, dict):
62
+ logger.warning(f"Expected JSON object in {settings_file}, got {type(data).__name__}")
63
+ return {}
64
+
65
+ env_block = data.get("env")
66
+ if isinstance(env_block, dict):
67
+ injected = []
68
+ for key, value in env_block.items():
69
+ if isinstance(value, str) and key not in os.environ:
70
+ os.environ.setdefault(key, value)
71
+ injected.append(key)
72
+ if injected:
73
+ logger.info(f"Loaded from {settings_file}: {', '.join(injected)}")
74
+
75
+ return data
@@ -0,0 +1,401 @@
1
+ # 阶段二完成验收标准与测试方案
2
+
3
+ ## 1. 概述
4
+
5
+ 本文档定义 code_graph_builder 项目阶段二(完善核心解析逻辑,特别是调用关系解析)的完成标准和测试方案。
6
+
7
+ **阶段二核心目标**:
8
+ - 完善调用关系解析逻辑
9
+ - 支持多种调用类型的准确识别
10
+ - 确保跨文件调用解析正确
11
+ - 达到可接受的准确率和性能指标
12
+
13
+ ---
14
+
15
+ ## 2. 调用关系解析完成标准
16
+
17
+ ### 2.1 必须识别的调用类型
18
+
19
+ | 调用类型 | 说明 | 示例 | 优先级 |
20
+ |---------|------|------|--------|
21
+ | **直接函数调用** | 同文件内的函数调用 | `foo()` | P0 |
22
+ | **跨文件函数调用** | 通过 import/include 的调用 | `from utils import foo` → `foo()` | P0 |
23
+ | **方法调用** | 对象方法调用 | `obj.method()` | P0 |
24
+ | **链式调用** | 链式方法调用 | `obj.a().b()` | P1 |
25
+ | **静态方法调用** | 类/模块静态方法 | `Class.static_method()` | P1 |
26
+ | **父类方法调用** | super/父类方法调用 | `super().method()` | P1 |
27
+ | **IIFE 调用** | 立即执行函数 | `(function(){})()` | P2 |
28
+ | **回调/高阶函数** | 函数作为参数传递 | `map(fn, list)` | P2 |
29
+ | **构造函数调用** | 对象实例化 | `new Class()` / `Class()` | P1 |
30
+
31
+ ### 2.2 准确率要求
32
+
33
+ | 指标 | 目标值 | 最低可接受值 | 测量方法 |
34
+ |------|--------|-------------|---------|
35
+ | **调用识别率** | ≥ 95% | ≥ 90% | 识别出的调用数 / 实际存在的调用数 |
36
+ | **调用解析准确率** | ≥ 90% | ≥ 85% | 正确解析的调用 / 总解析调用 |
37
+ | **跨文件调用准确率** | ≥ 85% | ≥ 80% | 正确解析的跨文件调用 / 总跨文件调用 |
38
+ | **误报率** | ≤ 5% | ≤ 10% | 错误识别的调用 / 总识别调用 |
39
+
40
+ ### 2.3 性能要求
41
+
42
+ | 指标 | 目标值 | 可接受阈值 | 测试条件 |
43
+ |------|--------|-------------|---------|
44
+ | **解析速度** | ≥ 1000 函数/秒 | ≥ 500 函数/秒 | TinyCC 规模项目(1611 函数) |
45
+ | **内存占用** | ≤ 2GB | ≤ 4GB | 解析 TinyCC 项目峰值内存 |
46
+ | **数据库写入** | ≥ 500 节点/秒 | ≥ 200 节点/秒 | 批量写入模式 |
47
+
48
+ ---
49
+
50
+ ## 3. 测试场景设计
51
+
52
+ ### 3.1 测试场景矩阵
53
+
54
+ ```
55
+ 简单项目 中型项目 大型项目
56
+ (1-10文件) (10-100) (100+)
57
+ ─────────────────────────────────────────────────────
58
+ 单语言 (Python) [场景1] [场景4] [场景7]
59
+ 双语言 (Py+JS) [场景2] [场景5] [场景8]
60
+ 多语言 (3+语言) [场景3] [场景6] [场景9]
61
+ ```
62
+
63
+ ### 3.2 详细测试场景
64
+
65
+ #### 场景1:简单单一文件项目
66
+ **目的**:验证基本调用识别能力
67
+
68
+ **测试代码示例**:
69
+ ```python
70
+ # simple_project/main.py
71
+ def helper():
72
+ return "help"
73
+
74
+ class Calculator:
75
+ def add(self, a, b):
76
+ return a + b
77
+
78
+ def calculate(self):
79
+ return self.add(1, 2)
80
+
81
+ def main():
82
+ calc = Calculator()
83
+ result = calc.calculate()
84
+ help_result = helper()
85
+ return result, help_result
86
+ ```
87
+
88
+ **预期结果**:
89
+ - 识别 4 个函数/方法定义
90
+ - 识别 3 条调用关系:
91
+ - `main` → `helper`
92
+ - `main` → `Calculator.calculate`
93
+ - `Calculator.calculate` → `Calculator.add`
94
+
95
+ **通过标准**:
96
+ - [ ] 所有函数被正确识别
97
+ - [ ] 所有调用关系被正确识别
98
+ - [ ] 无错误解析
99
+
100
+ ---
101
+
102
+ #### 场景2:跨文件调用(Python)
103
+ **目的**:验证 import 解析和跨文件调用识别
104
+
105
+ **项目结构**:
106
+ ```
107
+ cross_file_project/
108
+ ├── utils/
109
+ │ ├── __init__.py
110
+ │ ├── helpers.py
111
+ │ └── math_ops.py
112
+ ├── services/
113
+ │ ├── __init__.py
114
+ │ └── processor.py
115
+ └── main.py
116
+ ```
117
+
118
+ **测试代码示例**:
119
+ ```python
120
+ # utils/helpers.py
121
+ def format_data(data):
122
+ return f"formatted: {data}"
123
+
124
+ class DataProcessor:
125
+ def process(self, data):
126
+ return format_data(data)
127
+
128
+ # utils/math_ops.py
129
+ def calculate(x, y):
130
+ return x + y
131
+
132
+ # services/processor.py
133
+ from utils.helpers import format_data, DataProcessor
134
+ from utils.math_ops import calculate
135
+
136
+ def process_request(data):
137
+ formatted = format_data(data)
138
+ processor = DataProcessor()
139
+ processed = processor.process(data)
140
+ calc_result = calculate(1, 2)
141
+ return formatted, processed, calc_result
142
+
143
+ # main.py
144
+ from services.processor import process_request
145
+
146
+ def main():
147
+ return process_request("test")
148
+ ```
149
+
150
+ **预期调用关系**:
151
+ | 调用者 | 被调用者 | 类型 |
152
+ |-------|---------|------|
153
+ | `main.main` | `services.processor.process_request` | 跨模块函数 |
154
+ | `services.processor.process_request` | `utils.helpers.format_data` | 跨模块函数 |
155
+ | `services.processor.process_request` | `utils.math_ops.calculate` | 跨模块函数 |
156
+ | `services.processor.process_request` | `utils.helpers.DataProcessor.process` | 跨模块方法 |
157
+ | `utils.helpers.DataProcessor.process` | `utils.helpers.format_data` | 同模块函数 |
158
+
159
+ **通过标准**:
160
+ - [ ] 所有 5 条调用关系被正确识别
161
+ - [ ] import 语句被正确解析
162
+ - [ ] FQN(完全限定名)构建正确
163
+
164
+ ---
165
+
166
+ #### 场景3:多语言混合项目
167
+ **目的**:验证多语言支持能力
168
+
169
+ **项目结构**:
170
+ ```
171
+ multi_lang_project/
172
+ ├── python_api/
173
+ │ ├── __init__.py
174
+ │ └── api.py
175
+ ├── js_frontend/
176
+ │ └── app.js
177
+ └── rust_core/
178
+ └── lib.rs
179
+ ```
180
+
181
+ **测试重点**:
182
+ - Python 调用 Python(已验证)
183
+ - JavaScript 函数调用识别
184
+ - Rust 函数调用识别
185
+ - 每种语言的调用解析独立正确
186
+
187
+ **通过标准**:
188
+ - [ ] Python 调用识别率 ≥ 90%
189
+ - [ ] JavaScript 调用识别率 ≥ 85%
190
+ - [ ] Rust 调用识别率 ≥ 80%
191
+
192
+ ---
193
+
194
+ #### 场景4:中型项目(TinyCC 规模)
195
+ **目的**:验证实际项目解析能力
196
+
197
+ **测试对象**:TinyCC 项目(已验证:43 文件,1611 函数)
198
+
199
+ **测试内容**:
200
+ 1. **完整性检查**
201
+ - [ ] 所有文件被解析
202
+ - [ ] 所有函数被识别
203
+ - [ ] 无解析错误导致程序退出
204
+
205
+ 2. **调用关系检查**(抽样)
206
+ - [ ] 随机抽取 20 个函数,验证其调用关系
207
+ - [ ] 验证主要调用链完整
208
+
209
+ 3. **性能检查**
210
+ - [ ] 解析时间 ≤ 5 秒
211
+ - [ ] 内存占用 ≤ 2GB
212
+
213
+ ---
214
+
215
+ #### 场景5:复杂调用模式
216
+ **目的**:验证复杂调用模式的识别
217
+
218
+ **测试代码示例**:
219
+ ```python
220
+ # 链式调用
221
+ result = obj.a().b().c()
222
+
223
+ # 嵌套调用
224
+ result = outer(inner(data))
225
+
226
+ # 高阶函数
227
+ results = map(process, items)
228
+ filtered = filter(lambda x: x > 0, data)
229
+
230
+ # 条件调用
231
+ result = obj.method() if condition else other_method()
232
+
233
+ # 动态调用(可选,可能无法完全支持)
234
+ method = getattr(obj, method_name)
235
+ method()
236
+ ```
237
+
238
+ **通过标准**:
239
+ - [ ] 链式调用识别率 ≥ 80%
240
+ - [ ] 嵌套调用识别率 ≥ 90%
241
+ - [ ] 高阶函数调用识别率 ≥ 70%
242
+
243
+ ---
244
+
245
+ #### 场景6:边界情况测试
246
+ **目的**:验证边界情况的处理
247
+
248
+ **测试用例**:
249
+
250
+ | 用例 | 代码示例 | 预期行为 |
251
+ |------|---------|---------|
252
+ | 短函数名 | `def a(): pass` → `a()` | 正确识别 |
253
+ | 同名函数 | 不同模块的同名函数 | 正确区分 FQN |
254
+ | 递归调用 | `def f(): f()` | 正确识别自调用 |
255
+ | 间接递归 | `def a(): b()` / `def b(): a()` | 正确识别循环调用 |
256
+ | 未定义函数 | `undefined_func()` | 记录但不报错 |
257
+ | 内置函数 | `print()`, `len()` | 可选识别 |
258
+ | 第三方库 | `import numpy` → `numpy.array()` | 记录但不解析 |
259
+
260
+ ---
261
+
262
+ ## 4. 验收检查清单
263
+
264
+ ### 4.1 功能检查清单
265
+
266
+ #### 解析功能
267
+ - [ ] 单文件项目解析通过(场景1)
268
+ - [ ] 跨文件调用解析通过(场景2)
269
+ - [ ] 多语言项目解析通过(场景3)
270
+ - [ ] TinyCC 项目完整解析通过(场景4)
271
+ - [ ] 复杂调用模式识别通过(场景5)
272
+ - [ ] 边界情况处理通过(场景6)
273
+
274
+ #### 调用类型支持
275
+ - [ ] 直接函数调用识别
276
+ - [ ] 方法调用识别
277
+ - [ ] 跨文件函数调用解析
278
+ - [ ] 链式调用识别
279
+ - [ ] 静态方法调用识别
280
+ - [ ] 构造函数调用识别
281
+
282
+ #### 数据正确性
283
+ - [ ] 节点数量与实际代码一致
284
+ - [ ] 关系数量与预期一致
285
+ - [ ] FQN 格式正确
286
+ - [ ] 行号信息准确
287
+
288
+ ### 4.2 性能检查清单
289
+
290
+ - [ ] TinyCC 项目解析时间 ≤ 5 秒
291
+ - [ ] TinyCC 项目内存占用 ≤ 2GB
292
+ - [ ] 数据库批量写入正常
293
+ - [ ] 无内存泄漏(连续解析 3 次内存稳定)
294
+
295
+ ### 4.3 代码质量检查清单
296
+
297
+ - [ ] 所有单元测试通过
298
+ - [ ] 代码覆盖率 ≥ 80%
299
+ - [ ] 类型检查通过(ty)
300
+ - [ ] 代码风格检查通过(ruff)
301
+ - [ ] 无 `Any` 类型使用
302
+ - [ ] 无 `cast()` 使用
303
+
304
+ ---
305
+
306
+ ## 5. "阶段二完成"定义
307
+
308
+ ### 5.1 必须完成项(阻塞项)
309
+
310
+ 以下所有项必须完成,阶段二才算完成:
311
+
312
+ | 序号 | 完成项 | 验证方法 |
313
+ |------|--------|---------|
314
+ | 1 | 调用处理器完整移植 | 代码审查 |
315
+ | 2 | 调用解析器完整移植 | 代码审查 |
316
+ | 3 | 类型推断引擎完整移植 | 代码审查 |
317
+ | 4 | 导入处理器完整移植 | 代码审查 |
318
+ | 5 | 场景1测试通过 | 执行测试 |
319
+ | 6 | 场景2测试通过 | 执行测试 |
320
+ | 7 | TinyCC 项目解析通过 | 执行测试 |
321
+ | 8 | 调用识别率 ≥ 90% | 测试报告 |
322
+ | 9 | 调用解析准确率 ≥ 85% | 测试报告 |
323
+ | 10 | 单元测试覆盖率 ≥ 80% | 覆盖率报告 |
324
+
325
+ ### 5.2 建议完成项(非阻塞)
326
+
327
+ 以下项建议完成,但不阻塞阶段二完成:
328
+
329
+ - [ ] 多语言混合项目测试(场景3)
330
+ - [ ] 复杂调用模式测试(场景5)
331
+ - [ ] 边界情况全面测试(场景6)
332
+ - [ ] 性能优化达到目标值
333
+ - [ ] 完整文档编写
334
+
335
+ ### 5.3 阶段二完成签字
336
+
337
+ | 角色 | 签字 | 日期 |
338
+ |------|------|------|
339
+ | 技术负责人 | | |
340
+ | 质量保障 | | |
341
+ | 产品经理 | | |
342
+
343
+ ---
344
+
345
+ ## 6. 测试执行命令
346
+
347
+ ```bash
348
+ # 1. 运行所有单元测试
349
+ cd /Users/jiaojeremy/CodeFile/code-graph-rag
350
+ uv run pytest code_graph_builder/tests/ -v
351
+
352
+ # 2. 运行特定测试场景
353
+ uv run pytest code_graph_builder/tests/test_call_processor.py -v
354
+ uv run pytest code_graph_builder/tests/test_call_resolver.py -v
355
+
356
+ # 3. 运行覆盖率测试
357
+ uv run pytest code_graph_builder/tests/ --cov=code_graph_builder --cov-report=html
358
+
359
+ # 4. 代码质量检查
360
+ uv run ruff check code_graph_builder/
361
+ uv run ruff format --check code_graph_builder/
362
+ uv run ty check code_graph_builder/
363
+
364
+ # 5. TinyCC 项目解析测试
365
+ python -c "
366
+ from code_graph_builder import CodeGraphBuilder
367
+ builder = CodeGraphBuilder('/path/to/tinycc')
368
+ result = builder.build_graph(clean=True)
369
+ print(f'Nodes: {result.nodes_created}')
370
+ print(f'Functions: {result.functions_found}')
371
+ print(f'Relationships: {result.relationships_created}')
372
+ "
373
+ ```
374
+
375
+ ---
376
+
377
+ ## 7. 附录
378
+
379
+ ### 7.1 术语定义
380
+
381
+ | 术语 | 定义 |
382
+ |------|------|
383
+ | FQN | Fully Qualified Name,完全限定名,如 `project.module.Class.method` |
384
+ | IIFE | Immediately Invoked Function Expression,立即执行函数表达式 |
385
+ | CALLS 关系 | 图中表示函数调用关系的关系类型 |
386
+ | 调用识别率 | 识别出的调用数 / 实际存在的调用数 |
387
+ | 调用解析准确率 | 正确解析目标函数的调用 / 总解析调用数 |
388
+
389
+ ### 7.2 参考文件
390
+
391
+ - `/Users/jiaojeremy/CodeFile/code-graph-rag/PORTING_TASKS.md` - 移植任务总览
392
+ - `/Users/jiaojeremy/CodeFile/code-graph-rag/code_graph_builder/PORTING_CHANGES.md` - 移植变更记录
393
+ - `/Users/jiaojeremy/CodeFile/code-graph-rag/codebase_rag/tests/test_call_processor.py` - 原项目调用处理器测试
394
+ - `/Users/jiaojeremy/CodeFile/code-graph-rag/codebase_rag/tests/test_call_resolver.py` - 原项目调用解析器测试
395
+ - `/Users/jiaojeremy/CodeFile/code-graph-rag/codebase_rag/tests/test_complex_cross_file_calls.py` - 原项目跨文件调用测试
396
+
397
+ ---
398
+
399
+ *文档版本:1.0*
400
+ *创建日期:2026-02-21*
401
+ *适用阶段:阶段二验收*
@@ -0,0 +1 @@
1
+ """Tests for code_graph_builder."""