jarvis-ai-assistant 0.3.30__py3-none-any.whl → 0.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181)
  1. jarvis/__init__.py +1 -1
  2. jarvis/jarvis_agent/__init__.py +458 -152
  3. jarvis/jarvis_agent/agent_manager.py +17 -13
  4. jarvis/jarvis_agent/builtin_input_handler.py +2 -6
  5. jarvis/jarvis_agent/config_editor.py +2 -7
  6. jarvis/jarvis_agent/event_bus.py +82 -12
  7. jarvis/jarvis_agent/file_context_handler.py +329 -0
  8. jarvis/jarvis_agent/file_methodology_manager.py +3 -4
  9. jarvis/jarvis_agent/jarvis.py +628 -55
  10. jarvis/jarvis_agent/language_extractors/__init__.py +57 -0
  11. jarvis/jarvis_agent/language_extractors/c_extractor.py +21 -0
  12. jarvis/jarvis_agent/language_extractors/cpp_extractor.py +21 -0
  13. jarvis/jarvis_agent/language_extractors/go_extractor.py +21 -0
  14. jarvis/jarvis_agent/language_extractors/java_extractor.py +84 -0
  15. jarvis/jarvis_agent/language_extractors/javascript_extractor.py +79 -0
  16. jarvis/jarvis_agent/language_extractors/python_extractor.py +21 -0
  17. jarvis/jarvis_agent/language_extractors/rust_extractor.py +21 -0
  18. jarvis/jarvis_agent/language_extractors/typescript_extractor.py +84 -0
  19. jarvis/jarvis_agent/language_support_info.py +486 -0
  20. jarvis/jarvis_agent/main.py +34 -10
  21. jarvis/jarvis_agent/memory_manager.py +7 -16
  22. jarvis/jarvis_agent/methodology_share_manager.py +10 -16
  23. jarvis/jarvis_agent/prompt_manager.py +1 -1
  24. jarvis/jarvis_agent/prompts.py +193 -171
  25. jarvis/jarvis_agent/protocols.py +8 -12
  26. jarvis/jarvis_agent/run_loop.py +105 -9
  27. jarvis/jarvis_agent/session_manager.py +2 -3
  28. jarvis/jarvis_agent/share_manager.py +20 -22
  29. jarvis/jarvis_agent/shell_input_handler.py +1 -2
  30. jarvis/jarvis_agent/stdio_redirect.py +295 -0
  31. jarvis/jarvis_agent/task_analyzer.py +31 -6
  32. jarvis/jarvis_agent/task_manager.py +11 -27
  33. jarvis/jarvis_agent/tool_executor.py +2 -3
  34. jarvis/jarvis_agent/tool_share_manager.py +12 -24
  35. jarvis/jarvis_agent/utils.py +5 -1
  36. jarvis/jarvis_agent/web_bridge.py +189 -0
  37. jarvis/jarvis_agent/web_output_sink.py +53 -0
  38. jarvis/jarvis_agent/web_server.py +786 -0
  39. jarvis/jarvis_c2rust/__init__.py +26 -0
  40. jarvis/jarvis_c2rust/cli.py +575 -0
  41. jarvis/jarvis_c2rust/collector.py +250 -0
  42. jarvis/jarvis_c2rust/constants.py +26 -0
  43. jarvis/jarvis_c2rust/library_replacer.py +1254 -0
  44. jarvis/jarvis_c2rust/llm_module_agent.py +1272 -0
  45. jarvis/jarvis_c2rust/loaders.py +207 -0
  46. jarvis/jarvis_c2rust/models.py +28 -0
  47. jarvis/jarvis_c2rust/optimizer.py +2157 -0
  48. jarvis/jarvis_c2rust/scanner.py +1681 -0
  49. jarvis/jarvis_c2rust/transpiler.py +2983 -0
  50. jarvis/jarvis_c2rust/utils.py +385 -0
  51. jarvis/jarvis_code_agent/build_validation_config.py +132 -0
  52. jarvis/jarvis_code_agent/code_agent.py +1371 -220
  53. jarvis/jarvis_code_agent/code_analyzer/__init__.py +65 -0
  54. jarvis/jarvis_code_agent/code_analyzer/base_language.py +74 -0
  55. jarvis/jarvis_code_agent/code_analyzer/build_validator/__init__.py +44 -0
  56. jarvis/jarvis_code_agent/code_analyzer/build_validator/base.py +106 -0
  57. jarvis/jarvis_code_agent/code_analyzer/build_validator/cmake.py +74 -0
  58. jarvis/jarvis_code_agent/code_analyzer/build_validator/detector.py +125 -0
  59. jarvis/jarvis_code_agent/code_analyzer/build_validator/fallback.py +72 -0
  60. jarvis/jarvis_code_agent/code_analyzer/build_validator/go.py +70 -0
  61. jarvis/jarvis_code_agent/code_analyzer/build_validator/java_gradle.py +53 -0
  62. jarvis/jarvis_code_agent/code_analyzer/build_validator/java_maven.py +47 -0
  63. jarvis/jarvis_code_agent/code_analyzer/build_validator/makefile.py +61 -0
  64. jarvis/jarvis_code_agent/code_analyzer/build_validator/nodejs.py +110 -0
  65. jarvis/jarvis_code_agent/code_analyzer/build_validator/python.py +154 -0
  66. jarvis/jarvis_code_agent/code_analyzer/build_validator/rust.py +110 -0
  67. jarvis/jarvis_code_agent/code_analyzer/build_validator/validator.py +153 -0
  68. jarvis/jarvis_code_agent/code_analyzer/build_validator.py +43 -0
  69. jarvis/jarvis_code_agent/code_analyzer/context_manager.py +648 -0
  70. jarvis/jarvis_code_agent/code_analyzer/context_recommender.py +18 -0
  71. jarvis/jarvis_code_agent/code_analyzer/dependency_analyzer.py +132 -0
  72. jarvis/jarvis_code_agent/code_analyzer/file_ignore.py +330 -0
  73. jarvis/jarvis_code_agent/code_analyzer/impact_analyzer.py +781 -0
  74. jarvis/jarvis_code_agent/code_analyzer/language_registry.py +185 -0
  75. jarvis/jarvis_code_agent/code_analyzer/language_support.py +110 -0
  76. jarvis/jarvis_code_agent/code_analyzer/languages/__init__.py +49 -0
  77. jarvis/jarvis_code_agent/code_analyzer/languages/c_cpp_language.py +299 -0
  78. jarvis/jarvis_code_agent/code_analyzer/languages/go_language.py +215 -0
  79. jarvis/jarvis_code_agent/code_analyzer/languages/java_language.py +212 -0
  80. jarvis/jarvis_code_agent/code_analyzer/languages/javascript_language.py +254 -0
  81. jarvis/jarvis_code_agent/code_analyzer/languages/python_language.py +269 -0
  82. jarvis/jarvis_code_agent/code_analyzer/languages/rust_language.py +281 -0
  83. jarvis/jarvis_code_agent/code_analyzer/languages/typescript_language.py +280 -0
  84. jarvis/jarvis_code_agent/code_analyzer/llm_context_recommender.py +605 -0
  85. jarvis/jarvis_code_agent/code_analyzer/structured_code.py +556 -0
  86. jarvis/jarvis_code_agent/code_analyzer/symbol_extractor.py +252 -0
  87. jarvis/jarvis_code_agent/code_analyzer/tree_sitter_extractor.py +58 -0
  88. jarvis/jarvis_code_agent/lint.py +501 -8
  89. jarvis/jarvis_code_agent/utils.py +141 -0
  90. jarvis/jarvis_code_analysis/code_review.py +493 -584
  91. jarvis/jarvis_data/config_schema.json +128 -12
  92. jarvis/jarvis_git_squash/main.py +4 -5
  93. jarvis/jarvis_git_utils/git_commiter.py +82 -75
  94. jarvis/jarvis_mcp/sse_mcp_client.py +22 -29
  95. jarvis/jarvis_mcp/stdio_mcp_client.py +12 -13
  96. jarvis/jarvis_mcp/streamable_mcp_client.py +15 -14
  97. jarvis/jarvis_memory_organizer/memory_organizer.py +55 -74
  98. jarvis/jarvis_methodology/main.py +32 -48
  99. jarvis/jarvis_multi_agent/__init__.py +287 -55
  100. jarvis/jarvis_multi_agent/main.py +36 -4
  101. jarvis/jarvis_platform/base.py +524 -202
  102. jarvis/jarvis_platform/human.py +7 -8
  103. jarvis/jarvis_platform/kimi.py +30 -36
  104. jarvis/jarvis_platform/openai.py +88 -25
  105. jarvis/jarvis_platform/registry.py +26 -10
  106. jarvis/jarvis_platform/tongyi.py +24 -25
  107. jarvis/jarvis_platform/yuanbao.py +32 -43
  108. jarvis/jarvis_platform_manager/main.py +66 -77
  109. jarvis/jarvis_platform_manager/service.py +8 -13
  110. jarvis/jarvis_rag/cli.py +53 -55
  111. jarvis/jarvis_rag/embedding_manager.py +13 -18
  112. jarvis/jarvis_rag/llm_interface.py +8 -9
  113. jarvis/jarvis_rag/query_rewriter.py +10 -21
  114. jarvis/jarvis_rag/rag_pipeline.py +24 -27
  115. jarvis/jarvis_rag/reranker.py +4 -5
  116. jarvis/jarvis_rag/retriever.py +28 -30
  117. jarvis/jarvis_sec/__init__.py +305 -0
  118. jarvis/jarvis_sec/agents.py +143 -0
  119. jarvis/jarvis_sec/analysis.py +276 -0
  120. jarvis/jarvis_sec/checkers/__init__.py +32 -0
  121. jarvis/jarvis_sec/checkers/c_checker.py +2680 -0
  122. jarvis/jarvis_sec/checkers/rust_checker.py +1108 -0
  123. jarvis/jarvis_sec/cli.py +139 -0
  124. jarvis/jarvis_sec/clustering.py +1439 -0
  125. jarvis/jarvis_sec/file_manager.py +427 -0
  126. jarvis/jarvis_sec/parsers.py +73 -0
  127. jarvis/jarvis_sec/prompts.py +268 -0
  128. jarvis/jarvis_sec/report.py +336 -0
  129. jarvis/jarvis_sec/review.py +453 -0
  130. jarvis/jarvis_sec/status.py +264 -0
  131. jarvis/jarvis_sec/types.py +20 -0
  132. jarvis/jarvis_sec/utils.py +499 -0
  133. jarvis/jarvis_sec/verification.py +848 -0
  134. jarvis/jarvis_sec/workflow.py +226 -0
  135. jarvis/jarvis_smart_shell/main.py +38 -87
  136. jarvis/jarvis_stats/cli.py +2 -2
  137. jarvis/jarvis_stats/stats.py +8 -8
  138. jarvis/jarvis_stats/storage.py +15 -21
  139. jarvis/jarvis_stats/visualizer.py +1 -1
  140. jarvis/jarvis_tools/clear_memory.py +3 -20
  141. jarvis/jarvis_tools/cli/main.py +21 -23
  142. jarvis/jarvis_tools/edit_file.py +1019 -132
  143. jarvis/jarvis_tools/execute_script.py +83 -25
  144. jarvis/jarvis_tools/file_analyzer.py +6 -9
  145. jarvis/jarvis_tools/generate_new_tool.py +14 -21
  146. jarvis/jarvis_tools/lsp_client.py +1552 -0
  147. jarvis/jarvis_tools/methodology.py +2 -3
  148. jarvis/jarvis_tools/read_code.py +1736 -35
  149. jarvis/jarvis_tools/read_symbols.py +140 -0
  150. jarvis/jarvis_tools/read_webpage.py +12 -13
  151. jarvis/jarvis_tools/registry.py +427 -200
  152. jarvis/jarvis_tools/retrieve_memory.py +20 -19
  153. jarvis/jarvis_tools/rewrite_file.py +72 -158
  154. jarvis/jarvis_tools/save_memory.py +3 -15
  155. jarvis/jarvis_tools/search_web.py +18 -18
  156. jarvis/jarvis_tools/sub_agent.py +36 -43
  157. jarvis/jarvis_tools/sub_code_agent.py +25 -26
  158. jarvis/jarvis_tools/virtual_tty.py +55 -33
  159. jarvis/jarvis_utils/clipboard.py +7 -10
  160. jarvis/jarvis_utils/config.py +232 -45
  161. jarvis/jarvis_utils/embedding.py +8 -5
  162. jarvis/jarvis_utils/fzf.py +8 -8
  163. jarvis/jarvis_utils/git_utils.py +225 -36
  164. jarvis/jarvis_utils/globals.py +3 -3
  165. jarvis/jarvis_utils/http.py +1 -1
  166. jarvis/jarvis_utils/input.py +99 -48
  167. jarvis/jarvis_utils/jsonnet_compat.py +465 -0
  168. jarvis/jarvis_utils/methodology.py +52 -48
  169. jarvis/jarvis_utils/utils.py +819 -491
  170. jarvis_ai_assistant-0.7.6.dist-info/METADATA +600 -0
  171. jarvis_ai_assistant-0.7.6.dist-info/RECORD +218 -0
  172. {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.6.dist-info}/entry_points.txt +4 -0
  173. jarvis/jarvis_agent/config.py +0 -92
  174. jarvis/jarvis_agent/edit_file_handler.py +0 -296
  175. jarvis/jarvis_platform/ai8.py +0 -332
  176. jarvis/jarvis_tools/ask_user.py +0 -54
  177. jarvis_ai_assistant-0.3.30.dist-info/METADATA +0 -381
  178. jarvis_ai_assistant-0.3.30.dist-info/RECORD +0 -137
  179. {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.6.dist-info}/WHEEL +0 -0
  180. {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.6.dist-info}/licenses/LICENSE +0 -0
  181. {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.6.dist-info}/top_level.txt +0 -0
@@ -1,13 +1,33 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  import os
3
- from typing import Any, Dict
3
+ import time
4
+ from typing import Any, Dict, List
4
5
 
5
- from jarvis.jarvis_utils.output import OutputType, PrettyOutput
6
+ from jarvis.jarvis_utils.config import get_max_input_token_count
7
+ from jarvis.jarvis_utils.embedding import get_context_token_count
8
+
9
+ # 尝试导入语言支持模块
10
+ try:
11
+ from jarvis.jarvis_code_agent.code_analyzer.language_support import (
12
+ detect_language,
13
+ get_dependency_analyzer,
14
+ )
15
+ from jarvis.jarvis_code_agent.code_analyzer.structured_code import StructuredCodeExtractor
16
+ LANGUAGE_SUPPORT_AVAILABLE = True
17
+ except ImportError:
18
+ LANGUAGE_SUPPORT_AVAILABLE = False
19
+ def get_dependency_analyzer(language: str):
20
+ return None
21
+ StructuredCodeExtractor = None
6
22
 
7
23
 
8
24
  class ReadCodeTool:
9
25
  name = "read_code"
10
- description = "代码阅读与分析工具,用于读取源代码文件并添加行号,针对代码文件优化,提供更好的格式化输出和行号显示,适用于代码分析、审查和理解代码实现的场景"
26
+ description = (
27
+ "结构化读取源代码文件。"
28
+ "支持的语言按语法单元(函数、类等)读取;不支持的语言按空白行分组;"
29
+ "raw_mode=true 时按每20行分组读取。"
30
+ )
11
31
  # 工具标签
12
32
  parameters = {
13
33
  "type": "object",
@@ -20,17 +40,667 @@ class ReadCodeTool:
20
40
  "path": {"type": "string"},
21
41
  "start_line": {"type": "number", "default": 1},
22
42
  "end_line": {"type": "number", "default": -1},
43
+ "raw_mode": {"type": "boolean", "default": False},
23
44
  },
24
45
  "required": ["path"],
25
46
  },
26
- "description": "要读取的文件列表",
47
+ "description": "要读取的文件列表,每个文件可指定行号范围(start_line 到 end_line,-1 表示文件末尾)。raw_mode为true时按每20行分组读取(原始模式)。",
27
48
  }
28
49
  },
29
50
  "required": ["files"],
30
51
  }
31
-
52
+
53
+ def _extract_syntax_units(
54
+ self, filepath: str, content: str, start_line: int, end_line: int
55
+ ) -> List[Dict[str, Any]]:
56
+ """提取语法单元(函数、类等)
57
+
58
+ Args:
59
+ filepath: 文件路径
60
+ content: 文件内容
61
+ start_line: 起始行号
62
+ end_line: 结束行号
63
+
64
+ Returns:
65
+ 语法单元列表,每个单元包含 id, start_line, end_line, content
66
+ """
67
+ if StructuredCodeExtractor:
68
+ return StructuredCodeExtractor.extract_syntax_units(filepath, content, start_line, end_line)
69
+ return []
70
+
71
+ def _extract_syntax_units_with_split(
72
+ self, filepath: str, content: str, start_line: int, end_line: int
73
+ ) -> List[Dict[str, Any]]:
74
+ """提取语法单元,然后对超过50行的单元进行二级切分:
75
+ 1. 先按连续空白行切分大块
76
+ 2. 如果子块仍然超过50行,再按固定行数(50行一组)切分
77
+
78
+ Args:
79
+ filepath: 文件路径
80
+ content: 文件内容
81
+ start_line: 起始行号
82
+ end_line: 结束行号
83
+
84
+ Returns:
85
+ 语法单元列表,每个单元不超过50行
86
+ """
87
+ # 先获取语法单元(仅在支持语法解析的语言中才会返回非空)
88
+ syntax_units = self._extract_syntax_units(filepath, content, start_line, end_line)
89
+
90
+ if not syntax_units:
91
+ return []
92
+
93
+ result = []
94
+ for unit in syntax_units:
95
+ unit_line_count = unit['end_line'] - unit['start_line'] + 1
96
+ if unit_line_count > 50:
97
+ # 第一步:对大块先按空白行切分(基于 StructuredCodeExtractor)
98
+ blank_groups = self._extract_blank_line_groups(
99
+ content, unit['start_line'], unit['end_line']
100
+ )
101
+
102
+ # 如果按空白行切分失败(例如全部为空白或实现返回空),退回原始大块
103
+ if not blank_groups:
104
+ blank_groups = [unit]
105
+
106
+ for group in blank_groups:
107
+ group_line_count = group['end_line'] - group['start_line'] + 1
108
+ if group_line_count > 50:
109
+ # 第二步:对子块中仍然超过50行的部分,按每50行固定切分
110
+ sub_groups = self._extract_line_groups(
111
+ content, group['start_line'], group['end_line'], group_size=50
112
+ )
113
+ result.extend(sub_groups)
114
+ else:
115
+ # 经过空白行切分得到的中等大小块,直接加入结果
116
+ result.append(group)
117
+ else:
118
+ # 如果单元不超过50行,直接添加
119
+ result.append(unit)
120
+
121
+ return result
122
+
123
+ def _extract_blank_line_groups(
124
+ self, content: str, start_line: int, end_line: int
125
+ ) -> List[Dict[str, Any]]:
126
+ """按空白行分组提取内容(委托给StructuredCodeExtractor)"""
127
+ if StructuredCodeExtractor:
128
+ return StructuredCodeExtractor.extract_blank_line_groups(content, start_line, end_line)
129
+ return []
130
+
131
+ def _extract_blank_line_groups_with_split(
132
+ self, content: str, start_line: int, end_line: int
133
+ ) -> List[Dict[str, Any]]:
134
+ """先按空白行分组,然后对超过20行的块再按每20行分割
135
+
136
+ Args:
137
+ content: 文件内容
138
+ start_line: 起始行号
139
+ end_line: 结束行号
140
+
141
+ Returns:
142
+ 分组列表,每个分组包含 id, start_line, end_line, content
143
+ """
144
+ # 先获取空白行分组
145
+ blank_line_groups = self._extract_blank_line_groups(content, start_line, end_line)
146
+
147
+ if not blank_line_groups:
148
+ return []
149
+
150
+ result = []
151
+ for group in blank_line_groups:
152
+ group_line_count = group['end_line'] - group['start_line'] + 1
153
+ if group_line_count > 20:
154
+ # 如果块超过20行,按每20行分割
155
+ sub_groups = self._extract_line_groups(
156
+ content, group['start_line'], group['end_line'], group_size=20
157
+ )
158
+ result.extend(sub_groups)
159
+ else:
160
+ # 如果块不超过20行,直接添加
161
+ result.append(group)
162
+
163
+ return result
164
+
165
+ def _extract_line_groups(
166
+ self, content: str, start_line: int, end_line: int, group_size: int = 20
167
+ ) -> List[Dict[str, Any]]:
168
+ """按行号分组提取内容(委托给StructuredCodeExtractor)"""
169
+ if StructuredCodeExtractor:
170
+ return StructuredCodeExtractor.extract_line_groups(content, start_line, end_line, group_size)
171
+ return []
172
+
173
+ def _ensure_unique_ids(self, units: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
174
+ """确保单元列表中所有id唯一(委托给StructuredCodeExtractor)"""
175
+ if StructuredCodeExtractor:
176
+ return StructuredCodeExtractor.ensure_unique_ids(units)
177
+ return units
178
+
179
+ def _extract_imports(self, filepath: str, content: str, start_line: int, end_line: int) -> List[Dict[str, Any]]:
180
+ """提取文件的导入/包含语句作为结构化单元(委托给StructuredCodeExtractor)"""
181
+ if StructuredCodeExtractor:
182
+ return StructuredCodeExtractor.extract_imports(filepath, content, start_line, end_line)
183
+ return []
184
+
185
+ def _create_import_unit(self, import_group: List[Dict[str, Any]]) -> Dict[str, Any]:
186
+ """创建导入语句单元(委托给StructuredCodeExtractor)"""
187
+ if StructuredCodeExtractor:
188
+ return StructuredCodeExtractor.create_import_unit(import_group)
189
+ return {}
190
+
191
+ def _format_structured_output(
192
+ self, filepath: str, units: List[Dict[str, Any]], total_lines: int, agent: Any = None
193
+ ) -> str:
194
+ """格式化结构化输出
195
+
196
+ Args:
197
+ filepath: 文件路径
198
+ units: 语法单元或行号分组列表(已包含导入语句单元)
199
+ total_lines: 文件总行数
200
+ agent: Agent实例,用于从缓存中获取block_id
201
+
202
+ Returns:
203
+ 格式化后的输出字符串
204
+ """
205
+ # 文件开始分界符
206
+ output_lines = [
207
+ "=" * 80,
208
+ f"🔍 文件: {filepath}",
209
+ f"📄 总行数: {total_lines}",
210
+ f"📦 结构化单元数: {len(units)}",
211
+ "=" * 80,
212
+ "",
213
+ ]
214
+
215
+ # 为每个单元分配block-id
216
+ # 如果unit已经有block_id(从缓存中获取),直接使用;否则按顺序生成
217
+ for idx, unit in enumerate(units, start=1):
218
+ # 如果unit已经有block_id,直接使用(在生成structured_units时已分配)
219
+ block_id = unit.get('block_id')
220
+ if not block_id:
221
+ # 否则按顺序生成临时id
222
+ block_id = f"block-{idx}"
223
+ # 显示id
224
+ output_lines.append(f"[id:{block_id}]")
225
+ # 添加内容,保持原有缩进,并为每行添加行号
226
+ content = unit.get('content', '')
227
+ if content:
228
+ # 获取单元的起始行号
229
+ start_line = unit.get('start_line', 1)
230
+ # 将内容按行分割
231
+ content_lines = content.split('\n')
232
+ # 为每一行添加行号(右对齐,4位,不足补空格)
233
+ numbered_lines = []
234
+ current_line = start_line
235
+ for line in content_lines:
236
+ # 行号右对齐,占4位
237
+ line_number_str = f"{current_line:4d}"
238
+ numbered_lines.append(f"{line_number_str}:{line}")
239
+ current_line += 1
240
+ # 将带行号的内容添加到输出
241
+ output_lines.append('\n'.join(numbered_lines))
242
+ # 块结束分界符
243
+ output_lines.append("-" * 80)
244
+ output_lines.append("") # 单元之间空行分隔
245
+
246
+ # 文件结束分界符
247
+ output_lines.append("=" * 80)
248
+ output_lines.append("")
249
+
250
+ return '\n'.join(output_lines)
251
+
252
+ def _get_file_cache(self, agent: Any, filepath: str) -> Dict[str, Any]:
253
+ """获取文件的缓存信息
254
+
255
+ Args:
256
+ agent: Agent实例
257
+ filepath: 文件路径
258
+
259
+ Returns:
260
+ 缓存信息字典,如果不存在则返回None
261
+ """
262
+ if not agent:
263
+ return None
264
+
265
+ cache = agent.get_user_data("read_code_cache")
266
+ if not cache:
267
+ return None
268
+
269
+ abs_path = os.path.abspath(filepath)
270
+ return cache.get(abs_path)
271
+
272
+ def _get_blocks_from_cache(self, cache_info: Dict[str, Any], start_line: int, end_line: int) -> List[Dict[str, Any]]:
273
+ """从缓存中获取对应范围的blocks
274
+
275
+ Args:
276
+ cache_info: 缓存信息
277
+ start_line: 起始行号(1-based)
278
+ end_line: 结束行号(1-based,-1表示文件末尾)
279
+
280
+ Returns:
281
+ blocks列表,每个block包含block_id和content
282
+ """
283
+ if not cache_info or "id_list" not in cache_info or "blocks" not in cache_info:
284
+ return []
285
+
286
+ id_list = cache_info.get("id_list", [])
287
+ blocks = cache_info.get("blocks", {})
288
+ result = []
289
+
290
+ # 如果end_line是-1,表示文件末尾,需要先计算文件总行数
291
+ if end_line == -1:
292
+ # 先遍历所有blocks计算总行数
293
+ # 注意:块内容不包含末尾换行符,块之间需要添加换行符
294
+ total_lines = 0
295
+ for idx, block_id in enumerate(id_list):
296
+ block_data = blocks.get(block_id)
297
+ if block_data:
298
+ block_content = block_data.get("content", "")
299
+ if block_content:
300
+ # 块内容中的换行符数量 + 1 = 行数
301
+ block_line_count = block_content.count('\n') + 1
302
+ total_lines += block_line_count
303
+ # 如果不是最后一个块,块之间有一个换行符分隔(已计入下一个块的第一行)
304
+ # 所以不需要额外添加
305
+ end_line = total_lines
306
+
307
+ # 通过前面blocks的内容推算每个block的行号范围
308
+ # 注意:块内容不包含末尾换行符,块之间需要添加换行符
309
+ current_line = 1 # 从第1行开始
310
+
311
+ for idx, block_id in enumerate(id_list):
312
+ block_data = blocks.get(block_id)
313
+ if not block_data:
314
+ continue
315
+ block_content = block_data.get("content", "")
316
+ if not block_content:
317
+ continue
318
+
319
+ # 计算这个block的行数
320
+ # 块内容中的换行符数量 + 1 = 行数(因为块内容不包含末尾换行符)
321
+ block_line_count = block_content.count('\n') + 1
322
+
323
+ block_start_line = current_line
324
+ block_end_line = current_line + block_line_count - 1
325
+
326
+ # block与请求范围有重叠就包含
327
+ if block_end_line >= start_line and block_start_line <= end_line:
328
+ result.append({
329
+ "block_id": block_id,
330
+ "content": block_content,
331
+ "start_line": block_start_line,
332
+ })
333
+
334
+ # 更新当前行号
335
+ # 块之间有一个换行符分隔,所以下一个块从 block_end_line + 1 开始
336
+ current_line = block_end_line + 1
337
+
338
+ # 如果已经超过请求的结束行,可以提前退出
339
+ if block_start_line > end_line:
340
+ break
341
+
342
+ return result
343
+
344
+ def _convert_units_to_sequential_ids(self, units: List[Dict[str, Any]], full_content: str = None) -> Dict[str, Any]:
345
+ """将单元列表转换为缓存格式(id_list和blocks字典)
346
+
347
+ 按照行号范围分割文件,不区分语法单元,确保完美恢复。
348
+
349
+ Args:
350
+ units: 结构化单元列表,每个单元包含 id, start_line, end_line, content
351
+ full_content: 完整的文件内容(可选),用于确保块之间的空白行也被包含
352
+
353
+ Returns:
354
+ 包含 id_list 和 blocks 的字典:
355
+ - id_list: 有序的id列表,如 ["block-1", "block-2", "block-3"]
356
+ - blocks: id到块信息的字典,如 {"block-1": {"content": "..."}, ...}
357
+ """
358
+ if not full_content or not units:
359
+ # 没有完整内容,直接使用原始的content
360
+ sorted_original = sorted(units, key=lambda u: u.get('start_line', 0))
361
+ id_list = []
362
+ blocks = {}
363
+ for unit in sorted_original:
364
+ block_id = f"block-{len(id_list) + 1}" # block-1, block-2, ...
365
+ id_list.append(block_id)
366
+ content = unit.get('content', '')
367
+ # 去掉块末尾的换行符
368
+ if content.endswith('\n'):
369
+ content = content[:-1]
370
+ blocks[block_id] = {
371
+ "content": content,
372
+ }
373
+ return {
374
+ "id_list": id_list,
375
+ "blocks": blocks,
376
+ "file_ends_with_newline": False, # 无法确定,默认False
377
+ }
378
+
379
+ # 收集所有单元的开始行号作为分割点
380
+ # 关键:直接使用每个单元的start_line,不合并范围,保留语法单元边界
381
+ split_points_set = {1} # 从第1行开始
382
+ for unit in units:
383
+ start_line = unit.get('start_line', 1)
384
+ if start_line > 0:
385
+ split_points_set.add(start_line)
386
+
387
+ if not split_points_set:
388
+ # 没有有效的分割点,返回空列表
389
+ return {"id_list": [], "blocks": {}, "file_ends_with_newline": False}
390
+
391
+ # 按照每个单元的开始行作为分割点,连续分割文件内容
392
+ # 每个块包含从当前分割点到下一个分割点之前的所有内容
393
+ # 关键:直接按行号范围从原始内容中提取,确保完美恢复(包括文件末尾的换行符和所有空白行)
394
+ # 使用 split('\n') 分割,然后手动为每行添加换行符(除了最后一行,根据原始文件决定)
395
+ lines = full_content.split('\n')
396
+ result_units = []
397
+
398
+ # 排序分割点
399
+ split_points = sorted(split_points_set)
400
+ split_points.append(len(lines) + 1) # 文件末尾
401
+
402
+ # 按照分割点连续分割文件
403
+ # 注意:如果文件以换行符结尾,split('\n')会在末尾产生一个空字符串
404
+ # 我们需要正确处理这种情况
405
+ file_ends_with_newline = full_content.endswith('\n')
406
+
407
+ for idx in range(len(split_points) - 1):
408
+ start_line = split_points[idx] # 1-based
409
+ next_start_line = split_points[idx + 1] # 1-based
410
+
411
+ # 提取从当前分割点到下一个分割点之前的所有内容
412
+ unit_start_idx = max(0, start_line - 1) # 0-based索引
413
+ unit_end_idx = min(len(lines) - 1, next_start_line - 2) # 0-based索引,下一个分割点之前
414
+
415
+ # 确保索引有效
416
+ if unit_start_idx <= unit_end_idx:
417
+ # 提取行并重新组合,确保保留所有换行符
418
+ extracted_lines = lines[unit_start_idx:unit_end_idx + 1]
419
+
420
+ # 重新组合:每行后面添加换行符
421
+ # 对于非最后一个块,最后一行也需要换行符,因为下一个块从下一行开始
422
+ # 对于最后一个块,根据原始文件是否以换行符结尾来决定
423
+ full_unit_content_parts = []
424
+ is_last_block = (idx == len(split_points) - 2)
425
+
426
+ for i, line in enumerate(extracted_lines):
427
+ if i < len(extracted_lines) - 1:
428
+ # 不是最后一行,添加换行符
429
+ full_unit_content_parts.append(line + '\n')
430
+ else:
431
+ # 最后一行
432
+ if not is_last_block:
433
+ # 非最后一个块:最后一行必须添加换行符,因为下一个块从下一行开始
434
+ # 这样可以保留块之间的空白行
435
+ full_unit_content_parts.append(line + '\n')
436
+ else:
437
+ # 最后一个块:需要特殊处理
438
+ # 如果文件以换行符结尾,且最后一行是空字符串(来自split('\n')的副作用),
439
+ # 且不是唯一的一行,那么前面的行已经输出了换行符,这里不需要再输出
440
+ if file_ends_with_newline and line == '' and len(extracted_lines) > 1:
441
+ # 最后一行是空字符串且来自trailing newline,且不是唯一的一行
442
+ # 前面的行已经输出了换行符,所以这里不需要再输出任何内容
443
+ # 空字符串表示不输出任何内容
444
+ full_unit_content_parts.append('')
445
+ elif file_ends_with_newline:
446
+ # 文件以换行符结尾,最后一行需要换行符
447
+ full_unit_content_parts.append(line + '\n')
448
+ else:
449
+ # 文件不以换行符结尾
450
+ full_unit_content_parts.append(line)
451
+
452
+ full_unit_content = ''.join(full_unit_content_parts)
453
+
454
+ # 去掉块末尾的换行符(存储时去掉,恢复时再添加)
455
+ if full_unit_content.endswith('\n'):
456
+ full_unit_content = full_unit_content[:-1]
457
+
458
+ block_id = f"block-{len(result_units) + 1}" # block-1, block-2, ...
459
+ result_units.append({
460
+ "id": block_id,
461
+ "content": full_unit_content,
462
+ })
463
+
464
+ # 转换为 id_list 和 blocks 格式
465
+ id_list = [unit["id"] for unit in result_units]
466
+ blocks = {
467
+ unit["id"]: {
468
+ "content": unit["content"],
469
+ }
470
+ for unit in result_units
471
+ }
472
+
473
+ # 保存文件是否以换行符结尾的信息(用于恢复时正确处理)
474
+ file_ends_with_newline = full_content.endswith('\n')
475
+
476
+ return {
477
+ "id_list": id_list,
478
+ "blocks": blocks,
479
+ "file_ends_with_newline": file_ends_with_newline,
480
+ }
481
+
482
+ def _save_file_cache(
483
+ self, agent: Any, filepath: str, units: List[Dict[str, Any]],
484
+ total_lines: int, file_mtime: float, full_content: str = None
485
+ ) -> None:
486
+ """保存文件的结构化信息到缓存
487
+
488
+ Args:
489
+ agent: Agent实例
490
+ filepath: 文件路径
491
+ units: 结构化单元列表
492
+ total_lines: 文件总行数
493
+ file_mtime: 文件修改时间
494
+ full_content: 完整的文件内容(可选),用于确保块之间的空白行也被包含
495
+ """
496
+ if not agent:
497
+ return
498
+
499
+ cache = agent.get_user_data("read_code_cache")
500
+ if not cache:
501
+ cache = {}
502
+ agent.set_user_data("read_code_cache", cache)
503
+
504
+ abs_path = os.path.abspath(filepath)
505
+
506
+ # 转换为 id_list 和 blocks 格式
507
+ cache_data = self._convert_units_to_sequential_ids(units, full_content)
508
+
509
+ cache[abs_path] = {
510
+ "id_list": cache_data["id_list"],
511
+ "blocks": cache_data["blocks"],
512
+ "total_lines": total_lines,
513
+ "read_time": time.time(),
514
+ "file_mtime": file_mtime,
515
+ "file_ends_with_newline": cache_data.get("file_ends_with_newline", False),
516
+ }
517
+ agent.set_user_data("read_code_cache", cache)
518
+
519
+ def _is_cache_valid(self, cache_info: Dict[str, Any], filepath: str) -> bool:
520
+ """检查缓存是否有效
521
+
522
+ Args:
523
+ cache_info: 缓存信息字典
524
+ filepath: 文件路径
525
+
526
+ Returns:
527
+ True表示缓存有效,False表示缓存无效
528
+ """
529
+ if not cache_info:
530
+ return False
531
+
532
+ try:
533
+ # 检查文件是否存在
534
+ if not os.path.exists(filepath):
535
+ return False
536
+
537
+ # 检查文件修改时间是否变化
538
+ current_mtime = os.path.getmtime(filepath)
539
+ cached_mtime = cache_info.get("file_mtime")
540
+
541
+ if cached_mtime is None or abs(current_mtime - cached_mtime) > 0.1: # 允许0.1秒的误差
542
+ return False
543
+
544
+ # 检查缓存数据结构是否完整
545
+ if "id_list" not in cache_info or "blocks" not in cache_info or "total_lines" not in cache_info:
546
+ return False
547
+
548
+ return True
549
+ except Exception:
550
+ return False
551
+
552
+ def _restore_file_from_cache(self, cache_info: Dict[str, Any]) -> str:
553
+ """从缓存恢复文件内容
554
+
555
+ Args:
556
+ cache_info: 缓存信息字典
557
+
558
+ Returns:
559
+ 恢复的文件内容字符串(与原始文件内容完全一致)
560
+ """
561
+ if not cache_info:
562
+ return ""
563
+
564
+ # 按照 id_list 的顺序恢复
565
+ id_list = cache_info.get("id_list", [])
566
+ blocks = cache_info.get("blocks", {})
567
+ file_ends_with_newline = cache_info.get("file_ends_with_newline", False)
568
+
569
+ result = []
570
+ for idx, block_id in enumerate(id_list):
571
+ block = blocks.get(block_id)
572
+ if block:
573
+ content = block.get('content', '')
574
+ if content:
575
+ result.append(content)
576
+ # 在块之间添加换行符(最后一个块后面根据文件是否以换行符结尾决定)
577
+ is_last_block = (idx == len(id_list) - 1)
578
+ if is_last_block:
579
+ # 最后一个块:如果文件以换行符结尾,添加换行符
580
+ if file_ends_with_newline:
581
+ result.append('\n')
582
+ else:
583
+ # 非最后一个块:在块之间添加换行符
584
+ result.append('\n')
585
+
586
+ return ''.join(result) if result else ""
587
+
588
+ def _estimate_structured_tokens(
589
+ self, filepath: str, content: str, start_line: int, end_line: int, total_lines: int, raw_mode: bool = False
590
+ ) -> int:
591
+ """估算结构化输出的token数
592
+
593
+ Args:
594
+ filepath: 文件路径
595
+ content: 文件内容
596
+ start_line: 起始行号
597
+ end_line: 结束行号
598
+ total_lines: 文件总行数
599
+
600
+ Returns:
601
+ 估算的token数
602
+ """
603
+ try:
604
+ if raw_mode:
605
+ # 原始模式:按每20行分组计算token
606
+ line_groups = self._extract_line_groups(content, start_line, end_line, group_size=20)
607
+ if line_groups:
608
+ import_units = self._extract_imports(filepath, content, start_line, end_line)
609
+ all_units = import_units + line_groups[:1]
610
+ # 确保id唯一
611
+ all_units = self._ensure_unique_ids(all_units)
612
+ # 按行号排序
613
+ all_units.sort(key=lambda u: u['start_line'])
614
+ sample_output = self._format_structured_output(filepath, all_units, total_lines)
615
+ if len(line_groups) > 1:
616
+ group_tokens = get_context_token_count(sample_output)
617
+ return group_tokens * len(line_groups)
618
+ else:
619
+ return get_context_token_count(sample_output)
620
+ else:
621
+ # 尝试提取语法单元(确保每个单元不超过50行)
622
+ syntax_units = self._extract_syntax_units_with_split(filepath, content, start_line, end_line)
623
+
624
+ if syntax_units:
625
+ # 使用语法单元结构化输出格式计算token
626
+ import_units = self._extract_imports(filepath, content, start_line, end_line)
627
+ all_units = import_units + syntax_units[:1]
628
+ # 确保id唯一
629
+ all_units = self._ensure_unique_ids(all_units)
630
+ # 按行号排序
631
+ all_units.sort(key=lambda u: u['start_line'])
632
+ sample_output = self._format_structured_output(filepath, all_units, total_lines)
633
+ if len(syntax_units) > 1:
634
+ unit_tokens = get_context_token_count(sample_output)
635
+ return unit_tokens * len(syntax_units)
636
+ else:
637
+ return get_context_token_count(sample_output)
638
+ else:
639
+ # 使用空白行分组格式计算token(不支持语言时)
640
+ # 先按空行分割,然后对超过20行的块再按每20行分割
641
+ line_groups = self._extract_blank_line_groups_with_split(content, start_line, end_line)
642
+ if line_groups:
643
+ import_units = self._extract_imports(filepath, content, start_line, end_line)
644
+ all_units = import_units + line_groups[:1]
645
+ # 确保id唯一
646
+ all_units = self._ensure_unique_ids(all_units)
647
+ # 按行号排序
648
+ all_units.sort(key=lambda u: u['start_line'])
649
+ sample_output = self._format_structured_output(filepath, all_units, total_lines)
650
+ if len(line_groups) > 1:
651
+ group_tokens = get_context_token_count(sample_output)
652
+ return group_tokens * len(line_groups)
653
+ else:
654
+ return get_context_token_count(sample_output)
655
+ else:
656
+ # 回退到原始格式计算
657
+ lines = content.split('\n')
658
+ selected_lines = lines[start_line - 1:end_line]
659
+ numbered_content = "".join(f"{i:5d}:{line}\n" for i, line in enumerate(selected_lines, start=start_line))
660
+ return get_context_token_count(numbered_content)
661
+ except Exception:
662
+ # 如果估算失败,使用简单的行号格式估算
663
+ lines = content.split('\n')
664
+ selected_lines = lines[start_line - 1:end_line]
665
+ numbered_content = "".join(f"{i:5d}:{line}\n" for i, line in enumerate(selected_lines, start=start_line))
666
+ return get_context_token_count(numbered_content)
667
+
668
+ def _get_max_token_limit(self, agent: Any = None) -> int:
669
+ """获取基于剩余token数量的token限制
670
+
671
+ Args:
672
+ agent: Agent实例,用于获取模型和剩余token数量
673
+
674
+ Returns:
675
+ int: 允许的最大token数(剩余token的2/3,或至少保留1/3剩余token)
676
+ """
677
+ try:
678
+ # 优先使用剩余token数量
679
+ if agent and hasattr(agent, "model"):
680
+ try:
681
+ remaining_tokens = agent.model.get_remaining_token_count()
682
+ # 使用剩余token的2/3作为限制,保留1/3作为安全余量
683
+ limit_tokens = int(remaining_tokens * 2 / 3)
684
+ # 确保至少返回一个合理的值
685
+ if limit_tokens > 0:
686
+ return limit_tokens
687
+ except Exception:
688
+ pass
689
+
690
+ # 回退方案:使用输入窗口的2/3
691
+ model_group = None
692
+ if agent:
693
+ model_group = getattr(agent, "model_group", None)
694
+
695
+ max_input_tokens = get_max_input_token_count(model_group)
696
+ # 计算2/3限制的token数
697
+ limit_tokens = int(max_input_tokens * 2 / 3)
698
+ return limit_tokens
699
+ except Exception:
700
+ # 如果获取失败,使用默认值(假设32000 token,2/3是21333)
701
+ return 21333
32
702
  def _handle_single_file(
33
- self, filepath: str, start_line: int = 1, end_line: int = -1, agent: Any = None
703
+ self, filepath: str, start_line: int = 1, end_line: int = -1, agent: Any = None, raw_mode: bool = False
34
704
  ) -> Dict[str, Any]:
35
705
  """处理单个文件的读取操作
36
706
 
@@ -38,6 +708,7 @@ class ReadCodeTool:
38
708
  filepath (str): 文件路径
39
709
  start_line (int): 起始行号,默认为1
40
710
  end_line (int): 结束行号,默认为-1表示文件末尾
711
+ agent: Agent实例,用于获取上下文管理器
41
712
 
42
713
  Returns:
43
714
  Dict[str, Any]: 包含成功状态、输出内容和错误信息的字典
@@ -62,10 +733,9 @@ class ReadCodeTool:
62
733
  }
63
734
 
64
735
  # 读取文件内容
736
+ # 第一遍流式读取,仅统计总行数,避免一次性读入内存
65
737
  with open(abs_path, "r", encoding="utf-8", errors="ignore") as f:
66
- lines = f.readlines()
67
-
68
- total_lines = len(lines)
738
+ total_lines = sum(1 for _ in f)
69
739
 
70
740
  # 处理空文件情况
71
741
  if total_lines == 0:
@@ -99,21 +769,220 @@ class ReadCodeTool:
99
769
  "stderr": f"无效的行范围 [{start_line}-{end_line}] (总行数: {total_lines})",
100
770
  }
101
771
 
102
- # 添加行号并构建输出内容
103
- selected_lines = lines[start_line - 1 : end_line]
104
- numbered_content = "".join(
105
- [
106
- f"{i:4d}:{line}"
107
- for i, line in enumerate(selected_lines, start=start_line)
108
- ]
109
- )
772
+ # 获取文件修改时间
773
+ file_mtime = os.path.getmtime(abs_path)
774
+
775
+ # 检查缓存是否有效
776
+ cache_info = self._get_file_cache(agent, abs_path)
777
+ use_cache = self._is_cache_valid(cache_info, abs_path)
110
778
 
111
- # 构建输出格式
112
- output = (
113
- f"\n🔍 文件: {abs_path}\n"
114
- f"📄 原始行号: {start_line}-{end_line} (共{total_lines}行) \n\n"
115
- f"{numbered_content}\n\n"
116
- )
779
+ # 读取完整文件内容用于语法分析和token计算
780
+ if use_cache:
781
+ # 从缓存恢复文件内容
782
+ full_content = self._restore_file_from_cache(cache_info)
783
+ # 如果恢复失败,重新读取文件
784
+ if not full_content:
785
+ with open(abs_path, "r", encoding="utf-8", errors="ignore") as f:
786
+ full_content = f.read()
787
+ else:
788
+ # 读取文件内容
789
+ with open(abs_path, "r", encoding="utf-8", errors="ignore") as f:
790
+ full_content = f.read()
791
+
792
+ # 读取要读取的行范围内容
793
+ selected_content_lines = []
794
+ lines = full_content.split('\n')
795
+ for i in range(start_line - 1, min(end_line, len(lines))):
796
+ selected_content_lines.append(lines[i])
797
+
798
+ # 估算结构化输出的token数
799
+ content_tokens = self._estimate_structured_tokens(abs_path, full_content, start_line, end_line, total_lines, raw_mode)
800
+
801
+ max_token_limit = self._get_max_token_limit(agent)
802
+
803
+ # 检查单文件读取token数是否超过2/3限制
804
+ if content_tokens > max_token_limit:
805
+ read_lines = end_line - start_line + 1
806
+
807
+ # 计算安全读取的行数 (按比例缩减)
808
+ safe_lines = int((max_token_limit / content_tokens) * read_lines)
809
+ safe_lines = max(1, min(safe_lines, read_lines))
810
+ safe_end_line = start_line + safe_lines - 1
811
+
812
+ # 读取安全范围内的内容
813
+ selected_content_lines = []
814
+ for i in range(start_line - 1, min(safe_end_line, len(lines))):
815
+ selected_content_lines.append(lines[i])
816
+
817
+ # 构造部分读取结果
818
+ partial_content = '\n'.join(selected_content_lines)
819
+
820
+ return {
821
+ "success": True,
822
+ "stdout": (
823
+ f"⚠️ 警告: 仅读取前{safe_lines}行 (共{read_lines}行),因为内容超出限制\n"
824
+ f"📊 实际读取范围: {start_line}-{safe_end_line} (原请求范围: {start_line}-{end_line})\n\n"
825
+ f"{partial_content}\n\n"
826
+ f"💡 建议:\n"
827
+ f" 1. 如需继续读取,请使用:\n"
828
+ f" start_line={safe_end_line + 1}&end_line={end_line}\n"
829
+ f" 2. 需要读取全部内容? 请缩小行范围或分批读取"
830
+ ),
831
+ "stderr": (
832
+ f"原始请求范围 {start_line}-{end_line} 超过token限制 "
833
+ f"({content_tokens}/{max_token_limit} tokens)"
834
+ ),
835
+ }
836
+
837
+ # 生成整个文件的结构化信息(用于缓存)
838
+ # 提取整个文件的导入/包含语句
839
+ full_import_units = self._extract_imports(abs_path, full_content, 1, total_lines)
840
+
841
+ # 生成整个文件的结构化单元
842
+ full_structured_units = None
843
+
844
+ if raw_mode:
845
+ # 原始读取模式:按每20行分组(整个文件)
846
+ full_line_groups = self._extract_line_groups(full_content, 1, total_lines, group_size=20)
847
+ # 合并导入单元和行号分组
848
+ full_all_units = full_import_units + full_line_groups
849
+ # 确保id唯一
850
+ full_all_units = self._ensure_unique_ids(full_all_units)
851
+ # 按行号排序
852
+ full_all_units.sort(key=lambda u: u['start_line'])
853
+ full_structured_units = full_all_units
854
+ else:
855
+ # 尝试提取整个文件的语法单元(确保每个单元不超过50行)
856
+ full_syntax_units = self._extract_syntax_units_with_split(abs_path, full_content, 1, total_lines)
857
+
858
+ # 检测语言类型
859
+ if LANGUAGE_SUPPORT_AVAILABLE:
860
+ try:
861
+ detect_language(abs_path)
862
+ except Exception:
863
+ pass
864
+
865
+ if full_syntax_units:
866
+ # 合并导入单元和语法单元
867
+ full_all_units = full_import_units + full_syntax_units
868
+ # 确保id唯一
869
+ full_all_units = self._ensure_unique_ids(full_all_units)
870
+ # 按行号排序
871
+ full_all_units.sort(key=lambda u: u['start_line'])
872
+ full_structured_units = full_all_units
873
+ else:
874
+ # 使用空白行分组结构化输出(不支持语言时)
875
+ # 先按空行分割,然后对超过20行的块再按每20行分割(整个文件)
876
+ full_line_groups = self._extract_blank_line_groups_with_split(full_content, 1, total_lines)
877
+ # 合并导入单元和行号分组
878
+ full_all_units = full_import_units + full_line_groups
879
+ # 确保id唯一
880
+ full_all_units = self._ensure_unique_ids(full_all_units)
881
+ # 按行号排序
882
+ full_all_units.sort(key=lambda u: u['start_line'])
883
+ full_structured_units = full_all_units
884
+
885
+ # 保存整个文件的结构化信息到缓存
886
+ if full_structured_units is not None:
887
+ self._save_file_cache(agent, abs_path, full_structured_units, total_lines, file_mtime, full_content)
888
+
889
+ # 如果缓存有效,直接使用缓存中的blocks输出
890
+ if agent:
891
+ cache_info = self._get_file_cache(agent, abs_path)
892
+ if cache_info and self._is_cache_valid(cache_info, abs_path):
893
+ # 直接从缓存中获取对应范围的blocks
894
+ cached_blocks = self._get_blocks_from_cache(cache_info, start_line, end_line)
895
+ if cached_blocks:
896
+ # 转换为units格式(用于输出),保留真实的文件起始行号
897
+ structured_units = []
898
+ for block in cached_blocks:
899
+ structured_units.append({
900
+ "block_id": block["block_id"],
901
+ "content": block["content"],
902
+ "start_line": block.get("start_line", 1),
903
+ })
904
+ output = self._format_structured_output(abs_path, structured_units, total_lines, agent)
905
+ else:
906
+ output = ""
907
+ else:
908
+ # 缓存无效,重新提取units
909
+ # 提取请求范围的结构化单元(用于输出)
910
+ import_units = self._extract_imports(abs_path, full_content, start_line, end_line)
911
+
912
+ # 确定使用的结构化单元(语法单元或行号分组)
913
+ structured_units = None
914
+
915
+ if raw_mode:
916
+ # 原始读取模式:按每20行分组
917
+ line_groups = self._extract_line_groups(full_content, start_line, end_line, group_size=20)
918
+ # 合并导入单元和行号分组
919
+ all_units = import_units + line_groups
920
+ # 确保id唯一
921
+ all_units = self._ensure_unique_ids(all_units)
922
+ # 按行号排序,所有单元按在文件中的实际位置排序
923
+ all_units.sort(key=lambda u: u['start_line'])
924
+ structured_units = all_units
925
+ else:
926
+ # 尝试提取语法单元(结构化读取,full_content 已在上面读取,确保每个单元不超过50行)
927
+ syntax_units = self._extract_syntax_units_with_split(abs_path, full_content, start_line, end_line)
928
+
929
+ if syntax_units:
930
+ # 合并导入单元和语法单元
931
+ all_units = import_units + syntax_units
932
+ # 确保id唯一
933
+ all_units = self._ensure_unique_ids(all_units)
934
+ # 按行号排序,所有单元按在文件中的实际位置排序
935
+ all_units.sort(key=lambda u: u['start_line'])
936
+ structured_units = all_units
937
+ else:
938
+ # 使用空白行分组结构化输出(不支持语言时)
939
+ # 先按空行分割,然后对超过20行的块再按每20行分割
940
+ line_groups = self._extract_blank_line_groups_with_split(full_content, start_line, end_line)
941
+ # 合并导入单元和行号分组
942
+ all_units = import_units + line_groups
943
+ # 确保id唯一
944
+ all_units = self._ensure_unique_ids(all_units)
945
+ # 按行号排序,所有单元按在文件中的实际位置排序
946
+ all_units.sort(key=lambda u: u['start_line'])
947
+ structured_units = all_units
948
+
949
+ if structured_units:
950
+ output = self._format_structured_output(abs_path, structured_units, total_lines, agent)
951
+ else:
952
+ output = ""
953
+ else:
954
+ # 没有agent,无法使用缓存,重新提取units
955
+ import_units = self._extract_imports(abs_path, full_content, start_line, end_line)
956
+
957
+ if raw_mode:
958
+ line_groups = self._extract_line_groups(full_content, start_line, end_line, group_size=20)
959
+ all_units = import_units + line_groups
960
+ all_units = self._ensure_unique_ids(all_units)
961
+ all_units.sort(key=lambda u: u['start_line'])
962
+ structured_units = all_units
963
+ else:
964
+ syntax_units = self._extract_syntax_units_with_split(abs_path, full_content, start_line, end_line)
965
+ if syntax_units:
966
+ all_units = import_units + syntax_units
967
+ all_units = self._ensure_unique_ids(all_units)
968
+ all_units.sort(key=lambda u: u['start_line'])
969
+ structured_units = all_units
970
+ else:
971
+ line_groups = self._extract_blank_line_groups_with_split(full_content, start_line, end_line)
972
+ all_units = import_units + line_groups
973
+ all_units = self._ensure_unique_ids(all_units)
974
+ all_units.sort(key=lambda u: u['start_line'])
975
+ structured_units = all_units
976
+
977
+ if structured_units:
978
+ output = self._format_structured_output(abs_path, structured_units, total_lines, agent)
979
+ else:
980
+ output = ""
981
+
982
+ # 尝试获取并附加上下文信息
983
+ context_info = self._get_file_context(abs_path, start_line, end_line, agent)
984
+ if context_info:
985
+ output += context_info
117
986
 
118
987
  if agent:
119
988
  files = agent.get_user_data("files")
@@ -126,9 +995,270 @@ class ReadCodeTool:
126
995
  return {"success": True, "stdout": output, "stderr": ""}
127
996
 
128
997
  except Exception as e:
129
- PrettyOutput.print(str(e), OutputType.ERROR)
998
+ print(f"❌ {str(e)}")
130
999
  return {"success": False, "stdout": "", "stderr": f"文件读取失败: {str(e)}"}
131
1000
 
1001
def _handle_merged_ranges(
    self, filepath: str, requests: List[Dict], agent: Any = None
) -> Dict[str, Any]:
    """处理同一文件的多个范围请求,合并后去重

    Args:
        filepath: 文件绝对路径
        requests: 范围请求列表,每个请求包含 start_line, end_line, raw_mode
        agent: Agent实例

    Returns:
        Dict[str, Any]: 包含成功状态、输出内容和错误信息的字典
    """
    try:
        # 文件存在性检查
        if not os.path.exists(filepath):
            return {
                "success": False,
                "stdout": "",
                "stderr": f"文件不存在: {filepath}",
            }

        # 读取文件内容
        with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
            full_content = f.read()

        total_lines = len(full_content.split('\n'))
        if total_lines == 0:
            return {
                "success": True,
                "stdout": f"\n🔍 文件: {filepath}\n📄 文件为空 (0行)\n",
                "stderr": "",
            }

        def _normalize(start: int, end: int):
            # Map the -1 sentinel ("to EOF") and negative offsets to concrete
            # 1-based line numbers, clamped to the file's actual length.
            if end == -1:
                end = total_lines
            else:
                end = max(1, min(end, total_lines)) if end >= 0 else total_lines + end + 1
            start = max(1, min(start, total_lines)) if start >= 0 else total_lines + start + 1
            return start, end

        # 先确保缓存存在(通过读取整个文件建立缓存)
        first_request = requests[0]
        self._handle_single_file(
            filepath, 1, -1, agent, first_request.get("raw_mode", False)
        )

        # 获取缓存
        cache_info = self._get_file_cache(agent, filepath)
        if not cache_info or not self._is_cache_valid(cache_info, filepath):
            # 缓存无效:合并所有范围为一个覆盖区间,一次读取自然去重
            min_start = float('inf')
            max_end = 0
            raw_mode = False
            for req in requests:
                raw_mode = raw_mode or req.get("raw_mode", False)
                s, e = _normalize(req.get("start_line", 1), req.get("end_line", -1))
                min_start = min(min_start, s)
                max_end = max(max_end, e)
            return self._handle_single_file(
                filepath, int(min_start), int(max_end), agent, raw_mode
            )

        # 收集所有范围覆盖的块ID(去重)
        seen_block_ids = set()
        merged_blocks = []
        for req in requests:
            s, e = _normalize(req.get("start_line", 1), req.get("end_line", -1))
            # 从缓存获取对应范围的块
            for block in self._get_blocks_from_cache(cache_info, s, e):
                block_id = block["block_id"]
                if block_id not in seen_block_ids:
                    seen_block_ids.add(block_id)
                    merged_blocks.append(block)

        # 按block_id排序(block-1, block-2, ...)
        def extract_block_num(block):
            # "block-12" -> 12;格式异常的id排在最前
            block_id = block.get("block_id", "block-0")
            try:
                return int(block_id.split("-")[1])
            except (IndexError, ValueError):
                return 0

        merged_blocks.sort(key=extract_block_num)

        # 转换为units格式并格式化输出(保留真实的文件起始行号)
        structured_units = [
            {
                "block_id": block["block_id"],
                "content": block["content"],
                "start_line": block.get("start_line", 1),
            }
            for block in merged_blocks
        ]

        output = self._format_structured_output(filepath, structured_units, total_lines, agent)

        # 尝试获取上下文信息(使用合并后的范围)
        # BUGFIX: 先归一化 -1 / 负数行号再取 min/max,否则当请求中含有
        # end_line=-1 时会得到无效的上下文范围(例如 max_end == -1)。
        normalized = [
            _normalize(req.get("start_line", 1), req.get("end_line", -1))
            for req in requests
        ]
        min_start = min(s for s, _ in normalized)
        max_end = max(e for _, e in normalized)
        context_info = self._get_file_context(filepath, min_start, max_end, agent)
        if context_info:
            output += context_info

        return {"success": True, "stdout": output, "stderr": ""}

    except Exception as e:
        return {"success": False, "stdout": "", "stderr": f"合并范围读取失败: {str(e)}"}
1127
+
1128
+ def _get_file_context(
1129
+ self, filepath: str, start_line: int, end_line: int, agent: Any = None
1130
+ ) -> str:
1131
+ """获取文件的上下文信息
1132
+
1133
+ Args:
1134
+ filepath: 文件路径
1135
+ start_line: 起始行号
1136
+ end_line: 结束行号
1137
+ agent: Agent实例
1138
+
1139
+ Returns:
1140
+ 格式化的上下文信息字符串,如果无法获取则返回空字符串
1141
+ """
1142
+ try:
1143
+ # 尝试从Agent获取CodeAgent实例
1144
+ if not agent:
1145
+ return ""
1146
+
1147
+ # 通过agent获取CodeAgent实例
1148
+ # CodeAgent在初始化时会将自身关联到agent
1149
+ code_agent = getattr(agent, "_code_agent", None)
1150
+ if not code_agent:
1151
+ return ""
1152
+
1153
+ # 获取上下文管理器
1154
+ context_manager = getattr(code_agent, "context_manager", None)
1155
+ if not context_manager:
1156
+ return ""
1157
+
1158
+ # 输出上下文感知日志
1159
+ file_name = os.path.basename(filepath)
1160
+ if start_line == end_line:
1161
+ line_info = f"第{start_line}行"
1162
+ else:
1163
+ line_info = f"第{start_line}-{end_line}行"
1164
+ print(f"🧠 正在分析代码上下文 ({file_name}, {line_info})...")
1165
+
1166
+ # 确保文件已更新到上下文管理器
1167
+ # 如果文件内容已缓存,直接使用;否则读取并更新
1168
+ if not hasattr(context_manager, "_file_cache") or filepath not in context_manager._file_cache:
1169
+ try:
1170
+ with open(filepath, "r", encoding="utf-8", errors="replace") as f:
1171
+ content = f.read()
1172
+ context_manager.update_context_for_file(filepath, content)
1173
+ except Exception:
1174
+ # 如果读取失败,尝试获取已有上下文
1175
+ pass
1176
+
1177
+ # 获取编辑上下文
1178
+ edit_context = context_manager.get_edit_context(filepath, start_line, end_line)
1179
+
1180
+ # 构建上下文信息
1181
+ if not edit_context.context_summary or edit_context.context_summary == "No context available":
1182
+ return ""
1183
+
1184
+ # 格式化上下文信息
1185
+ context_lines = ["\n📋 代码上下文信息:"]
1186
+ context_lines.append("─" * 60)
1187
+
1188
+ if edit_context.current_scope:
1189
+ scope_info = f"📍 当前作用域: {edit_context.current_scope.kind} `{edit_context.current_scope.name}`"
1190
+ if edit_context.current_scope.signature:
1191
+ scope_info += f"\n └─ 签名: {edit_context.current_scope.signature}"
1192
+ context_lines.append(scope_info)
1193
+
1194
+ if edit_context.used_symbols:
1195
+ # 对符号去重(基于 name + file_path + line_start)
1196
+ seen_symbols = set()
1197
+ unique_symbols = []
1198
+ for s in edit_context.used_symbols:
1199
+ key = (s.name, getattr(s, 'file_path', ''), getattr(s, 'line_start', 0))
1200
+ if key not in seen_symbols:
1201
+ seen_symbols.add(key)
1202
+ unique_symbols.append(s)
1203
+
1204
+ # 区分定义和调用,显示定义位置信息
1205
+ definitions = []
1206
+ calls = []
1207
+ for symbol in unique_symbols[:10]:
1208
+ is_def = getattr(symbol, 'is_definition', False)
1209
+ if is_def:
1210
+ definitions.append(symbol)
1211
+ else:
1212
+ calls.append(symbol)
1213
+
1214
+ # 显示定义
1215
+ if definitions:
1216
+ def_names = [f"`{s.name}`" for s in definitions]
1217
+ context_lines.append(f"📝 定义的符号: {', '.join(def_names)}")
1218
+
1219
+ # 显示调用(带定义位置信息)
1220
+ if calls:
1221
+ call_info = []
1222
+ for symbol in calls:
1223
+ def_loc = getattr(symbol, 'definition_location', None)
1224
+ if def_loc:
1225
+ def_file = os.path.basename(def_loc.file_path)
1226
+ def_line = def_loc.line_start
1227
+ call_info.append(f"`{symbol.name}` → {def_file}:{def_line}")
1228
+ else:
1229
+ call_info.append(f"`{symbol.name}`")
1230
+ context_lines.append(f"🔗 调用的符号: {', '.join(call_info)}")
1231
+
1232
+ # 如果还有更多符号
1233
+ more = len(edit_context.used_symbols) - 10
1234
+ if more > 0:
1235
+ context_lines.append(f" ... 还有{more}个符号")
1236
+
1237
+ # 不再感知导入符号
1238
+
1239
+ if edit_context.relevant_files:
1240
+ # 对相关文件去重
1241
+ unique_files = list(dict.fromkeys(edit_context.relevant_files))
1242
+ rel_files = unique_files[:10]
1243
+ files_str = "\n ".join(f"• {os.path.relpath(f, context_manager.project_root)}" for f in rel_files)
1244
+ more = len(unique_files) - 10
1245
+ if more > 0:
1246
+ files_str += f"\n ... 还有{more}个相关文件"
1247
+ context_lines.append(f"📁 相关文件 ({len(unique_files)}个):\n {files_str}")
1248
+
1249
+ context_lines.append("─" * 60)
1250
+ context_lines.append("") # 空行
1251
+
1252
+ # 打印上下文感知结果到控制台
1253
+ context_output = "\n".join(context_lines)
1254
+ print(f"🧠 上下文感知结果:\n{context_output}")
1255
+
1256
+ return context_output
1257
+
1258
+ except Exception:
1259
+ # 静默失败,不影响文件读取
1260
+ return ""
1261
+
132
1262
  def execute(self, args: Dict) -> Dict[str, Any]:
133
1263
  """执行代码读取操作
134
1264
 
@@ -146,33 +1276,604 @@ class ReadCodeTool:
146
1276
  "stdout": "",
147
1277
  "stderr": "参数中必须包含文件列表",
148
1278
  }
1279
+
1280
+ if len(args["files"]) == 0:
1281
+ return {
1282
+ "success": False,
1283
+ "stdout": "",
1284
+ "stderr": "文件列表不能为空",
1285
+ }
149
1286
 
150
1287
  all_outputs = []
151
1288
  overall_success = True
1289
+ status_lines = []
1290
+ total_tokens = 0 # 累计读取的token数
1291
+ max_token_limit = self._get_max_token_limit(agent)
152
1292
 
1293
+ # 第一遍:检查所有文件的累计token数是否超过限制
1294
+ file_read_info = [] # 存储每个文件要读取的信息
153
1295
  for file_info in args["files"]:
154
1296
  if not isinstance(file_info, dict) or "path" not in file_info:
155
1297
  continue
1298
+
1299
+ filepath = file_info["path"].strip()
1300
+ start_line = file_info.get("start_line", 1)
1301
+ end_line = file_info.get("end_line", -1)
1302
+
1303
+ # 检查文件是否存在并计算要读取的token数
1304
+ abs_path = os.path.abspath(filepath)
1305
+ if not os.path.exists(abs_path):
1306
+ continue
1307
+
1308
+ try:
1309
+ # 统计总行数
1310
+ with open(abs_path, "r", encoding="utf-8", errors="ignore") as f:
1311
+ total_lines = sum(1 for _ in f)
1312
+
1313
+ if total_lines == 0:
1314
+ continue
1315
+
1316
+ # 计算实际要读取的行范围
1317
+ if end_line == -1:
1318
+ actual_end_line = total_lines
1319
+ else:
1320
+ actual_end_line = (
1321
+ max(1, min(end_line, total_lines))
1322
+ if end_line >= 0
1323
+ else total_lines + end_line + 1
1324
+ )
1325
+
1326
+ actual_start_line = (
1327
+ max(1, min(start_line, total_lines))
1328
+ if start_line >= 0
1329
+ else total_lines + start_line + 1
1330
+ )
1331
+
1332
+ if actual_start_line <= actual_end_line:
1333
+ # 读取完整文件内容用于token估算
1334
+ with open(abs_path, "r", encoding="utf-8", errors="ignore") as f:
1335
+ file_content = f.read()
1336
+
1337
+ # 估算结构化输出的token数
1338
+ raw_mode = file_info.get("raw_mode", False)
1339
+ content_tokens = self._estimate_structured_tokens(
1340
+ abs_path, file_content, actual_start_line, actual_end_line, total_lines, raw_mode
1341
+ )
1342
+
1343
+ file_read_info.append({
1344
+ "filepath": filepath,
1345
+ "start_line": actual_start_line,
1346
+ "end_line": actual_end_line,
1347
+ "read_lines": actual_end_line - actual_start_line + 1,
1348
+ "tokens": content_tokens,
1349
+ "file_info": file_info,
1350
+ })
1351
+ total_tokens += content_tokens
1352
+ except Exception:
1353
+ continue
156
1354
 
157
- result = self._handle_single_file(
158
- file_info["path"].strip(),
159
- file_info.get("start_line", 1),
160
- file_info.get("end_line", -1),
161
- agent,
1355
+ # 检查累计token数是否超过限制
1356
+ if total_tokens > max_token_limit:
1357
+ file_list = "\n ".join(
1358
+ f" {info['filepath']}: {info['tokens']} tokens ({info['read_lines']} 行, 范围: {info['start_line']}-{info['end_line']})"
1359
+ for info in file_read_info[:10]
162
1360
  )
1361
+ more_files = len(file_read_info) - 10
1362
+ if more_files > 0:
1363
+ file_list += f"\n ... 还有 {more_files} 个文件"
1364
+
1365
+ return {
1366
+ "success": False,
1367
+ "stdout": "",
1368
+ "stderr": (
1369
+ f"⚠️ 累计读取范围过大: 请求累计读取内容约 {total_tokens} tokens,超过限制 ({max_token_limit} tokens,约2/3最大窗口)\n"
1370
+ f"📋 文件列表 ({len(file_read_info)} 个文件):\n {file_list}\n"
1371
+ f"💡 建议:\n"
1372
+ f" 1. 分批读取:将文件分成多个批次,每批累计内容不超过 {max_token_limit} tokens\n"
1373
+ f" 2. 先定位:使用搜索或分析工具定位关键代码位置,再读取具体范围\n"
1374
+ f" 3. 缩小范围:为每个文件指定更精确的行号范围"
1375
+ ),
1376
+ }
163
1377
 
164
- if result["success"]:
165
- all_outputs.append(result["stdout"])
1378
+ # 第二遍:实际读取文件(按文件分组,合并同一文件的多个范围请求,避免块重复)
1379
+ # 按文件路径分组
1380
+ from collections import defaultdict
1381
+ file_requests = defaultdict(list)
1382
+ for file_info in args["files"]:
1383
+ if not isinstance(file_info, dict) or "path" not in file_info:
1384
+ continue
1385
+ abs_path = os.path.abspath(file_info["path"].strip())
1386
+ file_requests[abs_path].append(file_info)
1387
+
1388
+ # 按文件处理,合并同一文件的多个范围请求
1389
+ for abs_path, requests in file_requests.items():
1390
+ if len(requests) == 1:
1391
+ # 单个范围请求,直接处理
1392
+ file_info = requests[0]
1393
+ result = self._handle_single_file(
1394
+ file_info["path"].strip(),
1395
+ file_info.get("start_line", 1),
1396
+ file_info.get("end_line", -1),
1397
+ agent,
1398
+ file_info.get("raw_mode", False),
1399
+ )
1400
+ if result["success"]:
1401
+ all_outputs.append(result["stdout"])
1402
+ status_lines.append(f"✅ {file_info['path']} 文件读取成功")
1403
+ else:
1404
+ all_outputs.append(f"❌ {file_info['path']}: {result['stderr']}")
1405
+ status_lines.append(f"❌ {file_info['path']} 文件读取失败")
1406
+ overall_success = False
166
1407
  else:
167
- all_outputs.append(f"❌ {file_info['path']}: {result['stderr']}")
168
- overall_success = False
1408
+ # 多个范围请求,合并处理并去重
1409
+ merged_result = self._handle_merged_ranges(
1410
+ abs_path, requests, agent
1411
+ )
1412
+ display_path = requests[0]["path"]
1413
+ if merged_result["success"]:
1414
+ all_outputs.append(merged_result["stdout"])
1415
+ status_lines.append(f"✅ {display_path} 文件读取成功 (合并{len(requests)}个范围请求,已去重)")
1416
+ else:
1417
+ all_outputs.append(f"❌ {display_path}: {merged_result['stderr']}")
1418
+ status_lines.append(f"❌ {display_path} 文件读取失败")
1419
+ overall_success = False
169
1420
 
1421
+ stdout_text = "\n".join(all_outputs)
1422
+ # 仅打印每个文件的读取状态,不打印具体内容
1423
+ try:
1424
+ if status_lines:
1425
+ print("\n".join(status_lines), end="\n")
1426
+ except Exception:
1427
+ pass
170
1428
  return {
171
1429
  "success": overall_success,
172
- "stdout": "\n".join(all_outputs),
1430
+ "stdout": stdout_text,
173
1431
  "stderr": "",
174
1432
  }
175
1433
 
176
1434
  except Exception as e:
177
- PrettyOutput.print(str(e), OutputType.ERROR)
1435
+ print(f"❌ {str(e)}")
178
1436
  return {"success": False, "stdout": "", "stderr": f"代码读取失败: {str(e)}"}
1437
+
1438
+
1439
+ def main():
1440
+ """测试结构化读取功能"""
1441
+ import tempfile
1442
+ import os
1443
+
1444
+ tool = ReadCodeTool()
1445
+
1446
+ print("=" * 80)
1447
+ print("测试结构化读取功能")
1448
+ print("=" * 80)
1449
+
1450
+ # 测试1: C语言文件(tree-sitter支持)
1451
+ print("\n【测试1】C语言文件 - 语法单元提取")
1452
+ print("-" * 80)
1453
+
1454
+ c_code = """#include <stdio.h>
1455
+
1456
+ void main() {
1457
+ printf("Hello, World!\\n");
1458
+ }
1459
+
1460
+ int add(int a, int b) {
1461
+ return a + b;
1462
+ }
1463
+
1464
+ int sub(int a, int b) {
1465
+ return a - b;
1466
+ }
1467
+
1468
+ struct Point {
1469
+ int x;
1470
+ int y;
1471
+ };
1472
+ """
1473
+
1474
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.c', delete=False) as f:
1475
+ c_file = f.name
1476
+ f.write(c_code)
1477
+
1478
+ try:
1479
+ result = tool.execute({
1480
+ "files": [{"path": c_file, "start_line": 1, "end_line": -1}],
1481
+ "agent": None
1482
+ })
1483
+
1484
+ if result["success"]:
1485
+ print("✅ C语言文件读取成功")
1486
+ print("\n输出内容:")
1487
+ print(result["stdout"])
1488
+ else:
1489
+ print(f"❌ C语言文件读取失败: {result['stderr']}")
1490
+ finally:
1491
+ os.unlink(c_file)
1492
+
1493
+ # 测试2: Python文件(AST支持)
1494
+ print("\n【测试2】Python文件 - 语法单元提取")
1495
+ print("-" * 80)
1496
+
1497
+ python_code = """def main():
1498
+ print("Hello, World!")
1499
+
1500
+ def add(a, b):
1501
+ return a + b
1502
+
1503
+ def sub(a, b):
1504
+ return a - b
1505
+
1506
+ class Point:
1507
+ def __init__(self, x, y):
1508
+ self.x = x
1509
+ self.y = y
1510
+ """
1511
+
1512
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
1513
+ py_file = f.name
1514
+ f.write(python_code)
1515
+
1516
+ try:
1517
+ result = tool.execute({
1518
+ "files": [{"path": py_file, "start_line": 1, "end_line": -1}],
1519
+ "agent": None
1520
+ })
1521
+
1522
+ if result["success"]:
1523
+ print("✅ Python文件读取成功")
1524
+ print("\n输出内容:")
1525
+ print(result["stdout"])
1526
+ else:
1527
+ print(f"❌ Python文件读取失败: {result['stderr']}")
1528
+ finally:
1529
+ os.unlink(py_file)
1530
+
1531
+ # 测试3: 不支持的语言 - 行号分组
1532
+ print("\n【测试3】不支持的语言 - 行号分组(20行一组)")
1533
+ print("-" * 80)
1534
+
1535
+ text_content = "\n".join([f"这是第 {i} 行内容" for i in range(1, 51)])
1536
+
1537
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
1538
+ txt_file = f.name
1539
+ f.write(text_content)
1540
+
1541
+ try:
1542
+ result = tool.execute({
1543
+ "files": [{"path": txt_file, "start_line": 1, "end_line": -1}],
1544
+ "agent": None
1545
+ })
1546
+
1547
+ if result["success"]:
1548
+ print("✅ 文本文件读取成功(使用行号分组)")
1549
+ print("\n输出内容(前500字符):")
1550
+ print(result["stdout"][:500] + "..." if len(result["stdout"]) > 500 else result["stdout"])
1551
+ else:
1552
+ print(f"❌ 文本文件读取失败: {result['stderr']}")
1553
+ finally:
1554
+ os.unlink(txt_file)
1555
+
1556
+ # 测试4: 指定行号范围
1557
+ print("\n【测试4】指定行号范围读取")
1558
+ print("-" * 80)
1559
+
1560
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.c', delete=False) as f:
1561
+ c_file2 = f.name
1562
+ f.write(c_code)
1563
+
1564
+ try:
1565
+ result = tool.execute({
1566
+ "files": [{"path": c_file2, "start_line": 1, "end_line": 10}],
1567
+ "agent": None
1568
+ })
1569
+
1570
+ if result["success"]:
1571
+ print("✅ 指定范围读取成功")
1572
+ print("\n输出内容:")
1573
+ print(result["stdout"])
1574
+ else:
1575
+ print(f"❌ 指定范围读取失败: {result['stderr']}")
1576
+ finally:
1577
+ os.unlink(c_file2)
1578
+
1579
+ # 测试5: 边界情况 - 返回边界上的语法单元
1580
+ print("\n【测试5】边界情况 - 返回边界上的语法单元")
1581
+ print("-" * 80)
1582
+
1583
+ boundary_test_code = """def func1():
1584
+ line1 = 1
1585
+ line2 = 2
1586
+ line3 = 3
1587
+
1588
+ def func2():
1589
+ line1 = 1
1590
+ line2 = 2
1591
+
1592
+ def func3():
1593
+ line1 = 1
1594
+ line2 = 2
1595
+ line3 = 3
1596
+ line4 = 4
1597
+ """
1598
+
1599
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
1600
+ boundary_file = f.name
1601
+ f.write(boundary_test_code)
1602
+
1603
+ try:
1604
+ # 请求第3-8行
1605
+ # func1: 1-4行(结束行4在范围内,应该返回完整func1)
1606
+ # func2: 6-8行(开始行6在范围内,应该返回完整func2)
1607
+ # func3: 10-14行(完全不在范围内,不应该返回)
1608
+ result = tool.execute({
1609
+ "files": [{"path": boundary_file, "start_line": 3, "end_line": 8}],
1610
+ "agent": None
1611
+ })
1612
+
1613
+ if result["success"]:
1614
+ print("✅ 边界情况测试成功")
1615
+ print("请求范围: 3-8行")
1616
+ print("预期结果:")
1617
+ print(" - func1 (1-4行): 结束行4在范围内,应返回完整func1")
1618
+ print(" - func2 (6-8行): 开始行6在范围内,应返回完整func2")
1619
+ print(" - func3 (10-14行): 完全不在范围内,不应返回")
1620
+ print("\n实际输出:")
1621
+ print(result["stdout"])
1622
+ else:
1623
+ print(f"❌ 边界情况测试失败: {result['stderr']}")
1624
+ finally:
1625
+ os.unlink(boundary_file)
1626
+
1627
+ # 测试6: 多个文件
1628
+ print("\n【测试6】多个文件读取")
1629
+ print("-" * 80)
1630
+
1631
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.c', delete=False) as f1, \
1632
+ tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f2:
1633
+ c_file3 = f1.name
1634
+ py_file2 = f2.name
1635
+ f1.write(c_code)
1636
+ f2.write(python_code)
1637
+
1638
+ try:
1639
+ result = tool.execute({
1640
+ "files": [
1641
+ {"path": c_file3, "start_line": 1, "end_line": -1},
1642
+ {"path": py_file2, "start_line": 1, "end_line": -1}
1643
+ ],
1644
+ "agent": None
1645
+ })
1646
+
1647
+ if result["success"]:
1648
+ print("✅ 多文件读取成功")
1649
+ print("\n输出内容(前800字符):")
1650
+ print(result["stdout"][:800] + "..." if len(result["stdout"]) > 800 else result["stdout"])
1651
+ else:
1652
+ print(f"❌ 多文件读取失败: {result['stderr']}")
1653
+ finally:
1654
+ os.unlink(c_file3)
1655
+ os.unlink(py_file2)
1656
+
1657
+ # 测试7: 嵌套作用域的边界情况
1658
+ print("\n【测试7】嵌套作用域的边界情况")
1659
+ print("-" * 80)
1660
+
1661
+ nested_code = """class Outer:
1662
+ def method1(self):
1663
+ line1 = 1
1664
+ line2 = 2
1665
+
1666
+ def method2(self):
1667
+ line1 = 1
1668
+ line2 = 2
1669
+ line3 = 3
1670
+
1671
+ def standalone_func():
1672
+ line1 = 1
1673
+ line2 = 2
1674
+ """
1675
+
1676
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
1677
+ nested_file = f.name
1678
+ f.write(nested_code)
1679
+
1680
+ try:
1681
+ # 请求第4-7行
1682
+ # Outer.method1: 2-4行(结束行4在范围内,应该返回完整method1)
1683
+ # Outer.method2: 6-9行(开始行6在范围内,应该返回完整method2)
1684
+ # Outer类: 1-9行(包含method1和method2,应该返回)
1685
+ # standalone_func: 11-13行(完全不在范围内,不应返回)
1686
+ result = tool.execute({
1687
+ "files": [{"path": nested_file, "start_line": 4, "end_line": 7}],
1688
+ "agent": None
1689
+ })
1690
+
1691
+ # NOTE(review): continuation of main()'s manual test sequence — the enclosing
+ # function definition and the try: for test 7 begin earlier in the file.
+ if result["success"]:
1692
+ print("✅ 嵌套作用域边界测试成功")
1693
+ print("请求范围: 4-7行")
1694
+ print("预期结果:")
1695
+ print(" - Outer类 (1-9行): 包含method1和method2,应返回")
1696
+ print(" - Outer.method1 (2-4行): 结束行4在范围内,应返回完整method1")
1697
+ print(" - Outer.method2 (6-9行): 开始行6在范围内,应返回完整method2")
1698
+ print("\n实际输出:")
1699
+ print(result["stdout"])
1700
+ else:
1701
+ print(f"❌ 嵌套作用域边界测试失败: {result['stderr']}")
1702
+ finally:
1703
+ # Clean up the temp file created for the nested-scope test above.
+ os.unlink(nested_file)
1704
+
1705
+ # Test 8: Java file (tree-sitter supported)
1706
+ print("\n【测试8】Java文件 - 语法单元提取")
1707
+ print("-" * 80)
1708
+
1709
+ java_code = """public class Main {
1710
+ public static void main(String[] args) {
1711
+ System.out.println("Hello, World!");
1712
+ }
1713
+
1714
+ public int add(int a, int b) {
1715
+ return a + b;
1716
+ }
1717
+
1718
+ private int subtract(int a, int b) {
1719
+ return a - b;
1720
+ }
1721
+ }
1722
+
1723
+ class Point {
1724
+ private int x;
1725
+ private int y;
1726
+
1727
+ public Point(int x, int y) {
1728
+ this.x = x;
1729
+ this.y = y;
1730
+ }
1731
+ }
1732
+ """
1733
+
1734
+ # delete=False keeps the file on disk after the with-block; it is removed
+ # explicitly in the finally below.
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.java', delete=False) as f:
1735
+ java_file = f.name
1736
+ f.write(java_code)
1737
+
1738
+ try:
1739
+ # end_line=-1 presumably means "read to end of file" — confirm against
+ # the tool's input schema.
+ result = tool.execute({
1740
+ "files": [{"path": java_file, "start_line": 1, "end_line": -1}],
1741
+ "agent": None
1742
+ })
1743
+
1744
+ if result["success"]:
1745
+ print("✅ Java文件读取成功")
1746
+ print("\n输出内容:")
1747
+ print(result["stdout"])
1748
+ else:
1749
+ print(f"❌ Java文件读取失败: {result['stderr']}")
1750
+ finally:
1751
+ os.unlink(java_file)
1752
+
1753
+ # Test 9: Rust file (tree-sitter supported)
1754
+ print("\n【测试9】Rust文件 - 语法单元提取")
1755
+ print("-" * 80)
1756
+
1757
+ rust_code = """fn main() {
1758
+ println!("Hello, World!");
1759
+ }
1760
+
1761
+ fn add(a: i32, b: i32) -> i32 {
1762
+ a + b
1763
+ }
1764
+
1765
+ fn subtract(a: i32, b: i32) -> i32 {
1766
+ a - b
1767
+ }
1768
+
1769
+ struct Point {
1770
+ x: i32,
1771
+ y: i32,
1772
+ }
1773
+
1774
+ impl Point {
1775
+ fn new(x: i32, y: i32) -> Point {
1776
+ Point { x, y }
1777
+ }
1778
+ }
1779
+
1780
+ enum Color {
1781
+ Red,
1782
+ Green,
1783
+ Blue,
1784
+ }
1785
+ """
1786
+
1787
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.rs', delete=False) as f:
1788
+ rust_file = f.name
1789
+ f.write(rust_code)
1790
+
1791
+ try:
1792
+ result = tool.execute({
1793
+ "files": [{"path": rust_file, "start_line": 1, "end_line": -1}],
1794
+ "agent": None
1795
+ })
1796
+
1797
+ if result["success"]:
1798
+ print("✅ Rust文件读取成功")
1799
+ print("\n输出内容:")
1800
+ print(result["stdout"])
1801
+ else:
1802
+ print(f"❌ Rust文件读取失败: {result['stderr']}")
1803
+ finally:
1804
+ os.unlink(rust_file)
1805
+
1806
+ # Test 10: Go file (tree-sitter supported)
1807
+ print("\n【测试10】Go文件 - 语法单元提取")
1808
+ print("-" * 80)
1809
+
1810
+ go_code = """package main
1811
+
1812
+ import "fmt"
1813
+
1814
+ func main() {
1815
+ fmt.Println("Hello, World!")
1816
+ }
1817
+
1818
+ func add(a int, b int) int {
1819
+ return a + b
1820
+ }
1821
+
1822
+ func subtract(a int, b int) int {
1823
+ return a - b
1824
+ }
1825
+
1826
+ type Point struct {
1827
+ x int
1828
+ y int
1829
+ }
1830
+
1831
+ func (p *Point) New(x int, y int) {
1832
+ p.x = x
1833
+ p.y = y
1834
+ }
1835
+
1836
+ type Color int
1837
+
1838
+ const (
1839
+ Red Color = iota
1840
+ Green
1841
+ Blue
1842
+ )
1843
+
1844
+ type Shape interface {
1845
+ Area() float64
1846
+ Perimeter() float64
1847
+ }
1848
+
1849
+ type Drawable interface {
1850
+ Draw()
1851
+ }
1852
+ """
1853
+
1854
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.go', delete=False) as f:
1855
+ go_file = f.name
1856
+ f.write(go_code)
1857
+
1858
+ try:
1859
+ result = tool.execute({
1860
+ "files": [{"path": go_file, "start_line": 1, "end_line": -1}],
1861
+ "agent": None
1862
+ })
1863
+
1864
+ if result["success"]:
1865
+ print("✅ Go文件读取成功")
1866
+ print("\n输出内容:")
1867
+ print(result["stdout"])
1868
+ else:
1869
+ print(f"❌ Go文件读取失败: {result['stderr']}")
1870
+ finally:
1871
+ os.unlink(go_file)
1872
+
1873
+ # Closing banner for the whole manual test run.
+ print("\n" + "=" * 80)
1874
+ print("测试完成")
1875
+ print("=" * 80)
1876
+
1877
+
1878
+ # Script entry point: run the manual test suite when executed directly.
+ if __name__ == "__main__":
1879
+ main()