jarvis-ai-assistant 0.7.16__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jarvis/__init__.py +1 -1
- jarvis/jarvis_agent/__init__.py +567 -222
- jarvis/jarvis_agent/agent_manager.py +19 -12
- jarvis/jarvis_agent/builtin_input_handler.py +79 -11
- jarvis/jarvis_agent/config_editor.py +7 -2
- jarvis/jarvis_agent/event_bus.py +24 -13
- jarvis/jarvis_agent/events.py +19 -1
- jarvis/jarvis_agent/file_context_handler.py +67 -64
- jarvis/jarvis_agent/file_methodology_manager.py +38 -24
- jarvis/jarvis_agent/jarvis.py +186 -114
- jarvis/jarvis_agent/language_extractors/__init__.py +8 -1
- jarvis/jarvis_agent/language_extractors/c_extractor.py +7 -4
- jarvis/jarvis_agent/language_extractors/cpp_extractor.py +9 -4
- jarvis/jarvis_agent/language_extractors/go_extractor.py +7 -4
- jarvis/jarvis_agent/language_extractors/java_extractor.py +27 -20
- jarvis/jarvis_agent/language_extractors/javascript_extractor.py +22 -17
- jarvis/jarvis_agent/language_extractors/python_extractor.py +7 -4
- jarvis/jarvis_agent/language_extractors/rust_extractor.py +7 -4
- jarvis/jarvis_agent/language_extractors/typescript_extractor.py +22 -17
- jarvis/jarvis_agent/language_support_info.py +250 -219
- jarvis/jarvis_agent/main.py +19 -23
- jarvis/jarvis_agent/memory_manager.py +9 -6
- jarvis/jarvis_agent/methodology_share_manager.py +21 -15
- jarvis/jarvis_agent/output_handler.py +4 -2
- jarvis/jarvis_agent/prompt_builder.py +7 -6
- jarvis/jarvis_agent/prompt_manager.py +113 -8
- jarvis/jarvis_agent/prompts.py +317 -85
- jarvis/jarvis_agent/protocols.py +5 -2
- jarvis/jarvis_agent/run_loop.py +192 -32
- jarvis/jarvis_agent/session_manager.py +7 -3
- jarvis/jarvis_agent/share_manager.py +23 -13
- jarvis/jarvis_agent/shell_input_handler.py +12 -8
- jarvis/jarvis_agent/stdio_redirect.py +25 -26
- jarvis/jarvis_agent/task_analyzer.py +29 -23
- jarvis/jarvis_agent/task_list.py +869 -0
- jarvis/jarvis_agent/task_manager.py +26 -23
- jarvis/jarvis_agent/tool_executor.py +6 -5
- jarvis/jarvis_agent/tool_share_manager.py +24 -14
- jarvis/jarvis_agent/user_interaction.py +3 -3
- jarvis/jarvis_agent/utils.py +9 -1
- jarvis/jarvis_agent/web_bridge.py +37 -17
- jarvis/jarvis_agent/web_output_sink.py +5 -2
- jarvis/jarvis_agent/web_server.py +165 -36
- jarvis/jarvis_c2rust/__init__.py +1 -1
- jarvis/jarvis_c2rust/cli.py +260 -141
- jarvis/jarvis_c2rust/collector.py +37 -18
- jarvis/jarvis_c2rust/constants.py +60 -0
- jarvis/jarvis_c2rust/library_replacer.py +242 -1010
- jarvis/jarvis_c2rust/library_replacer_checkpoint.py +133 -0
- jarvis/jarvis_c2rust/library_replacer_llm.py +287 -0
- jarvis/jarvis_c2rust/library_replacer_loader.py +191 -0
- jarvis/jarvis_c2rust/library_replacer_output.py +134 -0
- jarvis/jarvis_c2rust/library_replacer_prompts.py +124 -0
- jarvis/jarvis_c2rust/library_replacer_utils.py +188 -0
- jarvis/jarvis_c2rust/llm_module_agent.py +98 -1044
- jarvis/jarvis_c2rust/llm_module_agent_apply.py +170 -0
- jarvis/jarvis_c2rust/llm_module_agent_executor.py +288 -0
- jarvis/jarvis_c2rust/llm_module_agent_loader.py +170 -0
- jarvis/jarvis_c2rust/llm_module_agent_prompts.py +268 -0
- jarvis/jarvis_c2rust/llm_module_agent_types.py +57 -0
- jarvis/jarvis_c2rust/llm_module_agent_utils.py +150 -0
- jarvis/jarvis_c2rust/llm_module_agent_validator.py +119 -0
- jarvis/jarvis_c2rust/loaders.py +28 -10
- jarvis/jarvis_c2rust/models.py +5 -2
- jarvis/jarvis_c2rust/optimizer.py +192 -1974
- jarvis/jarvis_c2rust/optimizer_build_fix.py +286 -0
- jarvis/jarvis_c2rust/optimizer_clippy.py +766 -0
- jarvis/jarvis_c2rust/optimizer_config.py +49 -0
- jarvis/jarvis_c2rust/optimizer_docs.py +183 -0
- jarvis/jarvis_c2rust/optimizer_options.py +48 -0
- jarvis/jarvis_c2rust/optimizer_progress.py +469 -0
- jarvis/jarvis_c2rust/optimizer_report.py +52 -0
- jarvis/jarvis_c2rust/optimizer_unsafe.py +309 -0
- jarvis/jarvis_c2rust/optimizer_utils.py +469 -0
- jarvis/jarvis_c2rust/optimizer_visibility.py +185 -0
- jarvis/jarvis_c2rust/scanner.py +229 -166
- jarvis/jarvis_c2rust/transpiler.py +531 -2732
- jarvis/jarvis_c2rust/transpiler_agents.py +503 -0
- jarvis/jarvis_c2rust/transpiler_build.py +1294 -0
- jarvis/jarvis_c2rust/transpiler_codegen.py +204 -0
- jarvis/jarvis_c2rust/transpiler_compile.py +146 -0
- jarvis/jarvis_c2rust/transpiler_config.py +178 -0
- jarvis/jarvis_c2rust/transpiler_context.py +122 -0
- jarvis/jarvis_c2rust/transpiler_executor.py +516 -0
- jarvis/jarvis_c2rust/transpiler_generation.py +278 -0
- jarvis/jarvis_c2rust/transpiler_git.py +163 -0
- jarvis/jarvis_c2rust/transpiler_mod_utils.py +225 -0
- jarvis/jarvis_c2rust/transpiler_modules.py +336 -0
- jarvis/jarvis_c2rust/transpiler_planning.py +394 -0
- jarvis/jarvis_c2rust/transpiler_review.py +1196 -0
- jarvis/jarvis_c2rust/transpiler_symbols.py +176 -0
- jarvis/jarvis_c2rust/utils.py +269 -79
- jarvis/jarvis_code_agent/after_change.py +233 -0
- jarvis/jarvis_code_agent/build_validation_config.py +37 -30
- jarvis/jarvis_code_agent/builtin_rules.py +68 -0
- jarvis/jarvis_code_agent/code_agent.py +976 -1517
- jarvis/jarvis_code_agent/code_agent_build.py +227 -0
- jarvis/jarvis_code_agent/code_agent_diff.py +246 -0
- jarvis/jarvis_code_agent/code_agent_git.py +525 -0
- jarvis/jarvis_code_agent/code_agent_impact.py +177 -0
- jarvis/jarvis_code_agent/code_agent_lint.py +283 -0
- jarvis/jarvis_code_agent/code_agent_llm.py +159 -0
- jarvis/jarvis_code_agent/code_agent_postprocess.py +105 -0
- jarvis/jarvis_code_agent/code_agent_prompts.py +46 -0
- jarvis/jarvis_code_agent/code_agent_rules.py +305 -0
- jarvis/jarvis_code_agent/code_analyzer/__init__.py +52 -48
- jarvis/jarvis_code_agent/code_analyzer/base_language.py +12 -10
- jarvis/jarvis_code_agent/code_analyzer/build_validator/__init__.py +12 -11
- jarvis/jarvis_code_agent/code_analyzer/build_validator/base.py +16 -12
- jarvis/jarvis_code_agent/code_analyzer/build_validator/cmake.py +26 -17
- jarvis/jarvis_code_agent/code_analyzer/build_validator/detector.py +558 -104
- jarvis/jarvis_code_agent/code_analyzer/build_validator/fallback.py +27 -16
- jarvis/jarvis_code_agent/code_analyzer/build_validator/go.py +22 -18
- jarvis/jarvis_code_agent/code_analyzer/build_validator/java_gradle.py +21 -16
- jarvis/jarvis_code_agent/code_analyzer/build_validator/java_maven.py +20 -16
- jarvis/jarvis_code_agent/code_analyzer/build_validator/makefile.py +27 -16
- jarvis/jarvis_code_agent/code_analyzer/build_validator/nodejs.py +47 -23
- jarvis/jarvis_code_agent/code_analyzer/build_validator/python.py +71 -37
- jarvis/jarvis_code_agent/code_analyzer/build_validator/rust.py +162 -35
- jarvis/jarvis_code_agent/code_analyzer/build_validator/validator.py +111 -57
- jarvis/jarvis_code_agent/code_analyzer/build_validator.py +18 -12
- jarvis/jarvis_code_agent/code_analyzer/context_manager.py +185 -183
- jarvis/jarvis_code_agent/code_analyzer/context_recommender.py +2 -1
- jarvis/jarvis_code_agent/code_analyzer/dependency_analyzer.py +24 -15
- jarvis/jarvis_code_agent/code_analyzer/file_ignore.py +227 -141
- jarvis/jarvis_code_agent/code_analyzer/impact_analyzer.py +321 -247
- jarvis/jarvis_code_agent/code_analyzer/language_registry.py +37 -29
- jarvis/jarvis_code_agent/code_analyzer/language_support.py +21 -13
- jarvis/jarvis_code_agent/code_analyzer/languages/__init__.py +15 -9
- jarvis/jarvis_code_agent/code_analyzer/languages/c_cpp_language.py +75 -45
- jarvis/jarvis_code_agent/code_analyzer/languages/go_language.py +87 -52
- jarvis/jarvis_code_agent/code_analyzer/languages/java_language.py +84 -51
- jarvis/jarvis_code_agent/code_analyzer/languages/javascript_language.py +94 -64
- jarvis/jarvis_code_agent/code_analyzer/languages/python_language.py +109 -71
- jarvis/jarvis_code_agent/code_analyzer/languages/rust_language.py +97 -63
- jarvis/jarvis_code_agent/code_analyzer/languages/typescript_language.py +103 -69
- jarvis/jarvis_code_agent/code_analyzer/llm_context_recommender.py +271 -268
- jarvis/jarvis_code_agent/code_analyzer/symbol_extractor.py +76 -64
- jarvis/jarvis_code_agent/code_analyzer/tree_sitter_extractor.py +92 -19
- jarvis/jarvis_code_agent/diff_visualizer.py +998 -0
- jarvis/jarvis_code_agent/lint.py +223 -524
- jarvis/jarvis_code_agent/rule_share_manager.py +158 -0
- jarvis/jarvis_code_agent/rules/clean_code.md +144 -0
- jarvis/jarvis_code_agent/rules/code_review.md +115 -0
- jarvis/jarvis_code_agent/rules/documentation.md +165 -0
- jarvis/jarvis_code_agent/rules/generate_rules.md +52 -0
- jarvis/jarvis_code_agent/rules/performance.md +158 -0
- jarvis/jarvis_code_agent/rules/refactoring.md +139 -0
- jarvis/jarvis_code_agent/rules/security.md +160 -0
- jarvis/jarvis_code_agent/rules/tdd.md +78 -0
- jarvis/jarvis_code_agent/test_rules/cpp_test.md +118 -0
- jarvis/jarvis_code_agent/test_rules/go_test.md +98 -0
- jarvis/jarvis_code_agent/test_rules/java_test.md +99 -0
- jarvis/jarvis_code_agent/test_rules/javascript_test.md +113 -0
- jarvis/jarvis_code_agent/test_rules/php_test.md +117 -0
- jarvis/jarvis_code_agent/test_rules/python_test.md +91 -0
- jarvis/jarvis_code_agent/test_rules/ruby_test.md +102 -0
- jarvis/jarvis_code_agent/test_rules/rust_test.md +86 -0
- jarvis/jarvis_code_agent/utils.py +36 -26
- jarvis/jarvis_code_analysis/checklists/loader.py +21 -21
- jarvis/jarvis_code_analysis/code_review.py +64 -33
- jarvis/jarvis_data/config_schema.json +285 -192
- jarvis/jarvis_git_squash/main.py +8 -6
- jarvis/jarvis_git_utils/git_commiter.py +53 -76
- jarvis/jarvis_mcp/__init__.py +5 -2
- jarvis/jarvis_mcp/sse_mcp_client.py +40 -30
- jarvis/jarvis_mcp/stdio_mcp_client.py +27 -19
- jarvis/jarvis_mcp/streamable_mcp_client.py +35 -26
- jarvis/jarvis_memory_organizer/memory_organizer.py +78 -55
- jarvis/jarvis_methodology/main.py +48 -39
- jarvis/jarvis_multi_agent/__init__.py +56 -23
- jarvis/jarvis_multi_agent/main.py +15 -18
- jarvis/jarvis_platform/base.py +179 -111
- jarvis/jarvis_platform/human.py +27 -16
- jarvis/jarvis_platform/kimi.py +52 -45
- jarvis/jarvis_platform/openai.py +101 -40
- jarvis/jarvis_platform/registry.py +51 -33
- jarvis/jarvis_platform/tongyi.py +68 -38
- jarvis/jarvis_platform/yuanbao.py +59 -43
- jarvis/jarvis_platform_manager/main.py +68 -76
- jarvis/jarvis_platform_manager/service.py +24 -14
- jarvis/jarvis_rag/README_CONFIG.md +314 -0
- jarvis/jarvis_rag/README_DYNAMIC_LOADING.md +311 -0
- jarvis/jarvis_rag/README_ONLINE_MODELS.md +230 -0
- jarvis/jarvis_rag/__init__.py +57 -4
- jarvis/jarvis_rag/cache.py +3 -1
- jarvis/jarvis_rag/cli.py +48 -68
- jarvis/jarvis_rag/embedding_interface.py +39 -0
- jarvis/jarvis_rag/embedding_manager.py +7 -230
- jarvis/jarvis_rag/embeddings/__init__.py +41 -0
- jarvis/jarvis_rag/embeddings/base.py +114 -0
- jarvis/jarvis_rag/embeddings/cohere.py +66 -0
- jarvis/jarvis_rag/embeddings/edgefn.py +117 -0
- jarvis/jarvis_rag/embeddings/local.py +260 -0
- jarvis/jarvis_rag/embeddings/openai.py +62 -0
- jarvis/jarvis_rag/embeddings/registry.py +293 -0
- jarvis/jarvis_rag/llm_interface.py +8 -6
- jarvis/jarvis_rag/query_rewriter.py +8 -9
- jarvis/jarvis_rag/rag_pipeline.py +61 -52
- jarvis/jarvis_rag/reranker.py +7 -75
- jarvis/jarvis_rag/reranker_interface.py +32 -0
- jarvis/jarvis_rag/rerankers/__init__.py +41 -0
- jarvis/jarvis_rag/rerankers/base.py +109 -0
- jarvis/jarvis_rag/rerankers/cohere.py +67 -0
- jarvis/jarvis_rag/rerankers/edgefn.py +140 -0
- jarvis/jarvis_rag/rerankers/jina.py +79 -0
- jarvis/jarvis_rag/rerankers/local.py +89 -0
- jarvis/jarvis_rag/rerankers/registry.py +293 -0
- jarvis/jarvis_rag/retriever.py +58 -43
- jarvis/jarvis_sec/__init__.py +66 -141
- jarvis/jarvis_sec/agents.py +21 -17
- jarvis/jarvis_sec/analysis.py +80 -33
- jarvis/jarvis_sec/checkers/__init__.py +7 -13
- jarvis/jarvis_sec/checkers/c_checker.py +356 -164
- jarvis/jarvis_sec/checkers/rust_checker.py +47 -29
- jarvis/jarvis_sec/cli.py +43 -21
- jarvis/jarvis_sec/clustering.py +430 -272
- jarvis/jarvis_sec/file_manager.py +99 -55
- jarvis/jarvis_sec/parsers.py +9 -6
- jarvis/jarvis_sec/prompts.py +4 -3
- jarvis/jarvis_sec/report.py +44 -22
- jarvis/jarvis_sec/review.py +180 -107
- jarvis/jarvis_sec/status.py +50 -41
- jarvis/jarvis_sec/types.py +3 -0
- jarvis/jarvis_sec/utils.py +160 -83
- jarvis/jarvis_sec/verification.py +411 -181
- jarvis/jarvis_sec/workflow.py +132 -21
- jarvis/jarvis_smart_shell/main.py +28 -41
- jarvis/jarvis_stats/cli.py +14 -12
- jarvis/jarvis_stats/stats.py +28 -19
- jarvis/jarvis_stats/storage.py +14 -8
- jarvis/jarvis_stats/visualizer.py +12 -7
- jarvis/jarvis_tools/base.py +5 -2
- jarvis/jarvis_tools/clear_memory.py +13 -9
- jarvis/jarvis_tools/cli/main.py +23 -18
- jarvis/jarvis_tools/edit_file.py +572 -873
- jarvis/jarvis_tools/execute_script.py +10 -7
- jarvis/jarvis_tools/file_analyzer.py +7 -8
- jarvis/jarvis_tools/meta_agent.py +287 -0
- jarvis/jarvis_tools/methodology.py +5 -3
- jarvis/jarvis_tools/read_code.py +305 -1438
- jarvis/jarvis_tools/read_symbols.py +50 -17
- jarvis/jarvis_tools/read_webpage.py +19 -18
- jarvis/jarvis_tools/registry.py +435 -156
- jarvis/jarvis_tools/retrieve_memory.py +16 -11
- jarvis/jarvis_tools/save_memory.py +8 -6
- jarvis/jarvis_tools/search_web.py +31 -31
- jarvis/jarvis_tools/sub_agent.py +32 -28
- jarvis/jarvis_tools/sub_code_agent.py +44 -60
- jarvis/jarvis_tools/task_list_manager.py +1811 -0
- jarvis/jarvis_tools/virtual_tty.py +29 -19
- jarvis/jarvis_utils/__init__.py +4 -0
- jarvis/jarvis_utils/builtin_replace_map.py +2 -1
- jarvis/jarvis_utils/clipboard.py +9 -8
- jarvis/jarvis_utils/collections.py +331 -0
- jarvis/jarvis_utils/config.py +699 -194
- jarvis/jarvis_utils/dialogue_recorder.py +294 -0
- jarvis/jarvis_utils/embedding.py +6 -3
- jarvis/jarvis_utils/file_processors.py +7 -1
- jarvis/jarvis_utils/fzf.py +9 -3
- jarvis/jarvis_utils/git_utils.py +71 -42
- jarvis/jarvis_utils/globals.py +116 -32
- jarvis/jarvis_utils/http.py +6 -2
- jarvis/jarvis_utils/input.py +318 -83
- jarvis/jarvis_utils/jsonnet_compat.py +119 -104
- jarvis/jarvis_utils/methodology.py +37 -28
- jarvis/jarvis_utils/output.py +201 -44
- jarvis/jarvis_utils/utils.py +986 -628
- {jarvis_ai_assistant-0.7.16.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/METADATA +49 -33
- jarvis_ai_assistant-1.0.2.dist-info/RECORD +304 -0
- jarvis/jarvis_code_agent/code_analyzer/structured_code.py +0 -556
- jarvis/jarvis_tools/generate_new_tool.py +0 -205
- jarvis/jarvis_tools/lsp_client.py +0 -1552
- jarvis/jarvis_tools/rewrite_file.py +0 -105
- jarvis_ai_assistant-0.7.16.dist-info/RECORD +0 -218
- {jarvis_ai_assistant-0.7.16.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/WHEEL +0 -0
- {jarvis_ai_assistant-0.7.16.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/entry_points.txt +0 -0
- {jarvis_ai_assistant-0.7.16.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/licenses/LICENSE +0 -0
- {jarvis_ai_assistant-0.7.16.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/top_level.txt +0 -0
jarvis/jarvis_tools/read_code.py
CHANGED
|
@@ -1,34 +1,16 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
import os
|
|
3
|
-
import time
|
|
4
3
|
from typing import Any, Dict, List
|
|
5
4
|
|
|
6
5
|
from jarvis.jarvis_utils.config import get_max_input_token_count
|
|
7
6
|
from jarvis.jarvis_utils.embedding import get_context_token_count
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
try:
|
|
11
|
-
from jarvis.jarvis_code_agent.code_analyzer.language_support import (
|
|
12
|
-
detect_language,
|
|
13
|
-
get_dependency_analyzer,
|
|
14
|
-
)
|
|
15
|
-
from jarvis.jarvis_code_agent.code_analyzer.structured_code import StructuredCodeExtractor
|
|
16
|
-
LANGUAGE_SUPPORT_AVAILABLE = True
|
|
17
|
-
except ImportError:
|
|
18
|
-
LANGUAGE_SUPPORT_AVAILABLE = False
|
|
19
|
-
def get_dependency_analyzer(language: str):
|
|
20
|
-
return None
|
|
21
|
-
StructuredCodeExtractor = None
|
|
7
|
+
from jarvis.jarvis_utils.globals import get_global_model_group
|
|
8
|
+
from jarvis.jarvis_utils.output import PrettyOutput
|
|
22
9
|
|
|
23
10
|
|
|
24
11
|
class ReadCodeTool:
|
|
25
12
|
name = "read_code"
|
|
26
|
-
description =
|
|
27
|
-
"结构化读取源代码文件。"
|
|
28
|
-
"支持的语言按语法单元(函数、类等)读取;不支持的语言按空白行分组;"
|
|
29
|
-
"raw_mode=true 时按每20行分组读取。"
|
|
30
|
-
)
|
|
31
|
-
# 工具标签
|
|
13
|
+
description = "读取源代码文件的指定行号范围,并为每行添加行号后返回。"
|
|
32
14
|
parameters = {
|
|
33
15
|
"type": "object",
|
|
34
16
|
"properties": {
|
|
@@ -40,637 +22,21 @@ class ReadCodeTool:
|
|
|
40
22
|
"path": {"type": "string"},
|
|
41
23
|
"start_line": {"type": "number", "default": 1},
|
|
42
24
|
"end_line": {"type": "number", "default": -1},
|
|
43
|
-
"raw_mode": {"type": "boolean", "default": False},
|
|
44
25
|
},
|
|
45
26
|
"required": ["path"],
|
|
46
27
|
},
|
|
47
|
-
"description": "要读取的文件列表,每个文件可指定行号范围(start_line 到 end_line,-1 表示文件末尾)。
|
|
28
|
+
"description": "要读取的文件列表,每个文件可指定行号范围(start_line 到 end_line,-1 表示文件末尾)。",
|
|
48
29
|
}
|
|
49
30
|
},
|
|
50
31
|
"required": ["files"],
|
|
51
32
|
}
|
|
52
|
-
|
|
53
|
-
def _extract_syntax_units(
|
|
54
|
-
self, filepath: str, content: str, start_line: int, end_line: int
|
|
55
|
-
) -> List[Dict[str, Any]]:
|
|
56
|
-
"""提取语法单元(函数、类等)
|
|
57
|
-
|
|
58
|
-
Args:
|
|
59
|
-
filepath: 文件路径
|
|
60
|
-
content: 文件内容
|
|
61
|
-
start_line: 起始行号
|
|
62
|
-
end_line: 结束行号
|
|
63
|
-
|
|
64
|
-
Returns:
|
|
65
|
-
语法单元列表,每个单元包含 id, start_line, end_line, content
|
|
66
|
-
"""
|
|
67
|
-
if StructuredCodeExtractor:
|
|
68
|
-
return StructuredCodeExtractor.extract_syntax_units(filepath, content, start_line, end_line)
|
|
69
|
-
return []
|
|
70
|
-
|
|
71
|
-
def _extract_syntax_units_with_split(
|
|
72
|
-
self, filepath: str, content: str, start_line: int, end_line: int
|
|
73
|
-
) -> List[Dict[str, Any]]:
|
|
74
|
-
"""提取语法单元,然后对超过50行的单元进行二级切分:
|
|
75
|
-
1. 先按连续空白行切分大块
|
|
76
|
-
2. 如果子块仍然超过50行,再按固定行数(50行一组)切分
|
|
77
|
-
|
|
78
|
-
Args:
|
|
79
|
-
filepath: 文件路径
|
|
80
|
-
content: 文件内容
|
|
81
|
-
start_line: 起始行号
|
|
82
|
-
end_line: 结束行号
|
|
83
|
-
|
|
84
|
-
Returns:
|
|
85
|
-
语法单元列表,每个单元不超过50行
|
|
86
|
-
"""
|
|
87
|
-
# 先获取语法单元(仅在支持语法解析的语言中才会返回非空)
|
|
88
|
-
syntax_units = self._extract_syntax_units(filepath, content, start_line, end_line)
|
|
89
|
-
|
|
90
|
-
if not syntax_units:
|
|
91
|
-
return []
|
|
92
|
-
|
|
93
|
-
result = []
|
|
94
|
-
for unit in syntax_units:
|
|
95
|
-
unit_line_count = unit['end_line'] - unit['start_line'] + 1
|
|
96
|
-
if unit_line_count > 50:
|
|
97
|
-
# 第一步:对大块先按空白行切分(基于 StructuredCodeExtractor)
|
|
98
|
-
blank_groups = self._extract_blank_line_groups(
|
|
99
|
-
content, unit['start_line'], unit['end_line']
|
|
100
|
-
)
|
|
101
|
-
|
|
102
|
-
# 如果按空白行切分失败(例如全部为空白或实现返回空),退回原始大块
|
|
103
|
-
if not blank_groups:
|
|
104
|
-
blank_groups = [unit]
|
|
105
|
-
|
|
106
|
-
for group in blank_groups:
|
|
107
|
-
group_line_count = group['end_line'] - group['start_line'] + 1
|
|
108
|
-
if group_line_count > 50:
|
|
109
|
-
# 第二步:对子块中仍然超过50行的部分,按每50行固定切分
|
|
110
|
-
sub_groups = self._extract_line_groups(
|
|
111
|
-
content, group['start_line'], group['end_line'], group_size=50
|
|
112
|
-
)
|
|
113
|
-
result.extend(sub_groups)
|
|
114
|
-
else:
|
|
115
|
-
# 经过空白行切分得到的中等大小块,直接加入结果
|
|
116
|
-
result.append(group)
|
|
117
|
-
else:
|
|
118
|
-
# 如果单元不超过50行,直接添加
|
|
119
|
-
result.append(unit)
|
|
120
|
-
|
|
121
|
-
return result
|
|
122
|
-
|
|
123
|
-
def _extract_blank_line_groups(
|
|
124
|
-
self, content: str, start_line: int, end_line: int
|
|
125
|
-
) -> List[Dict[str, Any]]:
|
|
126
|
-
"""按空白行分组提取内容(委托给StructuredCodeExtractor)"""
|
|
127
|
-
if StructuredCodeExtractor:
|
|
128
|
-
return StructuredCodeExtractor.extract_blank_line_groups(content, start_line, end_line)
|
|
129
|
-
return []
|
|
130
|
-
|
|
131
|
-
def _extract_blank_line_groups_with_split(
|
|
132
|
-
self, content: str, start_line: int, end_line: int
|
|
133
|
-
) -> List[Dict[str, Any]]:
|
|
134
|
-
"""先按空白行分组,然后对超过20行的块再按每20行分割
|
|
135
|
-
|
|
136
|
-
Args:
|
|
137
|
-
content: 文件内容
|
|
138
|
-
start_line: 起始行号
|
|
139
|
-
end_line: 结束行号
|
|
140
|
-
|
|
141
|
-
Returns:
|
|
142
|
-
分组列表,每个分组包含 id, start_line, end_line, content
|
|
143
|
-
"""
|
|
144
|
-
# 先获取空白行分组
|
|
145
|
-
blank_line_groups = self._extract_blank_line_groups(content, start_line, end_line)
|
|
146
|
-
|
|
147
|
-
if not blank_line_groups:
|
|
148
|
-
return []
|
|
149
|
-
|
|
150
|
-
result = []
|
|
151
|
-
for group in blank_line_groups:
|
|
152
|
-
group_line_count = group['end_line'] - group['start_line'] + 1
|
|
153
|
-
if group_line_count > 20:
|
|
154
|
-
# 如果块超过20行,按每20行分割
|
|
155
|
-
sub_groups = self._extract_line_groups(
|
|
156
|
-
content, group['start_line'], group['end_line'], group_size=20
|
|
157
|
-
)
|
|
158
|
-
result.extend(sub_groups)
|
|
159
|
-
else:
|
|
160
|
-
# 如果块不超过20行,直接添加
|
|
161
|
-
result.append(group)
|
|
162
|
-
|
|
163
|
-
return result
|
|
164
|
-
|
|
165
|
-
def _extract_line_groups(
|
|
166
|
-
self, content: str, start_line: int, end_line: int, group_size: int = 20
|
|
167
|
-
) -> List[Dict[str, Any]]:
|
|
168
|
-
"""按行号分组提取内容(委托给StructuredCodeExtractor)"""
|
|
169
|
-
if StructuredCodeExtractor:
|
|
170
|
-
return StructuredCodeExtractor.extract_line_groups(content, start_line, end_line, group_size)
|
|
171
|
-
return []
|
|
172
|
-
|
|
173
|
-
def _ensure_unique_ids(self, units: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
174
|
-
"""确保单元列表中所有id唯一(委托给StructuredCodeExtractor)"""
|
|
175
|
-
if StructuredCodeExtractor:
|
|
176
|
-
return StructuredCodeExtractor.ensure_unique_ids(units)
|
|
177
|
-
return units
|
|
178
|
-
|
|
179
|
-
def _extract_imports(self, filepath: str, content: str, start_line: int, end_line: int) -> List[Dict[str, Any]]:
|
|
180
|
-
"""提取文件的导入/包含语句作为结构化单元(委托给StructuredCodeExtractor)"""
|
|
181
|
-
if StructuredCodeExtractor:
|
|
182
|
-
return StructuredCodeExtractor.extract_imports(filepath, content, start_line, end_line)
|
|
183
|
-
return []
|
|
184
|
-
|
|
185
|
-
def _create_import_unit(self, import_group: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
186
|
-
"""创建导入语句单元(委托给StructuredCodeExtractor)"""
|
|
187
|
-
if StructuredCodeExtractor:
|
|
188
|
-
return StructuredCodeExtractor.create_import_unit(import_group)
|
|
189
|
-
return {}
|
|
190
|
-
|
|
191
|
-
def _format_structured_output(
|
|
192
|
-
self, filepath: str, units: List[Dict[str, Any]], total_lines: int, agent: Any = None
|
|
193
|
-
) -> str:
|
|
194
|
-
"""格式化结构化输出
|
|
195
|
-
|
|
196
|
-
Args:
|
|
197
|
-
filepath: 文件路径
|
|
198
|
-
units: 语法单元或行号分组列表(已包含导入语句单元)
|
|
199
|
-
total_lines: 文件总行数
|
|
200
|
-
agent: Agent实例,用于从缓存中获取block_id
|
|
201
|
-
|
|
202
|
-
Returns:
|
|
203
|
-
格式化后的输出字符串
|
|
204
|
-
"""
|
|
205
|
-
# 文件开始分界符
|
|
206
|
-
output_lines = [
|
|
207
|
-
"=" * 80,
|
|
208
|
-
f"🔍 文件: {filepath}",
|
|
209
|
-
f"📄 总行数: {total_lines}",
|
|
210
|
-
f"📦 结构化单元数: {len(units)}",
|
|
211
|
-
"=" * 80,
|
|
212
|
-
"",
|
|
213
|
-
]
|
|
214
|
-
|
|
215
|
-
# 为每个单元分配block-id
|
|
216
|
-
# 如果unit已经有block_id(从缓存中获取),直接使用;否则按顺序生成
|
|
217
|
-
for idx, unit in enumerate(units, start=1):
|
|
218
|
-
# 如果unit已经有block_id,直接使用(在生成structured_units时已分配)
|
|
219
|
-
block_id = unit.get('block_id')
|
|
220
|
-
if not block_id:
|
|
221
|
-
# 否则按顺序生成临时id
|
|
222
|
-
block_id = f"block-{idx}"
|
|
223
|
-
# 显示id
|
|
224
|
-
output_lines.append(f"[id:{block_id}]")
|
|
225
|
-
# 添加内容,保持原有缩进,并为每行添加行号
|
|
226
|
-
content = unit.get('content', '')
|
|
227
|
-
if content:
|
|
228
|
-
# 获取单元的起始行号
|
|
229
|
-
start_line = unit.get('start_line', 1)
|
|
230
|
-
# 将内容按行分割
|
|
231
|
-
content_lines = content.split('\n')
|
|
232
|
-
# 为每一行添加行号(右对齐,4位,不足补空格)
|
|
233
|
-
numbered_lines = []
|
|
234
|
-
current_line = start_line
|
|
235
|
-
for line in content_lines:
|
|
236
|
-
# 行号右对齐,占4位
|
|
237
|
-
line_number_str = f"{current_line:4d}"
|
|
238
|
-
numbered_lines.append(f"{line_number_str}:{line}")
|
|
239
|
-
current_line += 1
|
|
240
|
-
# 将带行号的内容添加到输出
|
|
241
|
-
output_lines.append('\n'.join(numbered_lines))
|
|
242
|
-
# 块结束分界符
|
|
243
|
-
output_lines.append("-" * 80)
|
|
244
|
-
output_lines.append("") # 单元之间空行分隔
|
|
245
|
-
|
|
246
|
-
# 文件结束分界符
|
|
247
|
-
output_lines.append("=" * 80)
|
|
248
|
-
output_lines.append("")
|
|
249
|
-
|
|
250
|
-
return '\n'.join(output_lines)
|
|
251
|
-
|
|
252
|
-
def _get_file_cache(self, agent: Any, filepath: str) -> Dict[str, Any]:
|
|
253
|
-
"""获取文件的缓存信息
|
|
254
|
-
|
|
255
|
-
Args:
|
|
256
|
-
agent: Agent实例
|
|
257
|
-
filepath: 文件路径
|
|
258
|
-
|
|
259
|
-
Returns:
|
|
260
|
-
缓存信息字典,如果不存在则返回None
|
|
261
|
-
"""
|
|
262
|
-
if not agent:
|
|
263
|
-
return None
|
|
264
|
-
|
|
265
|
-
cache = agent.get_user_data("read_code_cache")
|
|
266
|
-
if not cache:
|
|
267
|
-
return None
|
|
268
|
-
|
|
269
|
-
abs_path = os.path.abspath(filepath)
|
|
270
|
-
return cache.get(abs_path)
|
|
271
|
-
|
|
272
|
-
def _get_blocks_from_cache(self, cache_info: Dict[str, Any], start_line: int, end_line: int) -> List[Dict[str, Any]]:
|
|
273
|
-
"""从缓存中获取对应范围的blocks
|
|
274
|
-
|
|
275
|
-
Args:
|
|
276
|
-
cache_info: 缓存信息
|
|
277
|
-
start_line: 起始行号(1-based)
|
|
278
|
-
end_line: 结束行号(1-based,-1表示文件末尾)
|
|
279
|
-
|
|
280
|
-
Returns:
|
|
281
|
-
blocks列表,每个block包含block_id和content
|
|
282
|
-
"""
|
|
283
|
-
if not cache_info or "id_list" not in cache_info or "blocks" not in cache_info:
|
|
284
|
-
return []
|
|
285
|
-
|
|
286
|
-
id_list = cache_info.get("id_list", [])
|
|
287
|
-
blocks = cache_info.get("blocks", {})
|
|
288
|
-
result = []
|
|
289
|
-
|
|
290
|
-
# 如果end_line是-1,表示文件末尾,需要先计算文件总行数
|
|
291
|
-
if end_line == -1:
|
|
292
|
-
# 先遍历所有blocks计算总行数
|
|
293
|
-
# 注意:块内容不包含末尾换行符,块之间需要添加换行符
|
|
294
|
-
total_lines = 0
|
|
295
|
-
for idx, block_id in enumerate(id_list):
|
|
296
|
-
block_data = blocks.get(block_id)
|
|
297
|
-
if block_data:
|
|
298
|
-
block_content = block_data.get("content", "")
|
|
299
|
-
if block_content:
|
|
300
|
-
# 块内容中的换行符数量 + 1 = 行数
|
|
301
|
-
block_line_count = block_content.count('\n') + 1
|
|
302
|
-
total_lines += block_line_count
|
|
303
|
-
# 如果不是最后一个块,块之间有一个换行符分隔(已计入下一个块的第一行)
|
|
304
|
-
# 所以不需要额外添加
|
|
305
|
-
end_line = total_lines
|
|
306
|
-
|
|
307
|
-
# 通过前面blocks的内容推算每个block的行号范围
|
|
308
|
-
# 注意:块内容不包含末尾换行符,块之间需要添加换行符
|
|
309
|
-
current_line = 1 # 从第1行开始
|
|
310
|
-
|
|
311
|
-
for idx, block_id in enumerate(id_list):
|
|
312
|
-
block_data = blocks.get(block_id)
|
|
313
|
-
if not block_data:
|
|
314
|
-
continue
|
|
315
|
-
block_content = block_data.get("content", "")
|
|
316
|
-
if not block_content:
|
|
317
|
-
continue
|
|
318
|
-
|
|
319
|
-
# 计算这个block的行数
|
|
320
|
-
# 块内容中的换行符数量 + 1 = 行数(因为块内容不包含末尾换行符)
|
|
321
|
-
block_line_count = block_content.count('\n') + 1
|
|
322
|
-
|
|
323
|
-
block_start_line = current_line
|
|
324
|
-
block_end_line = current_line + block_line_count - 1
|
|
325
|
-
|
|
326
|
-
# block与请求范围有重叠就包含
|
|
327
|
-
if block_end_line >= start_line and block_start_line <= end_line:
|
|
328
|
-
result.append({
|
|
329
|
-
"block_id": block_id,
|
|
330
|
-
"content": block_content,
|
|
331
|
-
"start_line": block_start_line,
|
|
332
|
-
})
|
|
333
|
-
|
|
334
|
-
# 更新当前行号
|
|
335
|
-
# 块之间有一个换行符分隔,所以下一个块从 block_end_line + 1 开始
|
|
336
|
-
current_line = block_end_line + 1
|
|
337
|
-
|
|
338
|
-
# 如果已经超过请求的结束行,可以提前退出
|
|
339
|
-
if block_start_line > end_line:
|
|
340
|
-
break
|
|
341
|
-
|
|
342
|
-
return result
|
|
343
|
-
|
|
344
|
-
def _convert_units_to_sequential_ids(self, units: List[Dict[str, Any]], full_content: str = None) -> Dict[str, Any]:
|
|
345
|
-
"""将单元列表转换为缓存格式(id_list和blocks字典)
|
|
346
|
-
|
|
347
|
-
按照行号范围分割文件,不区分语法单元,确保完美恢复。
|
|
348
|
-
|
|
349
|
-
Args:
|
|
350
|
-
units: 结构化单元列表,每个单元包含 id, start_line, end_line, content
|
|
351
|
-
full_content: 完整的文件内容(可选),用于确保块之间的空白行也被包含
|
|
352
|
-
|
|
353
|
-
Returns:
|
|
354
|
-
包含 id_list 和 blocks 的字典:
|
|
355
|
-
- id_list: 有序的id列表,如 ["block-1", "block-2", "block-3"]
|
|
356
|
-
- blocks: id到块信息的字典,如 {"block-1": {"content": "..."}, ...}
|
|
357
|
-
"""
|
|
358
|
-
if not full_content or not units:
|
|
359
|
-
# 没有完整内容,直接使用原始的content
|
|
360
|
-
sorted_original = sorted(units, key=lambda u: u.get('start_line', 0))
|
|
361
|
-
id_list = []
|
|
362
|
-
blocks = {}
|
|
363
|
-
for unit in sorted_original:
|
|
364
|
-
block_id = f"block-{len(id_list) + 1}" # block-1, block-2, ...
|
|
365
|
-
id_list.append(block_id)
|
|
366
|
-
content = unit.get('content', '')
|
|
367
|
-
# 去掉块末尾的换行符
|
|
368
|
-
if content.endswith('\n'):
|
|
369
|
-
content = content[:-1]
|
|
370
|
-
blocks[block_id] = {
|
|
371
|
-
"content": content,
|
|
372
|
-
}
|
|
373
|
-
return {
|
|
374
|
-
"id_list": id_list,
|
|
375
|
-
"blocks": blocks,
|
|
376
|
-
"file_ends_with_newline": False, # 无法确定,默认False
|
|
377
|
-
}
|
|
378
|
-
|
|
379
|
-
# 收集所有单元的开始行号作为分割点
|
|
380
|
-
# 关键:直接使用每个单元的start_line,不合并范围,保留语法单元边界
|
|
381
|
-
split_points_set = {1} # 从第1行开始
|
|
382
|
-
for unit in units:
|
|
383
|
-
start_line = unit.get('start_line', 1)
|
|
384
|
-
if start_line > 0:
|
|
385
|
-
split_points_set.add(start_line)
|
|
386
|
-
|
|
387
|
-
if not split_points_set:
|
|
388
|
-
# 没有有效的分割点,返回空列表
|
|
389
|
-
return {"id_list": [], "blocks": {}, "file_ends_with_newline": False}
|
|
390
|
-
|
|
391
|
-
# 按照每个单元的开始行作为分割点,连续分割文件内容
|
|
392
|
-
# 每个块包含从当前分割点到下一个分割点之前的所有内容
|
|
393
|
-
# 关键:直接按行号范围从原始内容中提取,确保完美恢复(包括文件末尾的换行符和所有空白行)
|
|
394
|
-
# 使用 split('\n') 分割,然后手动为每行添加换行符(除了最后一行,根据原始文件决定)
|
|
395
|
-
lines = full_content.split('\n')
|
|
396
|
-
result_units = []
|
|
397
|
-
|
|
398
|
-
# 排序分割点
|
|
399
|
-
split_points = sorted(split_points_set)
|
|
400
|
-
split_points.append(len(lines) + 1) # 文件末尾
|
|
401
|
-
|
|
402
|
-
# 按照分割点连续分割文件
|
|
403
|
-
# 注意:如果文件以换行符结尾,split('\n')会在末尾产生一个空字符串
|
|
404
|
-
# 我们需要正确处理这种情况
|
|
405
|
-
file_ends_with_newline = full_content.endswith('\n')
|
|
406
|
-
|
|
407
|
-
for idx in range(len(split_points) - 1):
|
|
408
|
-
start_line = split_points[idx] # 1-based
|
|
409
|
-
next_start_line = split_points[idx + 1] # 1-based
|
|
410
|
-
|
|
411
|
-
# 提取从当前分割点到下一个分割点之前的所有内容
|
|
412
|
-
unit_start_idx = max(0, start_line - 1) # 0-based索引
|
|
413
|
-
unit_end_idx = min(len(lines) - 1, next_start_line - 2) # 0-based索引,下一个分割点之前
|
|
414
|
-
|
|
415
|
-
# 确保索引有效
|
|
416
|
-
if unit_start_idx <= unit_end_idx:
|
|
417
|
-
# 提取行并重新组合,确保保留所有换行符
|
|
418
|
-
extracted_lines = lines[unit_start_idx:unit_end_idx + 1]
|
|
419
|
-
|
|
420
|
-
# 重新组合:每行后面添加换行符
|
|
421
|
-
# 对于非最后一个块,最后一行也需要换行符,因为下一个块从下一行开始
|
|
422
|
-
# 对于最后一个块,根据原始文件是否以换行符结尾来决定
|
|
423
|
-
full_unit_content_parts = []
|
|
424
|
-
is_last_block = (idx == len(split_points) - 2)
|
|
425
|
-
|
|
426
|
-
for i, line in enumerate(extracted_lines):
|
|
427
|
-
if i < len(extracted_lines) - 1:
|
|
428
|
-
# 不是最后一行,添加换行符
|
|
429
|
-
full_unit_content_parts.append(line + '\n')
|
|
430
|
-
else:
|
|
431
|
-
# 最后一行
|
|
432
|
-
if not is_last_block:
|
|
433
|
-
# 非最后一个块:最后一行必须添加换行符,因为下一个块从下一行开始
|
|
434
|
-
# 这样可以保留块之间的空白行
|
|
435
|
-
full_unit_content_parts.append(line + '\n')
|
|
436
|
-
else:
|
|
437
|
-
# 最后一个块:需要特殊处理
|
|
438
|
-
# 如果文件以换行符结尾,且最后一行是空字符串(来自split('\n')的副作用),
|
|
439
|
-
# 且不是唯一的一行,那么前面的行已经输出了换行符,这里不需要再输出
|
|
440
|
-
if file_ends_with_newline and line == '' and len(extracted_lines) > 1:
|
|
441
|
-
# 最后一行是空字符串且来自trailing newline,且不是唯一的一行
|
|
442
|
-
# 前面的行已经输出了换行符,所以这里不需要再输出任何内容
|
|
443
|
-
# 空字符串表示不输出任何内容
|
|
444
|
-
full_unit_content_parts.append('')
|
|
445
|
-
elif file_ends_with_newline:
|
|
446
|
-
# 文件以换行符结尾,最后一行需要换行符
|
|
447
|
-
full_unit_content_parts.append(line + '\n')
|
|
448
|
-
else:
|
|
449
|
-
# 文件不以换行符结尾
|
|
450
|
-
full_unit_content_parts.append(line)
|
|
451
|
-
|
|
452
|
-
full_unit_content = ''.join(full_unit_content_parts)
|
|
453
|
-
|
|
454
|
-
# 去掉块末尾的换行符(存储时去掉,恢复时再添加)
|
|
455
|
-
if full_unit_content.endswith('\n'):
|
|
456
|
-
full_unit_content = full_unit_content[:-1]
|
|
457
|
-
|
|
458
|
-
block_id = f"block-{len(result_units) + 1}" # block-1, block-2, ...
|
|
459
|
-
result_units.append({
|
|
460
|
-
"id": block_id,
|
|
461
|
-
"content": full_unit_content,
|
|
462
|
-
})
|
|
463
|
-
|
|
464
|
-
# 转换为 id_list 和 blocks 格式
|
|
465
|
-
id_list = [unit["id"] for unit in result_units]
|
|
466
|
-
blocks = {
|
|
467
|
-
unit["id"]: {
|
|
468
|
-
"content": unit["content"],
|
|
469
|
-
}
|
|
470
|
-
for unit in result_units
|
|
471
|
-
}
|
|
472
|
-
|
|
473
|
-
# 保存文件是否以换行符结尾的信息(用于恢复时正确处理)
|
|
474
|
-
file_ends_with_newline = full_content.endswith('\n')
|
|
475
|
-
|
|
476
|
-
return {
|
|
477
|
-
"id_list": id_list,
|
|
478
|
-
"blocks": blocks,
|
|
479
|
-
"file_ends_with_newline": file_ends_with_newline,
|
|
480
|
-
}
|
|
481
|
-
|
|
482
|
-
def _save_file_cache(
|
|
483
|
-
self, agent: Any, filepath: str, units: List[Dict[str, Any]],
|
|
484
|
-
total_lines: int, file_mtime: float, full_content: str = None
|
|
485
|
-
) -> None:
|
|
486
|
-
"""保存文件的结构化信息到缓存
|
|
487
|
-
|
|
488
|
-
Args:
|
|
489
|
-
agent: Agent实例
|
|
490
|
-
filepath: 文件路径
|
|
491
|
-
units: 结构化单元列表
|
|
492
|
-
total_lines: 文件总行数
|
|
493
|
-
file_mtime: 文件修改时间
|
|
494
|
-
full_content: 完整的文件内容(可选),用于确保块之间的空白行也被包含
|
|
495
|
-
"""
|
|
496
|
-
if not agent:
|
|
497
|
-
return
|
|
498
|
-
|
|
499
|
-
cache = agent.get_user_data("read_code_cache")
|
|
500
|
-
if not cache:
|
|
501
|
-
cache = {}
|
|
502
|
-
agent.set_user_data("read_code_cache", cache)
|
|
503
|
-
|
|
504
|
-
abs_path = os.path.abspath(filepath)
|
|
505
|
-
|
|
506
|
-
# 转换为 id_list 和 blocks 格式
|
|
507
|
-
cache_data = self._convert_units_to_sequential_ids(units, full_content)
|
|
508
|
-
|
|
509
|
-
cache[abs_path] = {
|
|
510
|
-
"id_list": cache_data["id_list"],
|
|
511
|
-
"blocks": cache_data["blocks"],
|
|
512
|
-
"total_lines": total_lines,
|
|
513
|
-
"read_time": time.time(),
|
|
514
|
-
"file_mtime": file_mtime,
|
|
515
|
-
"file_ends_with_newline": cache_data.get("file_ends_with_newline", False),
|
|
516
|
-
}
|
|
517
|
-
agent.set_user_data("read_code_cache", cache)
|
|
518
|
-
|
|
519
|
-
def _is_cache_valid(self, cache_info: Dict[str, Any], filepath: str) -> bool:
|
|
520
|
-
"""检查缓存是否有效
|
|
521
|
-
|
|
522
|
-
Args:
|
|
523
|
-
cache_info: 缓存信息字典
|
|
524
|
-
filepath: 文件路径
|
|
525
|
-
|
|
526
|
-
Returns:
|
|
527
|
-
True表示缓存有效,False表示缓存无效
|
|
528
|
-
"""
|
|
529
|
-
if not cache_info:
|
|
530
|
-
return False
|
|
531
|
-
|
|
532
|
-
try:
|
|
533
|
-
# 检查文件是否存在
|
|
534
|
-
if not os.path.exists(filepath):
|
|
535
|
-
return False
|
|
536
|
-
|
|
537
|
-
# 检查文件修改时间是否变化
|
|
538
|
-
current_mtime = os.path.getmtime(filepath)
|
|
539
|
-
cached_mtime = cache_info.get("file_mtime")
|
|
540
|
-
|
|
541
|
-
if cached_mtime is None or abs(current_mtime - cached_mtime) > 0.1: # 允许0.1秒的误差
|
|
542
|
-
return False
|
|
543
|
-
|
|
544
|
-
# 检查缓存数据结构是否完整
|
|
545
|
-
if "id_list" not in cache_info or "blocks" not in cache_info or "total_lines" not in cache_info:
|
|
546
|
-
return False
|
|
547
|
-
|
|
548
|
-
return True
|
|
549
|
-
except Exception:
|
|
550
|
-
return False
|
|
551
|
-
|
|
552
|
-
def _restore_file_from_cache(self, cache_info: Dict[str, Any]) -> str:
|
|
553
|
-
"""从缓存恢复文件内容
|
|
554
|
-
|
|
555
|
-
Args:
|
|
556
|
-
cache_info: 缓存信息字典
|
|
557
|
-
|
|
558
|
-
Returns:
|
|
559
|
-
恢复的文件内容字符串(与原始文件内容完全一致)
|
|
560
|
-
"""
|
|
561
|
-
if not cache_info:
|
|
562
|
-
return ""
|
|
563
|
-
|
|
564
|
-
# 按照 id_list 的顺序恢复
|
|
565
|
-
id_list = cache_info.get("id_list", [])
|
|
566
|
-
blocks = cache_info.get("blocks", {})
|
|
567
|
-
file_ends_with_newline = cache_info.get("file_ends_with_newline", False)
|
|
568
|
-
|
|
569
|
-
result = []
|
|
570
|
-
for idx, block_id in enumerate(id_list):
|
|
571
|
-
block = blocks.get(block_id)
|
|
572
|
-
if block:
|
|
573
|
-
content = block.get('content', '')
|
|
574
|
-
if content:
|
|
575
|
-
result.append(content)
|
|
576
|
-
# 在块之间添加换行符(最后一个块后面根据文件是否以换行符结尾决定)
|
|
577
|
-
is_last_block = (idx == len(id_list) - 1)
|
|
578
|
-
if is_last_block:
|
|
579
|
-
# 最后一个块:如果文件以换行符结尾,添加换行符
|
|
580
|
-
if file_ends_with_newline:
|
|
581
|
-
result.append('\n')
|
|
582
|
-
else:
|
|
583
|
-
# 非最后一个块:在块之间添加换行符
|
|
584
|
-
result.append('\n')
|
|
585
|
-
|
|
586
|
-
return ''.join(result) if result else ""
|
|
587
|
-
|
|
588
|
-
def _estimate_structured_tokens(
|
|
589
|
-
self, filepath: str, content: str, start_line: int, end_line: int, total_lines: int, raw_mode: bool = False
|
|
590
|
-
) -> int:
|
|
591
|
-
"""估算结构化输出的token数
|
|
592
|
-
|
|
593
|
-
Args:
|
|
594
|
-
filepath: 文件路径
|
|
595
|
-
content: 文件内容
|
|
596
|
-
start_line: 起始行号
|
|
597
|
-
end_line: 结束行号
|
|
598
|
-
total_lines: 文件总行数
|
|
599
|
-
|
|
600
|
-
Returns:
|
|
601
|
-
估算的token数
|
|
602
|
-
"""
|
|
603
|
-
try:
|
|
604
|
-
if raw_mode:
|
|
605
|
-
# 原始模式:按每20行分组计算token
|
|
606
|
-
line_groups = self._extract_line_groups(content, start_line, end_line, group_size=20)
|
|
607
|
-
if line_groups:
|
|
608
|
-
import_units = self._extract_imports(filepath, content, start_line, end_line)
|
|
609
|
-
all_units = import_units + line_groups[:1]
|
|
610
|
-
# 确保id唯一
|
|
611
|
-
all_units = self._ensure_unique_ids(all_units)
|
|
612
|
-
# 按行号排序
|
|
613
|
-
all_units.sort(key=lambda u: u['start_line'])
|
|
614
|
-
sample_output = self._format_structured_output(filepath, all_units, total_lines)
|
|
615
|
-
if len(line_groups) > 1:
|
|
616
|
-
group_tokens = get_context_token_count(sample_output)
|
|
617
|
-
return group_tokens * len(line_groups)
|
|
618
|
-
else:
|
|
619
|
-
return get_context_token_count(sample_output)
|
|
620
|
-
else:
|
|
621
|
-
# 尝试提取语法单元(确保每个单元不超过50行)
|
|
622
|
-
syntax_units = self._extract_syntax_units_with_split(filepath, content, start_line, end_line)
|
|
623
|
-
|
|
624
|
-
if syntax_units:
|
|
625
|
-
# 使用语法单元结构化输出格式计算token
|
|
626
|
-
import_units = self._extract_imports(filepath, content, start_line, end_line)
|
|
627
|
-
all_units = import_units + syntax_units[:1]
|
|
628
|
-
# 确保id唯一
|
|
629
|
-
all_units = self._ensure_unique_ids(all_units)
|
|
630
|
-
# 按行号排序
|
|
631
|
-
all_units.sort(key=lambda u: u['start_line'])
|
|
632
|
-
sample_output = self._format_structured_output(filepath, all_units, total_lines)
|
|
633
|
-
if len(syntax_units) > 1:
|
|
634
|
-
unit_tokens = get_context_token_count(sample_output)
|
|
635
|
-
return unit_tokens * len(syntax_units)
|
|
636
|
-
else:
|
|
637
|
-
return get_context_token_count(sample_output)
|
|
638
|
-
else:
|
|
639
|
-
# 使用空白行分组格式计算token(不支持语言时)
|
|
640
|
-
# 先按空行分割,然后对超过20行的块再按每20行分割
|
|
641
|
-
line_groups = self._extract_blank_line_groups_with_split(content, start_line, end_line)
|
|
642
|
-
if line_groups:
|
|
643
|
-
import_units = self._extract_imports(filepath, content, start_line, end_line)
|
|
644
|
-
all_units = import_units + line_groups[:1]
|
|
645
|
-
# 确保id唯一
|
|
646
|
-
all_units = self._ensure_unique_ids(all_units)
|
|
647
|
-
# 按行号排序
|
|
648
|
-
all_units.sort(key=lambda u: u['start_line'])
|
|
649
|
-
sample_output = self._format_structured_output(filepath, all_units, total_lines)
|
|
650
|
-
if len(line_groups) > 1:
|
|
651
|
-
group_tokens = get_context_token_count(sample_output)
|
|
652
|
-
return group_tokens * len(line_groups)
|
|
653
|
-
else:
|
|
654
|
-
return get_context_token_count(sample_output)
|
|
655
|
-
else:
|
|
656
|
-
# 回退到原始格式计算
|
|
657
|
-
lines = content.split('\n')
|
|
658
|
-
selected_lines = lines[start_line - 1:end_line]
|
|
659
|
-
numbered_content = "".join(f"{i:5d}:{line}\n" for i, line in enumerate(selected_lines, start=start_line))
|
|
660
|
-
return get_context_token_count(numbered_content)
|
|
661
|
-
except Exception:
|
|
662
|
-
# 如果估算失败,使用简单的行号格式估算
|
|
663
|
-
lines = content.split('\n')
|
|
664
|
-
selected_lines = lines[start_line - 1:end_line]
|
|
665
|
-
numbered_content = "".join(f"{i:5d}:{line}\n" for i, line in enumerate(selected_lines, start=start_line))
|
|
666
|
-
return get_context_token_count(numbered_content)
|
|
667
|
-
|
|
33
|
+
|
|
668
34
|
def _get_max_token_limit(self, agent: Any = None) -> int:
|
|
669
35
|
"""获取基于剩余token数量的token限制
|
|
670
|
-
|
|
36
|
+
|
|
671
37
|
Args:
|
|
672
38
|
agent: Agent实例,用于获取模型和剩余token数量
|
|
673
|
-
|
|
39
|
+
|
|
674
40
|
Returns:
|
|
675
41
|
int: 允许的最大token数(剩余token的2/3,或至少保留1/3剩余token)
|
|
676
42
|
"""
|
|
@@ -679,28 +45,32 @@ class ReadCodeTool:
|
|
|
679
45
|
if agent and hasattr(agent, "model"):
|
|
680
46
|
try:
|
|
681
47
|
remaining_tokens = agent.model.get_remaining_token_count()
|
|
682
|
-
# 使用剩余token的2
|
|
683
|
-
limit_tokens = int(remaining_tokens *
|
|
48
|
+
# 使用剩余token的1/2作为限制,保留1/2作为安全余量
|
|
49
|
+
limit_tokens = int(remaining_tokens * 1 / 2)
|
|
684
50
|
# 确保至少返回一个合理的值
|
|
685
51
|
if limit_tokens > 0:
|
|
686
52
|
return limit_tokens
|
|
687
53
|
except Exception:
|
|
688
54
|
pass
|
|
689
|
-
|
|
55
|
+
|
|
690
56
|
# 回退方案:使用输入窗口的2/3
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
57
|
+
# 使用全局模型组(不再从 agent 继承)
|
|
58
|
+
model_group = get_global_model_group()
|
|
59
|
+
|
|
695
60
|
max_input_tokens = get_max_input_token_count(model_group)
|
|
696
|
-
# 计算2
|
|
697
|
-
limit_tokens = int(max_input_tokens *
|
|
61
|
+
# 计算1/2限制的token数
|
|
62
|
+
limit_tokens = int(max_input_tokens * 1 / 2)
|
|
698
63
|
return limit_tokens
|
|
699
64
|
except Exception:
|
|
700
|
-
# 如果获取失败,使用默认值(假设
|
|
65
|
+
# 如果获取失败,使用默认值(假设128000 token,2/3是85333)
|
|
701
66
|
return 21333
|
|
67
|
+
|
|
702
68
|
def _handle_single_file(
|
|
703
|
-
self,
|
|
69
|
+
self,
|
|
70
|
+
filepath: str,
|
|
71
|
+
start_line: int = 1,
|
|
72
|
+
end_line: int = -1,
|
|
73
|
+
agent: Any = None,
|
|
704
74
|
) -> Dict[str, Any]:
|
|
705
75
|
"""处理单个文件的读取操作
|
|
706
76
|
|
|
@@ -708,7 +78,7 @@ class ReadCodeTool:
|
|
|
708
78
|
filepath (str): 文件路径
|
|
709
79
|
start_line (int): 起始行号,默认为1
|
|
710
80
|
end_line (int): 结束行号,默认为-1表示文件末尾
|
|
711
|
-
agent: Agent
|
|
81
|
+
agent: Agent实例,用于获取token限制
|
|
712
82
|
|
|
713
83
|
Returns:
|
|
714
84
|
Dict[str, Any]: 包含成功状态、输出内容和错误信息的字典
|
|
@@ -733,9 +103,10 @@ class ReadCodeTool:
|
|
|
733
103
|
}
|
|
734
104
|
|
|
735
105
|
# 读取文件内容
|
|
736
|
-
# 第一遍流式读取,仅统计总行数,避免一次性读入内存
|
|
737
106
|
with open(abs_path, "r", encoding="utf-8", errors="ignore") as f:
|
|
738
|
-
|
|
107
|
+
lines = f.readlines()
|
|
108
|
+
|
|
109
|
+
total_lines = len(lines)
|
|
739
110
|
|
|
740
111
|
# 处理空文件情况
|
|
741
112
|
if total_lines == 0:
|
|
@@ -762,61 +133,51 @@ class ReadCodeTool:
|
|
|
762
133
|
)
|
|
763
134
|
|
|
764
135
|
if start_line > end_line:
|
|
765
|
-
|
|
766
136
|
return {
|
|
767
137
|
"success": False,
|
|
768
138
|
"stdout": "",
|
|
769
139
|
"stderr": f"无效的行范围 [{start_line}-{end_line}] (总行数: {total_lines})",
|
|
770
140
|
}
|
|
771
141
|
|
|
772
|
-
#
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
#
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
with open(abs_path, "r", encoding="utf-8", errors="ignore") as f:
|
|
790
|
-
full_content = f.read()
|
|
791
|
-
|
|
792
|
-
# 读取要读取的行范围内容
|
|
793
|
-
selected_content_lines = []
|
|
794
|
-
lines = full_content.split('\n')
|
|
795
|
-
for i in range(start_line - 1, min(end_line, len(lines))):
|
|
796
|
-
selected_content_lines.append(lines[i])
|
|
797
|
-
|
|
798
|
-
# 估算结构化输出的token数
|
|
799
|
-
content_tokens = self._estimate_structured_tokens(abs_path, full_content, start_line, end_line, total_lines, raw_mode)
|
|
800
|
-
|
|
142
|
+
# 读取指定行号范围的内容
|
|
143
|
+
selected_lines = lines[start_line - 1 : end_line]
|
|
144
|
+
|
|
145
|
+
# 为每行添加行号
|
|
146
|
+
numbered_lines = []
|
|
147
|
+
for i, line in enumerate(selected_lines, start=start_line):
|
|
148
|
+
# 行号右对齐,占4位
|
|
149
|
+
line_number_str = f"{i:4d}"
|
|
150
|
+
# 移除行尾的换行符,因为我们会在后面统一添加
|
|
151
|
+
line_content = line.rstrip("\n\r")
|
|
152
|
+
numbered_lines.append(f"{line_number_str}:{line_content}")
|
|
153
|
+
|
|
154
|
+
# 构造输出内容
|
|
155
|
+
output_content = "\n".join(numbered_lines)
|
|
156
|
+
|
|
157
|
+
# 估算token数
|
|
158
|
+
content_tokens = get_context_token_count(output_content)
|
|
801
159
|
max_token_limit = self._get_max_token_limit(agent)
|
|
802
|
-
|
|
803
|
-
#
|
|
160
|
+
|
|
161
|
+
# 检查token数是否超过限制
|
|
804
162
|
if content_tokens > max_token_limit:
|
|
805
163
|
read_lines = end_line - start_line + 1
|
|
806
|
-
|
|
164
|
+
|
|
807
165
|
# 计算安全读取的行数 (按比例缩减)
|
|
808
166
|
safe_lines = int((max_token_limit / content_tokens) * read_lines)
|
|
809
167
|
safe_lines = max(1, min(safe_lines, read_lines))
|
|
810
168
|
safe_end_line = start_line + safe_lines - 1
|
|
811
|
-
|
|
169
|
+
|
|
812
170
|
# 读取安全范围内的内容
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
171
|
+
safe_selected_lines = lines[start_line - 1 : safe_end_line]
|
|
172
|
+
safe_numbered_lines = []
|
|
173
|
+
for i, line in enumerate(safe_selected_lines, start=start_line):
|
|
174
|
+
line_number_str = f"{i:4d}"
|
|
175
|
+
line_content = line.rstrip("\n\r")
|
|
176
|
+
safe_numbered_lines.append(f"{line_number_str}:{line_content}")
|
|
177
|
+
|
|
817
178
|
# 构造部分读取结果
|
|
818
|
-
partial_content =
|
|
819
|
-
|
|
179
|
+
partial_content = "\n".join(safe_numbered_lines)
|
|
180
|
+
|
|
820
181
|
return {
|
|
821
182
|
"success": True,
|
|
822
183
|
"stdout": (
|
|
@@ -834,150 +195,11 @@ class ReadCodeTool:
|
|
|
834
195
|
),
|
|
835
196
|
}
|
|
836
197
|
|
|
837
|
-
#
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
full_structured_units = None
|
|
843
|
-
|
|
844
|
-
if raw_mode:
|
|
845
|
-
# 原始读取模式:按每20行分组(整个文件)
|
|
846
|
-
full_line_groups = self._extract_line_groups(full_content, 1, total_lines, group_size=20)
|
|
847
|
-
# 合并导入单元和行号分组
|
|
848
|
-
full_all_units = full_import_units + full_line_groups
|
|
849
|
-
# 确保id唯一
|
|
850
|
-
full_all_units = self._ensure_unique_ids(full_all_units)
|
|
851
|
-
# 按行号排序
|
|
852
|
-
full_all_units.sort(key=lambda u: u['start_line'])
|
|
853
|
-
full_structured_units = full_all_units
|
|
854
|
-
else:
|
|
855
|
-
# 尝试提取整个文件的语法单元(确保每个单元不超过50行)
|
|
856
|
-
full_syntax_units = self._extract_syntax_units_with_split(abs_path, full_content, 1, total_lines)
|
|
857
|
-
|
|
858
|
-
# 检测语言类型
|
|
859
|
-
if LANGUAGE_SUPPORT_AVAILABLE:
|
|
860
|
-
try:
|
|
861
|
-
detect_language(abs_path)
|
|
862
|
-
except Exception:
|
|
863
|
-
pass
|
|
864
|
-
|
|
865
|
-
if full_syntax_units:
|
|
866
|
-
# 合并导入单元和语法单元
|
|
867
|
-
full_all_units = full_import_units + full_syntax_units
|
|
868
|
-
# 确保id唯一
|
|
869
|
-
full_all_units = self._ensure_unique_ids(full_all_units)
|
|
870
|
-
# 按行号排序
|
|
871
|
-
full_all_units.sort(key=lambda u: u['start_line'])
|
|
872
|
-
full_structured_units = full_all_units
|
|
873
|
-
else:
|
|
874
|
-
# 使用空白行分组结构化输出(不支持语言时)
|
|
875
|
-
# 先按空行分割,然后对超过20行的块再按每20行分割(整个文件)
|
|
876
|
-
full_line_groups = self._extract_blank_line_groups_with_split(full_content, 1, total_lines)
|
|
877
|
-
# 合并导入单元和行号分组
|
|
878
|
-
full_all_units = full_import_units + full_line_groups
|
|
879
|
-
# 确保id唯一
|
|
880
|
-
full_all_units = self._ensure_unique_ids(full_all_units)
|
|
881
|
-
# 按行号排序
|
|
882
|
-
full_all_units.sort(key=lambda u: u['start_line'])
|
|
883
|
-
full_structured_units = full_all_units
|
|
884
|
-
|
|
885
|
-
# 保存整个文件的结构化信息到缓存
|
|
886
|
-
if full_structured_units is not None:
|
|
887
|
-
self._save_file_cache(agent, abs_path, full_structured_units, total_lines, file_mtime, full_content)
|
|
888
|
-
|
|
889
|
-
# 如果缓存有效,直接使用缓存中的blocks输出
|
|
890
|
-
if agent:
|
|
891
|
-
cache_info = self._get_file_cache(agent, abs_path)
|
|
892
|
-
if cache_info and self._is_cache_valid(cache_info, abs_path):
|
|
893
|
-
# 直接从缓存中获取对应范围的blocks
|
|
894
|
-
cached_blocks = self._get_blocks_from_cache(cache_info, start_line, end_line)
|
|
895
|
-
if cached_blocks:
|
|
896
|
-
# 转换为units格式(用于输出),保留真实的文件起始行号
|
|
897
|
-
structured_units = []
|
|
898
|
-
for block in cached_blocks:
|
|
899
|
-
structured_units.append({
|
|
900
|
-
"block_id": block["block_id"],
|
|
901
|
-
"content": block["content"],
|
|
902
|
-
"start_line": block.get("start_line", 1),
|
|
903
|
-
})
|
|
904
|
-
output = self._format_structured_output(abs_path, structured_units, total_lines, agent)
|
|
905
|
-
else:
|
|
906
|
-
output = ""
|
|
907
|
-
else:
|
|
908
|
-
# 缓存无效,重新提取units
|
|
909
|
-
# 提取请求范围的结构化单元(用于输出)
|
|
910
|
-
import_units = self._extract_imports(abs_path, full_content, start_line, end_line)
|
|
911
|
-
|
|
912
|
-
# 确定使用的结构化单元(语法单元或行号分组)
|
|
913
|
-
structured_units = None
|
|
914
|
-
|
|
915
|
-
if raw_mode:
|
|
916
|
-
# 原始读取模式:按每20行分组
|
|
917
|
-
line_groups = self._extract_line_groups(full_content, start_line, end_line, group_size=20)
|
|
918
|
-
# 合并导入单元和行号分组
|
|
919
|
-
all_units = import_units + line_groups
|
|
920
|
-
# 确保id唯一
|
|
921
|
-
all_units = self._ensure_unique_ids(all_units)
|
|
922
|
-
# 按行号排序,所有单元按在文件中的实际位置排序
|
|
923
|
-
all_units.sort(key=lambda u: u['start_line'])
|
|
924
|
-
structured_units = all_units
|
|
925
|
-
else:
|
|
926
|
-
# 尝试提取语法单元(结构化读取,full_content 已在上面读取,确保每个单元不超过50行)
|
|
927
|
-
syntax_units = self._extract_syntax_units_with_split(abs_path, full_content, start_line, end_line)
|
|
928
|
-
|
|
929
|
-
if syntax_units:
|
|
930
|
-
# 合并导入单元和语法单元
|
|
931
|
-
all_units = import_units + syntax_units
|
|
932
|
-
# 确保id唯一
|
|
933
|
-
all_units = self._ensure_unique_ids(all_units)
|
|
934
|
-
# 按行号排序,所有单元按在文件中的实际位置排序
|
|
935
|
-
all_units.sort(key=lambda u: u['start_line'])
|
|
936
|
-
structured_units = all_units
|
|
937
|
-
else:
|
|
938
|
-
# 使用空白行分组结构化输出(不支持语言时)
|
|
939
|
-
# 先按空行分割,然后对超过20行的块再按每20行分割
|
|
940
|
-
line_groups = self._extract_blank_line_groups_with_split(full_content, start_line, end_line)
|
|
941
|
-
# 合并导入单元和行号分组
|
|
942
|
-
all_units = import_units + line_groups
|
|
943
|
-
# 确保id唯一
|
|
944
|
-
all_units = self._ensure_unique_ids(all_units)
|
|
945
|
-
# 按行号排序,所有单元按在文件中的实际位置排序
|
|
946
|
-
all_units.sort(key=lambda u: u['start_line'])
|
|
947
|
-
structured_units = all_units
|
|
948
|
-
|
|
949
|
-
if structured_units:
|
|
950
|
-
output = self._format_structured_output(abs_path, structured_units, total_lines, agent)
|
|
951
|
-
else:
|
|
952
|
-
output = ""
|
|
953
|
-
else:
|
|
954
|
-
# 没有agent,无法使用缓存,重新提取units
|
|
955
|
-
import_units = self._extract_imports(abs_path, full_content, start_line, end_line)
|
|
956
|
-
|
|
957
|
-
if raw_mode:
|
|
958
|
-
line_groups = self._extract_line_groups(full_content, start_line, end_line, group_size=20)
|
|
959
|
-
all_units = import_units + line_groups
|
|
960
|
-
all_units = self._ensure_unique_ids(all_units)
|
|
961
|
-
all_units.sort(key=lambda u: u['start_line'])
|
|
962
|
-
structured_units = all_units
|
|
963
|
-
else:
|
|
964
|
-
syntax_units = self._extract_syntax_units_with_split(abs_path, full_content, start_line, end_line)
|
|
965
|
-
if syntax_units:
|
|
966
|
-
all_units = import_units + syntax_units
|
|
967
|
-
all_units = self._ensure_unique_ids(all_units)
|
|
968
|
-
all_units.sort(key=lambda u: u['start_line'])
|
|
969
|
-
structured_units = all_units
|
|
970
|
-
else:
|
|
971
|
-
line_groups = self._extract_blank_line_groups_with_split(full_content, start_line, end_line)
|
|
972
|
-
all_units = import_units + line_groups
|
|
973
|
-
all_units = self._ensure_unique_ids(all_units)
|
|
974
|
-
all_units.sort(key=lambda u: u['start_line'])
|
|
975
|
-
structured_units = all_units
|
|
976
|
-
|
|
977
|
-
if structured_units:
|
|
978
|
-
output = self._format_structured_output(abs_path, structured_units, total_lines, agent)
|
|
979
|
-
else:
|
|
980
|
-
output = ""
|
|
198
|
+
# 构造完整输出
|
|
199
|
+
output = f"\n🔍 文件: {abs_path}\n📄 总行数: {total_lines}\n📊 读取范围: {start_line}-{end_line}\n"
|
|
200
|
+
output += "=" * 80 + "\n"
|
|
201
|
+
output += output_content
|
|
202
|
+
output += "\n" + "=" * 80 + "\n"
|
|
981
203
|
|
|
982
204
|
# 尝试获取并附加上下文信息
|
|
983
205
|
context_info = self._get_file_context(abs_path, start_line, end_line, agent)
|
|
@@ -995,136 +217,9 @@ class ReadCodeTool:
|
|
|
995
217
|
return {"success": True, "stdout": output, "stderr": ""}
|
|
996
218
|
|
|
997
219
|
except Exception as e:
|
|
998
|
-
|
|
220
|
+
PrettyOutput.auto_print(f"❌ {str(e)}")
|
|
999
221
|
return {"success": False, "stdout": "", "stderr": f"文件读取失败: {str(e)}"}
|
|
1000
222
|
|
|
1001
|
-
def _handle_merged_ranges(
|
|
1002
|
-
self, filepath: str, requests: List[Dict], agent: Any = None
|
|
1003
|
-
) -> Dict[str, Any]:
|
|
1004
|
-
"""处理同一文件的多个范围请求,合并后去重
|
|
1005
|
-
|
|
1006
|
-
Args:
|
|
1007
|
-
filepath: 文件绝对路径
|
|
1008
|
-
requests: 范围请求列表,每个请求包含 start_line, end_line, raw_mode
|
|
1009
|
-
agent: Agent实例
|
|
1010
|
-
|
|
1011
|
-
Returns:
|
|
1012
|
-
Dict[str, Any]: 包含成功状态、输出内容和错误信息的字典
|
|
1013
|
-
"""
|
|
1014
|
-
try:
|
|
1015
|
-
# 文件存在性检查
|
|
1016
|
-
if not os.path.exists(filepath):
|
|
1017
|
-
return {
|
|
1018
|
-
"success": False,
|
|
1019
|
-
"stdout": "",
|
|
1020
|
-
"stderr": f"文件不存在: {filepath}",
|
|
1021
|
-
}
|
|
1022
|
-
|
|
1023
|
-
# 读取文件内容
|
|
1024
|
-
with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
|
|
1025
|
-
full_content = f.read()
|
|
1026
|
-
|
|
1027
|
-
total_lines = len(full_content.split('\n'))
|
|
1028
|
-
if total_lines == 0:
|
|
1029
|
-
return {
|
|
1030
|
-
"success": True,
|
|
1031
|
-
"stdout": f"\n🔍 文件: {filepath}\n📄 文件为空 (0行)\n",
|
|
1032
|
-
"stderr": "",
|
|
1033
|
-
}
|
|
1034
|
-
|
|
1035
|
-
# 先确保缓存存在(通过读取整个文件建立缓存)
|
|
1036
|
-
first_request = requests[0]
|
|
1037
|
-
self._handle_single_file(
|
|
1038
|
-
filepath, 1, -1, agent, first_request.get("raw_mode", False)
|
|
1039
|
-
)
|
|
1040
|
-
|
|
1041
|
-
# 获取缓存
|
|
1042
|
-
cache_info = self._get_file_cache(agent, filepath)
|
|
1043
|
-
if not cache_info or not self._is_cache_valid(cache_info, filepath):
|
|
1044
|
-
# 缓存无效,使用合并范围的方式去重
|
|
1045
|
-
# 合并所有范围,计算最小起始行和最大结束行
|
|
1046
|
-
min_start = float('inf')
|
|
1047
|
-
max_end = 0
|
|
1048
|
-
raw_mode = False
|
|
1049
|
-
for req in requests:
|
|
1050
|
-
start_line = req.get("start_line", 1)
|
|
1051
|
-
end_line = req.get("end_line", -1)
|
|
1052
|
-
raw_mode = raw_mode or req.get("raw_mode", False)
|
|
1053
|
-
|
|
1054
|
-
# 处理特殊值
|
|
1055
|
-
if end_line == -1:
|
|
1056
|
-
end_line = total_lines
|
|
1057
|
-
else:
|
|
1058
|
-
end_line = max(1, min(end_line, total_lines)) if end_line >= 0 else total_lines + end_line + 1
|
|
1059
|
-
start_line = max(1, min(start_line, total_lines)) if start_line >= 0 else total_lines + start_line + 1
|
|
1060
|
-
|
|
1061
|
-
min_start = min(min_start, start_line)
|
|
1062
|
-
max_end = max(max_end, end_line)
|
|
1063
|
-
|
|
1064
|
-
# 用合并后的范围读取一次,自然就去重了
|
|
1065
|
-
result = self._handle_single_file(
|
|
1066
|
-
filepath, int(min_start), int(max_end), agent, raw_mode
|
|
1067
|
-
)
|
|
1068
|
-
return result
|
|
1069
|
-
|
|
1070
|
-
# 收集所有范围覆盖的块ID(去重)
|
|
1071
|
-
seen_block_ids = set()
|
|
1072
|
-
merged_blocks = []
|
|
1073
|
-
|
|
1074
|
-
for req in requests:
|
|
1075
|
-
start_line = req.get("start_line", 1)
|
|
1076
|
-
end_line = req.get("end_line", -1)
|
|
1077
|
-
|
|
1078
|
-
# 处理特殊值
|
|
1079
|
-
if end_line == -1:
|
|
1080
|
-
end_line = total_lines
|
|
1081
|
-
else:
|
|
1082
|
-
end_line = max(1, min(end_line, total_lines)) if end_line >= 0 else total_lines + end_line + 1
|
|
1083
|
-
start_line = max(1, min(start_line, total_lines)) if start_line >= 0 else total_lines + start_line + 1
|
|
1084
|
-
|
|
1085
|
-
# 从缓存获取对应范围的块
|
|
1086
|
-
cached_blocks = self._get_blocks_from_cache(cache_info, start_line, end_line)
|
|
1087
|
-
for block in cached_blocks:
|
|
1088
|
-
block_id = block["block_id"]
|
|
1089
|
-
if block_id not in seen_block_ids:
|
|
1090
|
-
seen_block_ids.add(block_id)
|
|
1091
|
-
merged_blocks.append(block)
|
|
1092
|
-
|
|
1093
|
-
# 按block_id排序(block-1, block-2, ...)
|
|
1094
|
-
def extract_block_num(block):
|
|
1095
|
-
block_id = block.get("block_id", "block-0")
|
|
1096
|
-
try:
|
|
1097
|
-
return int(block_id.split("-")[1])
|
|
1098
|
-
except (IndexError, ValueError):
|
|
1099
|
-
return 0
|
|
1100
|
-
|
|
1101
|
-
merged_blocks.sort(key=extract_block_num)
|
|
1102
|
-
|
|
1103
|
-
# 转换为units格式并格式化输出(保留真实的文件起始行号)
|
|
1104
|
-
structured_units = []
|
|
1105
|
-
for block in merged_blocks:
|
|
1106
|
-
structured_units.append({
|
|
1107
|
-
"block_id": block["block_id"],
|
|
1108
|
-
"content": block["content"],
|
|
1109
|
-
"start_line": block.get("start_line", 1),
|
|
1110
|
-
})
|
|
1111
|
-
|
|
1112
|
-
output = self._format_structured_output(filepath, structured_units, total_lines, agent)
|
|
1113
|
-
|
|
1114
|
-
# 尝试获取上下文信息(使用合并后的范围)
|
|
1115
|
-
all_start_lines = [req.get("start_line", 1) for req in requests]
|
|
1116
|
-
all_end_lines = [req.get("end_line", total_lines) for req in requests]
|
|
1117
|
-
min_start = min(all_start_lines)
|
|
1118
|
-
max_end = max(all_end_lines)
|
|
1119
|
-
context_info = self._get_file_context(filepath, min_start, max_end, agent)
|
|
1120
|
-
if context_info:
|
|
1121
|
-
output += context_info
|
|
1122
|
-
|
|
1123
|
-
return {"success": True, "stdout": output, "stderr": ""}
|
|
1124
|
-
|
|
1125
|
-
except Exception as e:
|
|
1126
|
-
return {"success": False, "stdout": "", "stderr": f"合并范围读取失败: {str(e)}"}
|
|
1127
|
-
|
|
1128
223
|
def _get_file_context(
|
|
1129
224
|
self, filepath: str, start_line: int, end_line: int, agent: Any = None
|
|
1130
225
|
) -> str:
|
|
@@ -1155,17 +250,14 @@ class ReadCodeTool:
|
|
|
1155
250
|
if not context_manager:
|
|
1156
251
|
return ""
|
|
1157
252
|
|
|
1158
|
-
#
|
|
1159
|
-
file_name = os.path.basename(filepath)
|
|
1160
|
-
if start_line == end_line:
|
|
1161
|
-
line_info = f"第{start_line}行"
|
|
1162
|
-
else:
|
|
1163
|
-
line_info = f"第{start_line}-{end_line}行"
|
|
1164
|
-
print(f"🧠 正在分析代码上下文 ({file_name}, {line_info})...")
|
|
253
|
+
# 上下文感知日志已移除
|
|
1165
254
|
|
|
1166
255
|
# 确保文件已更新到上下文管理器
|
|
1167
256
|
# 如果文件内容已缓存,直接使用;否则读取并更新
|
|
1168
|
-
if
|
|
257
|
+
if (
|
|
258
|
+
not hasattr(context_manager, "_file_cache")
|
|
259
|
+
or filepath not in context_manager._file_cache
|
|
260
|
+
):
|
|
1169
261
|
try:
|
|
1170
262
|
with open(filepath, "r", encoding="utf-8", errors="replace") as f:
|
|
1171
263
|
content = f.read()
|
|
@@ -1175,10 +267,15 @@ class ReadCodeTool:
|
|
|
1175
267
|
pass
|
|
1176
268
|
|
|
1177
269
|
# 获取编辑上下文
|
|
1178
|
-
edit_context = context_manager.get_edit_context(
|
|
270
|
+
edit_context = context_manager.get_edit_context(
|
|
271
|
+
filepath, start_line, end_line
|
|
272
|
+
)
|
|
1179
273
|
|
|
1180
274
|
# 构建上下文信息
|
|
1181
|
-
if
|
|
275
|
+
if (
|
|
276
|
+
not edit_context.context_summary
|
|
277
|
+
or edit_context.context_summary == "No context available"
|
|
278
|
+
):
|
|
1182
279
|
return ""
|
|
1183
280
|
|
|
1184
281
|
# 格式化上下文信息
|
|
@@ -1188,7 +285,9 @@ class ReadCodeTool:
|
|
|
1188
285
|
if edit_context.current_scope:
|
|
1189
286
|
scope_info = f"📍 当前作用域: {edit_context.current_scope.kind} `{edit_context.current_scope.name}`"
|
|
1190
287
|
if edit_context.current_scope.signature:
|
|
1191
|
-
scope_info +=
|
|
288
|
+
scope_info += (
|
|
289
|
+
f"\n └─ 签名: {edit_context.current_scope.signature}"
|
|
290
|
+
)
|
|
1192
291
|
context_lines.append(scope_info)
|
|
1193
292
|
|
|
1194
293
|
if edit_context.used_symbols:
|
|
@@ -1196,31 +295,35 @@ class ReadCodeTool:
|
|
|
1196
295
|
seen_symbols = set()
|
|
1197
296
|
unique_symbols = []
|
|
1198
297
|
for s in edit_context.used_symbols:
|
|
1199
|
-
key = (
|
|
298
|
+
key = (
|
|
299
|
+
s.name,
|
|
300
|
+
getattr(s, "file_path", ""),
|
|
301
|
+
getattr(s, "line_start", 0),
|
|
302
|
+
)
|
|
1200
303
|
if key not in seen_symbols:
|
|
1201
304
|
seen_symbols.add(key)
|
|
1202
305
|
unique_symbols.append(s)
|
|
1203
|
-
|
|
306
|
+
|
|
1204
307
|
# 区分定义和调用,显示定义位置信息
|
|
1205
308
|
definitions = []
|
|
1206
309
|
calls = []
|
|
1207
310
|
for symbol in unique_symbols[:10]:
|
|
1208
|
-
is_def = getattr(symbol,
|
|
311
|
+
is_def = getattr(symbol, "is_definition", False)
|
|
1209
312
|
if is_def:
|
|
1210
313
|
definitions.append(symbol)
|
|
1211
314
|
else:
|
|
1212
315
|
calls.append(symbol)
|
|
1213
|
-
|
|
316
|
+
|
|
1214
317
|
# 显示定义
|
|
1215
318
|
if definitions:
|
|
1216
319
|
def_names = [f"`{s.name}`" for s in definitions]
|
|
1217
320
|
context_lines.append(f"📝 定义的符号: {', '.join(def_names)}")
|
|
1218
|
-
|
|
321
|
+
|
|
1219
322
|
# 显示调用(带定义位置信息)
|
|
1220
323
|
if calls:
|
|
1221
324
|
call_info = []
|
|
1222
325
|
for symbol in calls:
|
|
1223
|
-
def_loc = getattr(symbol,
|
|
326
|
+
def_loc = getattr(symbol, "definition_location", None)
|
|
1224
327
|
if def_loc:
|
|
1225
328
|
def_file = os.path.basename(def_loc.file_path)
|
|
1226
329
|
def_line = def_loc.line_start
|
|
@@ -1228,37 +331,110 @@ class ReadCodeTool:
|
|
|
1228
331
|
else:
|
|
1229
332
|
call_info.append(f"`{symbol.name}`")
|
|
1230
333
|
context_lines.append(f"🔗 调用的符号: {', '.join(call_info)}")
|
|
1231
|
-
|
|
334
|
+
|
|
1232
335
|
# 如果还有更多符号
|
|
1233
336
|
more = len(edit_context.used_symbols) - 10
|
|
1234
337
|
if more > 0:
|
|
1235
338
|
context_lines.append(f" ... 还有{more}个符号")
|
|
1236
339
|
|
|
1237
|
-
# 不再感知导入符号
|
|
1238
|
-
|
|
1239
340
|
if edit_context.relevant_files:
|
|
1240
341
|
# 对相关文件去重
|
|
1241
342
|
unique_files = list(dict.fromkeys(edit_context.relevant_files))
|
|
1242
343
|
rel_files = unique_files[:10]
|
|
1243
|
-
files_str = "\n ".join(
|
|
344
|
+
files_str = "\n ".join(
|
|
345
|
+
f"• {os.path.relpath(f, context_manager.project_root)}"
|
|
346
|
+
for f in rel_files
|
|
347
|
+
)
|
|
1244
348
|
more = len(unique_files) - 10
|
|
1245
349
|
if more > 0:
|
|
1246
350
|
files_str += f"\n ... 还有{more}个相关文件"
|
|
1247
|
-
context_lines.append(
|
|
351
|
+
context_lines.append(
|
|
352
|
+
f"📁 相关文件 ({len(unique_files)}个):\n {files_str}"
|
|
353
|
+
)
|
|
1248
354
|
|
|
1249
355
|
context_lines.append("─" * 60)
|
|
1250
356
|
context_lines.append("") # 空行
|
|
1251
357
|
|
|
1252
|
-
#
|
|
358
|
+
# 上下文感知结果已移除,不再打印到控制台
|
|
1253
359
|
context_output = "\n".join(context_lines)
|
|
1254
|
-
print(f"🧠 上下文感知结果:\n{context_output}")
|
|
1255
|
-
|
|
1256
360
|
return context_output
|
|
1257
361
|
|
|
1258
362
|
except Exception:
|
|
1259
363
|
# 静默失败,不影响文件读取
|
|
1260
364
|
return ""
|
|
1261
365
|
|
|
366
|
+
def _handle_merged_ranges(
|
|
367
|
+
self, filepath: str, requests: List[Dict], agent: Any = None
|
|
368
|
+
) -> Dict[str, Any]:
|
|
369
|
+
"""处理同一文件的多个范围请求,合并后去重
|
|
370
|
+
|
|
371
|
+
Args:
|
|
372
|
+
filepath: 文件绝对路径
|
|
373
|
+
requests: 范围请求列表,每个请求包含 start_line, end_line
|
|
374
|
+
agent: Agent实例
|
|
375
|
+
|
|
376
|
+
Returns:
|
|
377
|
+
Dict[str, Any]: 包含成功状态、输出内容和错误信息的字典
|
|
378
|
+
"""
|
|
379
|
+
try:
|
|
380
|
+
# 文件存在性检查
|
|
381
|
+
if not os.path.exists(filepath):
|
|
382
|
+
return {
|
|
383
|
+
"success": False,
|
|
384
|
+
"stdout": "",
|
|
385
|
+
"stderr": f"文件不存在: {filepath}",
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
# 读取文件内容
|
|
389
|
+
with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
|
|
390
|
+
lines = f.readlines()
|
|
391
|
+
|
|
392
|
+
total_lines = len(lines)
|
|
393
|
+
if total_lines == 0:
|
|
394
|
+
return {
|
|
395
|
+
"success": True,
|
|
396
|
+
"stdout": f"\n🔍 文件: {filepath}\n📄 文件为空 (0行)\n",
|
|
397
|
+
"stderr": "",
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
# 合并所有范围,计算最小起始行和最大结束行
|
|
401
|
+
min_start = float("inf")
|
|
402
|
+
max_end = 0
|
|
403
|
+
for req in requests:
|
|
404
|
+
start_line = req.get("start_line", 1)
|
|
405
|
+
end_line = req.get("end_line", -1)
|
|
406
|
+
|
|
407
|
+
# 处理特殊值
|
|
408
|
+
if end_line == -1:
|
|
409
|
+
end_line = total_lines
|
|
410
|
+
else:
|
|
411
|
+
end_line = (
|
|
412
|
+
max(1, min(end_line, total_lines))
|
|
413
|
+
if end_line >= 0
|
|
414
|
+
else total_lines + end_line + 1
|
|
415
|
+
)
|
|
416
|
+
start_line = (
|
|
417
|
+
max(1, min(start_line, total_lines))
|
|
418
|
+
if start_line >= 0
|
|
419
|
+
else total_lines + start_line + 1
|
|
420
|
+
)
|
|
421
|
+
|
|
422
|
+
min_start = min(min_start, start_line)
|
|
423
|
+
max_end = max(max_end, end_line)
|
|
424
|
+
|
|
425
|
+
# 用合并后的范围读取一次,自然就去重了
|
|
426
|
+
result = self._handle_single_file(
|
|
427
|
+
filepath, int(min_start), int(max_end), agent
|
|
428
|
+
)
|
|
429
|
+
return result
|
|
430
|
+
|
|
431
|
+
except Exception as e:
|
|
432
|
+
return {
|
|
433
|
+
"success": False,
|
|
434
|
+
"stdout": "",
|
|
435
|
+
"stderr": f"合并范围读取失败: {str(e)}",
|
|
436
|
+
}
|
|
437
|
+
|
|
1262
438
|
def execute(self, args: Dict) -> Dict[str, Any]:
|
|
1263
439
|
"""执行代码读取操作
|
|
1264
440
|
|
|
@@ -1276,7 +452,7 @@ class ReadCodeTool:
|
|
|
1276
452
|
"stdout": "",
|
|
1277
453
|
"stderr": "参数中必须包含文件列表",
|
|
1278
454
|
}
|
|
1279
|
-
|
|
455
|
+
|
|
1280
456
|
if len(args["files"]) == 0:
|
|
1281
457
|
return {
|
|
1282
458
|
"success": False,
|
|
@@ -1295,24 +471,26 @@ class ReadCodeTool:
|
|
|
1295
471
|
for file_info in args["files"]:
|
|
1296
472
|
if not isinstance(file_info, dict) or "path" not in file_info:
|
|
1297
473
|
continue
|
|
1298
|
-
|
|
474
|
+
|
|
1299
475
|
filepath = file_info["path"].strip()
|
|
1300
476
|
start_line = file_info.get("start_line", 1)
|
|
1301
477
|
end_line = file_info.get("end_line", -1)
|
|
1302
|
-
|
|
478
|
+
|
|
1303
479
|
# 检查文件是否存在并计算要读取的token数
|
|
1304
480
|
abs_path = os.path.abspath(filepath)
|
|
1305
481
|
if not os.path.exists(abs_path):
|
|
1306
482
|
continue
|
|
1307
|
-
|
|
483
|
+
|
|
1308
484
|
try:
|
|
1309
|
-
#
|
|
485
|
+
# 读取文件内容
|
|
1310
486
|
with open(abs_path, "r", encoding="utf-8", errors="ignore") as f:
|
|
1311
|
-
|
|
1312
|
-
|
|
487
|
+
lines = f.readlines()
|
|
488
|
+
|
|
489
|
+
total_lines = len(lines)
|
|
490
|
+
|
|
1313
491
|
if total_lines == 0:
|
|
1314
492
|
continue
|
|
1315
|
-
|
|
493
|
+
|
|
1316
494
|
# 计算实际要读取的行范围
|
|
1317
495
|
if end_line == -1:
|
|
1318
496
|
actual_end_line = total_lines
|
|
@@ -1322,32 +500,40 @@ class ReadCodeTool:
|
|
|
1322
500
|
if end_line >= 0
|
|
1323
501
|
else total_lines + end_line + 1
|
|
1324
502
|
)
|
|
1325
|
-
|
|
503
|
+
|
|
1326
504
|
actual_start_line = (
|
|
1327
505
|
max(1, min(start_line, total_lines))
|
|
1328
506
|
if start_line >= 0
|
|
1329
507
|
else total_lines + start_line + 1
|
|
1330
508
|
)
|
|
1331
|
-
|
|
509
|
+
|
|
1332
510
|
if actual_start_line <= actual_end_line:
|
|
1333
|
-
#
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
511
|
+
# 读取指定行号范围的内容
|
|
512
|
+
selected_lines = lines[actual_start_line - 1 : actual_end_line]
|
|
513
|
+
|
|
514
|
+
# 为每行添加行号
|
|
515
|
+
numbered_lines = []
|
|
516
|
+
for i, line in enumerate(
|
|
517
|
+
selected_lines, start=actual_start_line
|
|
518
|
+
):
|
|
519
|
+
line_number_str = f"{i:4d}"
|
|
520
|
+
line_content = line.rstrip("\n\r")
|
|
521
|
+
numbered_lines.append(f"{line_number_str}:{line_content}")
|
|
522
|
+
|
|
523
|
+
# 构造输出内容用于token估算
|
|
524
|
+
output_content = "\n".join(numbered_lines)
|
|
525
|
+
content_tokens = get_context_token_count(output_content)
|
|
526
|
+
|
|
527
|
+
file_read_info.append(
|
|
528
|
+
{
|
|
529
|
+
"filepath": filepath,
|
|
530
|
+
"start_line": actual_start_line,
|
|
531
|
+
"end_line": actual_end_line,
|
|
532
|
+
"read_lines": actual_end_line - actual_start_line + 1,
|
|
533
|
+
"tokens": content_tokens,
|
|
534
|
+
"file_info": file_info,
|
|
535
|
+
}
|
|
1341
536
|
)
|
|
1342
|
-
|
|
1343
|
-
file_read_info.append({
|
|
1344
|
-
"filepath": filepath,
|
|
1345
|
-
"start_line": actual_start_line,
|
|
1346
|
-
"end_line": actual_end_line,
|
|
1347
|
-
"read_lines": actual_end_line - actual_start_line + 1,
|
|
1348
|
-
"tokens": content_tokens,
|
|
1349
|
-
"file_info": file_info,
|
|
1350
|
-
})
|
|
1351
537
|
total_tokens += content_tokens
|
|
1352
538
|
except Exception:
|
|
1353
539
|
continue
|
|
@@ -1361,7 +547,7 @@ class ReadCodeTool:
|
|
|
1361
547
|
more_files = len(file_read_info) - 10
|
|
1362
548
|
if more_files > 0:
|
|
1363
549
|
file_list += f"\n ... 还有 {more_files} 个文件"
|
|
1364
|
-
|
|
550
|
+
|
|
1365
551
|
return {
|
|
1366
552
|
"success": False,
|
|
1367
553
|
"stdout": "",
|
|
@@ -1378,13 +564,14 @@ class ReadCodeTool:
|
|
|
1378
564
|
# 第二遍:实际读取文件(按文件分组,合并同一文件的多个范围请求,避免块重复)
|
|
1379
565
|
# 按文件路径分组
|
|
1380
566
|
from collections import defaultdict
|
|
567
|
+
|
|
1381
568
|
file_requests = defaultdict(list)
|
|
1382
569
|
for file_info in args["files"]:
|
|
1383
570
|
if not isinstance(file_info, dict) or "path" not in file_info:
|
|
1384
571
|
continue
|
|
1385
572
|
abs_path = os.path.abspath(file_info["path"].strip())
|
|
1386
573
|
file_requests[abs_path].append(file_info)
|
|
1387
|
-
|
|
574
|
+
|
|
1388
575
|
# 按文件处理,合并同一文件的多个范围请求
|
|
1389
576
|
for abs_path, requests in file_requests.items():
|
|
1390
577
|
if len(requests) == 1:
|
|
@@ -1395,13 +582,14 @@ class ReadCodeTool:
|
|
|
1395
582
|
file_info.get("start_line", 1),
|
|
1396
583
|
file_info.get("end_line", -1),
|
|
1397
584
|
agent,
|
|
1398
|
-
file_info.get("raw_mode", False),
|
|
1399
585
|
)
|
|
1400
586
|
if result["success"]:
|
|
1401
587
|
all_outputs.append(result["stdout"])
|
|
1402
588
|
status_lines.append(f"✅ {file_info['path']} 文件读取成功")
|
|
1403
589
|
else:
|
|
1404
|
-
all_outputs.append(
|
|
590
|
+
all_outputs.append(
|
|
591
|
+
f"❌ {file_info['path']}: {result['stderr']}"
|
|
592
|
+
)
|
|
1405
593
|
status_lines.append(f"❌ {file_info['path']} 文件读取失败")
|
|
1406
594
|
overall_success = False
|
|
1407
595
|
else:
|
|
@@ -1412,9 +600,13 @@ class ReadCodeTool:
|
|
|
1412
600
|
display_path = requests[0]["path"]
|
|
1413
601
|
if merged_result["success"]:
|
|
1414
602
|
all_outputs.append(merged_result["stdout"])
|
|
1415
|
-
status_lines.append(
|
|
603
|
+
status_lines.append(
|
|
604
|
+
f"✅ {display_path} 文件读取成功 (合并{len(requests)}个范围请求,已去重)"
|
|
605
|
+
)
|
|
1416
606
|
else:
|
|
1417
|
-
all_outputs.append(
|
|
607
|
+
all_outputs.append(
|
|
608
|
+
f"❌ {display_path}: {merged_result['stderr']}"
|
|
609
|
+
)
|
|
1418
610
|
status_lines.append(f"❌ {display_path} 文件读取失败")
|
|
1419
611
|
overall_success = False
|
|
1420
612
|
|
|
@@ -1422,7 +614,7 @@ class ReadCodeTool:
|
|
|
1422
614
|
# 仅打印每个文件的读取状态,不打印具体内容
|
|
1423
615
|
try:
|
|
1424
616
|
if status_lines:
|
|
1425
|
-
|
|
617
|
+
PrettyOutput.auto_print("\n".join(status_lines))
|
|
1426
618
|
except Exception:
|
|
1427
619
|
pass
|
|
1428
620
|
return {
|
|
@@ -1432,447 +624,122 @@ class ReadCodeTool:
|
|
|
1432
624
|
}
|
|
1433
625
|
|
|
1434
626
|
except Exception as e:
|
|
1435
|
-
|
|
627
|
+
PrettyOutput.auto_print(f"❌ {str(e)}")
|
|
1436
628
|
return {"success": False, "stdout": "", "stderr": f"代码读取失败: {str(e)}"}
|
|
1437
629
|
|
|
1438
630
|
|
|
1439
631
|
def main():
|
|
1440
|
-
"""
|
|
1441
|
-
import tempfile
|
|
632
|
+
"""测试读取功能"""
|
|
1442
633
|
import os
|
|
1443
|
-
|
|
634
|
+
import tempfile
|
|
635
|
+
|
|
1444
636
|
tool = ReadCodeTool()
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
|
|
1450
|
-
# 测试1:
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
|
|
1456
|
-
void main() {
|
|
1457
|
-
printf("Hello, World!\\n");
|
|
1458
|
-
}
|
|
1459
|
-
|
|
1460
|
-
int add(int a, int b) {
|
|
1461
|
-
return a + b;
|
|
1462
|
-
}
|
|
1463
|
-
|
|
1464
|
-
int sub(int a, int b) {
|
|
1465
|
-
return a - b;
|
|
1466
|
-
}
|
|
1467
|
-
|
|
1468
|
-
struct Point {
|
|
1469
|
-
int x;
|
|
1470
|
-
int y;
|
|
1471
|
-
};
|
|
1472
|
-
"""
|
|
1473
|
-
|
|
1474
|
-
with tempfile.NamedTemporaryFile(mode='w', suffix='.c', delete=False) as f:
|
|
1475
|
-
c_file = f.name
|
|
1476
|
-
f.write(c_code)
|
|
1477
|
-
|
|
1478
|
-
try:
|
|
1479
|
-
result = tool.execute({
|
|
1480
|
-
"files": [{"path": c_file, "start_line": 1, "end_line": -1}],
|
|
1481
|
-
"agent": None
|
|
1482
|
-
})
|
|
1483
|
-
|
|
1484
|
-
if result["success"]:
|
|
1485
|
-
print("✅ C语言文件读取成功")
|
|
1486
|
-
print("\n输出内容:")
|
|
1487
|
-
print(result["stdout"])
|
|
1488
|
-
else:
|
|
1489
|
-
print(f"❌ C语言文件读取失败: {result['stderr']}")
|
|
1490
|
-
finally:
|
|
1491
|
-
os.unlink(c_file)
|
|
1492
|
-
|
|
1493
|
-
# 测试2: Python文件(AST支持)
|
|
1494
|
-
print("\n【测试2】Python文件 - 语法单元提取")
|
|
1495
|
-
print("-" * 80)
|
|
1496
|
-
|
|
1497
|
-
python_code = """def main():
|
|
1498
|
-
print("Hello, World!")
|
|
637
|
+
|
|
638
|
+
PrettyOutput.auto_print("=" * 80)
|
|
639
|
+
PrettyOutput.auto_print("测试读取功能")
|
|
640
|
+
PrettyOutput.auto_print("=" * 80)
|
|
641
|
+
|
|
642
|
+
# 测试1: 基本读取
|
|
643
|
+
PrettyOutput.auto_print("\n【测试1】基本读取")
|
|
644
|
+
PrettyOutput.auto_print("-" * 80)
|
|
645
|
+
|
|
646
|
+
test_code = """def hello():
|
|
647
|
+
PrettyOutput.auto_print("Hello, World!")
|
|
1499
648
|
|
|
1500
649
|
def add(a, b):
|
|
1501
650
|
return a + b
|
|
1502
651
|
|
|
1503
652
|
def sub(a, b):
|
|
1504
653
|
return a - b
|
|
1505
|
-
|
|
1506
|
-
class Point:
|
|
1507
|
-
def __init__(self, x, y):
|
|
1508
|
-
self.x = x
|
|
1509
|
-
self.y = y
|
|
1510
|
-
"""
|
|
1511
|
-
|
|
1512
|
-
with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
|
|
1513
|
-
py_file = f.name
|
|
1514
|
-
f.write(python_code)
|
|
1515
|
-
|
|
1516
|
-
try:
|
|
1517
|
-
result = tool.execute({
|
|
1518
|
-
"files": [{"path": py_file, "start_line": 1, "end_line": -1}],
|
|
1519
|
-
"agent": None
|
|
1520
|
-
})
|
|
1521
|
-
|
|
1522
|
-
if result["success"]:
|
|
1523
|
-
print("✅ Python文件读取成功")
|
|
1524
|
-
print("\n输出内容:")
|
|
1525
|
-
print(result["stdout"])
|
|
1526
|
-
else:
|
|
1527
|
-
print(f"❌ Python文件读取失败: {result['stderr']}")
|
|
1528
|
-
finally:
|
|
1529
|
-
os.unlink(py_file)
|
|
1530
|
-
|
|
1531
|
-
# 测试3: 不支持的语言 - 行号分组
|
|
1532
|
-
print("\n【测试3】不支持的语言 - 行号分组(20行一组)")
|
|
1533
|
-
print("-" * 80)
|
|
1534
|
-
|
|
1535
|
-
text_content = "\n".join([f"这是第 {i} 行内容" for i in range(1, 51)])
|
|
1536
|
-
|
|
1537
|
-
with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
|
|
1538
|
-
txt_file = f.name
|
|
1539
|
-
f.write(text_content)
|
|
1540
|
-
|
|
1541
|
-
try:
|
|
1542
|
-
result = tool.execute({
|
|
1543
|
-
"files": [{"path": txt_file, "start_line": 1, "end_line": -1}],
|
|
1544
|
-
"agent": None
|
|
1545
|
-
})
|
|
1546
|
-
|
|
1547
|
-
if result["success"]:
|
|
1548
|
-
print("✅ 文本文件读取成功(使用行号分组)")
|
|
1549
|
-
print("\n输出内容(前500字符):")
|
|
1550
|
-
print(result["stdout"][:500] + "..." if len(result["stdout"]) > 500 else result["stdout"])
|
|
1551
|
-
else:
|
|
1552
|
-
print(f"❌ 文本文件读取失败: {result['stderr']}")
|
|
1553
|
-
finally:
|
|
1554
|
-
os.unlink(txt_file)
|
|
1555
|
-
|
|
1556
|
-
# 测试4: 指定行号范围
|
|
1557
|
-
print("\n【测试4】指定行号范围读取")
|
|
1558
|
-
print("-" * 80)
|
|
1559
|
-
|
|
1560
|
-
with tempfile.NamedTemporaryFile(mode='w', suffix='.c', delete=False) as f:
|
|
1561
|
-
c_file2 = f.name
|
|
1562
|
-
f.write(c_code)
|
|
1563
|
-
|
|
1564
|
-
try:
|
|
1565
|
-
result = tool.execute({
|
|
1566
|
-
"files": [{"path": c_file2, "start_line": 1, "end_line": 10}],
|
|
1567
|
-
"agent": None
|
|
1568
|
-
})
|
|
1569
|
-
|
|
1570
|
-
if result["success"]:
|
|
1571
|
-
print("✅ 指定范围读取成功")
|
|
1572
|
-
print("\n输出内容:")
|
|
1573
|
-
print(result["stdout"])
|
|
1574
|
-
else:
|
|
1575
|
-
print(f"❌ 指定范围读取失败: {result['stderr']}")
|
|
1576
|
-
finally:
|
|
1577
|
-
os.unlink(c_file2)
|
|
1578
|
-
|
|
1579
|
-
# 测试5: 边界情况 - 返回边界上的语法单元
|
|
1580
|
-
print("\n【测试5】边界情况 - 返回边界上的语法单元")
|
|
1581
|
-
print("-" * 80)
|
|
1582
|
-
|
|
1583
|
-
boundary_test_code = """def func1():
|
|
1584
|
-
line1 = 1
|
|
1585
|
-
line2 = 2
|
|
1586
|
-
line3 = 3
|
|
1587
|
-
|
|
1588
|
-
def func2():
|
|
1589
|
-
line1 = 1
|
|
1590
|
-
line2 = 2
|
|
1591
|
-
|
|
1592
|
-
def func3():
|
|
1593
|
-
line1 = 1
|
|
1594
|
-
line2 = 2
|
|
1595
|
-
line3 = 3
|
|
1596
|
-
line4 = 4
|
|
1597
654
|
"""
|
|
1598
|
-
|
|
1599
|
-
with tempfile.NamedTemporaryFile(mode=
|
|
1600
|
-
|
|
1601
|
-
f.write(
|
|
1602
|
-
|
|
1603
|
-
try:
|
|
1604
|
-
# 请求第3-8行
|
|
1605
|
-
# func1: 1-4行(结束行4在范围内,应该返回完整func1)
|
|
1606
|
-
# func2: 6-8行(开始行6在范围内,应该返回完整func2)
|
|
1607
|
-
# func3: 10-14行(完全不在范围内,不应该返回)
|
|
1608
|
-
result = tool.execute({
|
|
1609
|
-
"files": [{"path": boundary_file, "start_line": 3, "end_line": 8}],
|
|
1610
|
-
"agent": None
|
|
1611
|
-
})
|
|
1612
|
-
|
|
1613
|
-
if result["success"]:
|
|
1614
|
-
print("✅ 边界情况测试成功")
|
|
1615
|
-
print("请求范围: 3-8行")
|
|
1616
|
-
print("预期结果:")
|
|
1617
|
-
print(" - func1 (1-4行): 结束行4在范围内,应返回完整func1")
|
|
1618
|
-
print(" - func2 (6-8行): 开始行6在范围内,应返回完整func2")
|
|
1619
|
-
print(" - func3 (10-14行): 完全不在范围内,不应返回")
|
|
1620
|
-
print("\n实际输出:")
|
|
1621
|
-
print(result["stdout"])
|
|
1622
|
-
else:
|
|
1623
|
-
print(f"❌ 边界情况测试失败: {result['stderr']}")
|
|
1624
|
-
finally:
|
|
1625
|
-
os.unlink(boundary_file)
|
|
1626
|
-
|
|
1627
|
-
# 测试6: 多个文件
|
|
1628
|
-
print("\n【测试6】多个文件读取")
|
|
1629
|
-
print("-" * 80)
|
|
1630
|
-
|
|
1631
|
-
with tempfile.NamedTemporaryFile(mode='w', suffix='.c', delete=False) as f1, \
|
|
1632
|
-
tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f2:
|
|
1633
|
-
c_file3 = f1.name
|
|
1634
|
-
py_file2 = f2.name
|
|
1635
|
-
f1.write(c_code)
|
|
1636
|
-
f2.write(python_code)
|
|
1637
|
-
|
|
1638
|
-
try:
|
|
1639
|
-
result = tool.execute({
|
|
1640
|
-
"files": [
|
|
1641
|
-
{"path": c_file3, "start_line": 1, "end_line": -1},
|
|
1642
|
-
{"path": py_file2, "start_line": 1, "end_line": -1}
|
|
1643
|
-
],
|
|
1644
|
-
"agent": None
|
|
1645
|
-
})
|
|
1646
|
-
|
|
1647
|
-
if result["success"]:
|
|
1648
|
-
print("✅ 多文件读取成功")
|
|
1649
|
-
print("\n输出内容(前800字符):")
|
|
1650
|
-
print(result["stdout"][:800] + "..." if len(result["stdout"]) > 800 else result["stdout"])
|
|
1651
|
-
else:
|
|
1652
|
-
print(f"❌ 多文件读取失败: {result['stderr']}")
|
|
1653
|
-
finally:
|
|
1654
|
-
os.unlink(c_file3)
|
|
1655
|
-
os.unlink(py_file2)
|
|
1656
|
-
|
|
1657
|
-
# 测试7: 嵌套作用域的边界情况
|
|
1658
|
-
print("\n【测试7】嵌套作用域的边界情况")
|
|
1659
|
-
print("-" * 80)
|
|
1660
|
-
|
|
1661
|
-
nested_code = """class Outer:
|
|
1662
|
-
def method1(self):
|
|
1663
|
-
line1 = 1
|
|
1664
|
-
line2 = 2
|
|
1665
|
-
|
|
1666
|
-
def method2(self):
|
|
1667
|
-
line1 = 1
|
|
1668
|
-
line2 = 2
|
|
1669
|
-
line3 = 3
|
|
1670
|
-
|
|
1671
|
-
def standalone_func():
|
|
1672
|
-
line1 = 1
|
|
1673
|
-
line2 = 2
|
|
1674
|
-
"""
|
|
1675
|
-
|
|
1676
|
-
with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
|
|
1677
|
-
nested_file = f.name
|
|
1678
|
-
f.write(nested_code)
|
|
1679
|
-
|
|
1680
|
-
try:
|
|
1681
|
-
# 请求第4-7行
|
|
1682
|
-
# Outer.method1: 2-4行(结束行4在范围内,应该返回完整method1)
|
|
1683
|
-
# Outer.method2: 6-9行(开始行6在范围内,应该返回完整method2)
|
|
1684
|
-
# Outer类: 1-9行(包含method1和method2,应该返回)
|
|
1685
|
-
# standalone_func: 11-13行(完全不在范围内,不应返回)
|
|
1686
|
-
result = tool.execute({
|
|
1687
|
-
"files": [{"path": nested_file, "start_line": 4, "end_line": 7}],
|
|
1688
|
-
"agent": None
|
|
1689
|
-
})
|
|
1690
|
-
|
|
1691
|
-
if result["success"]:
|
|
1692
|
-
print("✅ 嵌套作用域边界测试成功")
|
|
1693
|
-
print("请求范围: 4-7行")
|
|
1694
|
-
print("预期结果:")
|
|
1695
|
-
print(" - Outer类 (1-9行): 包含method1和method2,应返回")
|
|
1696
|
-
print(" - Outer.method1 (2-4行): 结束行4在范围内,应返回完整method1")
|
|
1697
|
-
print(" - Outer.method2 (6-9行): 开始行6在范围内,应返回完整method2")
|
|
1698
|
-
print("\n实际输出:")
|
|
1699
|
-
print(result["stdout"])
|
|
1700
|
-
else:
|
|
1701
|
-
print(f"❌ 嵌套作用域边界测试失败: {result['stderr']}")
|
|
1702
|
-
finally:
|
|
1703
|
-
os.unlink(nested_file)
|
|
1704
|
-
|
|
1705
|
-
# 测试8: Java文件(tree-sitter支持)
|
|
1706
|
-
print("\n【测试8】Java文件 - 语法单元提取")
|
|
1707
|
-
print("-" * 80)
|
|
1708
|
-
|
|
1709
|
-
java_code = """public class Main {
|
|
1710
|
-
public static void main(String[] args) {
|
|
1711
|
-
System.out.println("Hello, World!");
|
|
1712
|
-
}
|
|
1713
|
-
|
|
1714
|
-
public int add(int a, int b) {
|
|
1715
|
-
return a + b;
|
|
1716
|
-
}
|
|
1717
|
-
|
|
1718
|
-
private int subtract(int a, int b) {
|
|
1719
|
-
return a - b;
|
|
1720
|
-
}
|
|
1721
|
-
}
|
|
1722
|
-
|
|
1723
|
-
class Point {
|
|
1724
|
-
private int x;
|
|
1725
|
-
private int y;
|
|
1726
|
-
|
|
1727
|
-
public Point(int x, int y) {
|
|
1728
|
-
this.x = x;
|
|
1729
|
-
this.y = y;
|
|
1730
|
-
}
|
|
1731
|
-
}
|
|
1732
|
-
"""
|
|
1733
|
-
|
|
1734
|
-
with tempfile.NamedTemporaryFile(mode='w', suffix='.java', delete=False) as f:
|
|
1735
|
-
java_file = f.name
|
|
1736
|
-
f.write(java_code)
|
|
1737
|
-
|
|
655
|
+
|
|
656
|
+
with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
|
|
657
|
+
test_file = f.name
|
|
658
|
+
f.write(test_code)
|
|
659
|
+
|
|
1738
660
|
try:
|
|
1739
|
-
result = tool.execute(
|
|
1740
|
-
|
|
1741
|
-
|
|
1742
|
-
|
|
1743
|
-
|
|
661
|
+
result = tool.execute(
|
|
662
|
+
{
|
|
663
|
+
"files": [{"path": test_file, "start_line": 1, "end_line": -1}],
|
|
664
|
+
"agent": None,
|
|
665
|
+
}
|
|
666
|
+
)
|
|
667
|
+
|
|
1744
668
|
if result["success"]:
|
|
1745
|
-
|
|
1746
|
-
|
|
1747
|
-
|
|
669
|
+
PrettyOutput.auto_print("✅ 文件读取成功")
|
|
670
|
+
PrettyOutput.auto_print("\n输出内容:")
|
|
671
|
+
PrettyOutput.auto_print(result["stdout"])
|
|
1748
672
|
else:
|
|
1749
|
-
|
|
673
|
+
PrettyOutput.auto_print(f"❌ 文件读取失败: {result['stderr']}")
|
|
1750
674
|
finally:
|
|
1751
|
-
os.unlink(
|
|
1752
|
-
|
|
1753
|
-
# 测试
|
|
1754
|
-
|
|
1755
|
-
|
|
1756
|
-
|
|
1757
|
-
|
|
1758
|
-
|
|
1759
|
-
|
|
1760
|
-
|
|
1761
|
-
fn add(a: i32, b: i32) -> i32 {
|
|
1762
|
-
a + b
|
|
1763
|
-
}
|
|
1764
|
-
|
|
1765
|
-
fn subtract(a: i32, b: i32) -> i32 {
|
|
1766
|
-
a - b
|
|
1767
|
-
}
|
|
1768
|
-
|
|
1769
|
-
struct Point {
|
|
1770
|
-
x: i32,
|
|
1771
|
-
y: i32,
|
|
1772
|
-
}
|
|
1773
|
-
|
|
1774
|
-
impl Point {
|
|
1775
|
-
fn new(x: i32, y: i32) -> Point {
|
|
1776
|
-
Point { x, y }
|
|
1777
|
-
}
|
|
1778
|
-
}
|
|
675
|
+
os.unlink(test_file)
|
|
676
|
+
|
|
677
|
+
# 测试2: 指定行号范围
|
|
678
|
+
PrettyOutput.auto_print("\n【测试2】指定行号范围读取")
|
|
679
|
+
PrettyOutput.auto_print("-" * 80)
|
|
680
|
+
|
|
681
|
+
with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
|
|
682
|
+
test_file2 = f.name
|
|
683
|
+
f.write(test_code)
|
|
1779
684
|
|
|
1780
|
-
enum Color {
|
|
1781
|
-
Red,
|
|
1782
|
-
Green,
|
|
1783
|
-
Blue,
|
|
1784
|
-
}
|
|
1785
|
-
"""
|
|
1786
|
-
|
|
1787
|
-
with tempfile.NamedTemporaryFile(mode='w', suffix='.rs', delete=False) as f:
|
|
1788
|
-
rust_file = f.name
|
|
1789
|
-
f.write(rust_code)
|
|
1790
|
-
|
|
1791
685
|
try:
|
|
1792
|
-
result = tool.execute(
|
|
1793
|
-
|
|
1794
|
-
|
|
1795
|
-
|
|
1796
|
-
|
|
686
|
+
result = tool.execute(
|
|
687
|
+
{
|
|
688
|
+
"files": [{"path": test_file2, "start_line": 1, "end_line": 3}],
|
|
689
|
+
"agent": None,
|
|
690
|
+
}
|
|
691
|
+
)
|
|
692
|
+
|
|
1797
693
|
if result["success"]:
|
|
1798
|
-
|
|
1799
|
-
|
|
1800
|
-
|
|
694
|
+
PrettyOutput.auto_print("✅ 指定范围读取成功")
|
|
695
|
+
PrettyOutput.auto_print("\n输出内容:")
|
|
696
|
+
PrettyOutput.auto_print(result["stdout"])
|
|
1801
697
|
else:
|
|
1802
|
-
|
|
698
|
+
PrettyOutput.auto_print(f"❌ 指定范围读取失败: {result['stderr']}")
|
|
1803
699
|
finally:
|
|
1804
|
-
os.unlink(
|
|
1805
|
-
|
|
1806
|
-
# 测试10: Go文件(tree-sitter支持)
|
|
1807
|
-
print("\n【测试10】Go文件 - 语法单元提取")
|
|
1808
|
-
print("-" * 80)
|
|
1809
|
-
|
|
1810
|
-
go_code = """package main
|
|
700
|
+
os.unlink(test_file2)
|
|
1811
701
|
|
|
1812
|
-
|
|
702
|
+
# 测试3: 多个文件
|
|
703
|
+
PrettyOutput.auto_print("\n【测试3】多个文件读取")
|
|
704
|
+
PrettyOutput.auto_print("-" * 80)
|
|
1813
705
|
|
|
1814
|
-
|
|
1815
|
-
|
|
1816
|
-
|
|
706
|
+
with (
|
|
707
|
+
tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f1,
|
|
708
|
+
tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f2,
|
|
709
|
+
):
|
|
710
|
+
test_file3 = f1.name
|
|
711
|
+
test_file4 = f2.name
|
|
712
|
+
f1.write(test_code)
|
|
713
|
+
f2.write(test_code)
|
|
1817
714
|
|
|
1818
|
-
func add(a int, b int) int {
|
|
1819
|
-
return a + b
|
|
1820
|
-
}
|
|
1821
|
-
|
|
1822
|
-
func subtract(a int, b int) int {
|
|
1823
|
-
return a - b
|
|
1824
|
-
}
|
|
1825
|
-
|
|
1826
|
-
type Point struct {
|
|
1827
|
-
x int
|
|
1828
|
-
y int
|
|
1829
|
-
}
|
|
1830
|
-
|
|
1831
|
-
func (p *Point) New(x int, y int) {
|
|
1832
|
-
p.x = x
|
|
1833
|
-
p.y = y
|
|
1834
|
-
}
|
|
1835
|
-
|
|
1836
|
-
type Color int
|
|
1837
|
-
|
|
1838
|
-
const (
|
|
1839
|
-
Red Color = iota
|
|
1840
|
-
Green
|
|
1841
|
-
Blue
|
|
1842
|
-
)
|
|
1843
|
-
|
|
1844
|
-
type Shape interface {
|
|
1845
|
-
Area() float64
|
|
1846
|
-
Perimeter() float64
|
|
1847
|
-
}
|
|
1848
|
-
|
|
1849
|
-
type Drawable interface {
|
|
1850
|
-
Draw()
|
|
1851
|
-
}
|
|
1852
|
-
"""
|
|
1853
|
-
|
|
1854
|
-
with tempfile.NamedTemporaryFile(mode='w', suffix='.go', delete=False) as f:
|
|
1855
|
-
go_file = f.name
|
|
1856
|
-
f.write(go_code)
|
|
1857
|
-
|
|
1858
715
|
try:
|
|
1859
|
-
result = tool.execute(
|
|
1860
|
-
|
|
1861
|
-
|
|
1862
|
-
|
|
1863
|
-
|
|
716
|
+
result = tool.execute(
|
|
717
|
+
{
|
|
718
|
+
"files": [
|
|
719
|
+
{"path": test_file3, "start_line": 1, "end_line": -1},
|
|
720
|
+
{"path": test_file4, "start_line": 1, "end_line": -1},
|
|
721
|
+
],
|
|
722
|
+
"agent": None,
|
|
723
|
+
}
|
|
724
|
+
)
|
|
725
|
+
|
|
1864
726
|
if result["success"]:
|
|
1865
|
-
|
|
1866
|
-
|
|
1867
|
-
|
|
727
|
+
PrettyOutput.auto_print("✅ 多文件读取成功")
|
|
728
|
+
PrettyOutput.auto_print("\n输出内容(前500字符):")
|
|
729
|
+
PrettyOutput.auto_print(
|
|
730
|
+
result["stdout"][:500] + "..."
|
|
731
|
+
if len(result["stdout"]) > 500
|
|
732
|
+
else result["stdout"]
|
|
733
|
+
)
|
|
1868
734
|
else:
|
|
1869
|
-
|
|
735
|
+
PrettyOutput.auto_print(f"❌ 多文件读取失败: {result['stderr']}")
|
|
1870
736
|
finally:
|
|
1871
|
-
os.unlink(
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
|
|
1875
|
-
|
|
737
|
+
os.unlink(test_file3)
|
|
738
|
+
os.unlink(test_file4)
|
|
739
|
+
|
|
740
|
+
PrettyOutput.auto_print("\n" + "=" * 80)
|
|
741
|
+
PrettyOutput.auto_print("测试完成")
|
|
742
|
+
PrettyOutput.auto_print("=" * 80)
|
|
1876
743
|
|
|
1877
744
|
|
|
1878
745
|
if __name__ == "__main__":
|