jarvis-ai-assistant 0.7.8__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279) hide show
  1. jarvis/__init__.py +1 -1
  2. jarvis/jarvis_agent/__init__.py +567 -222
  3. jarvis/jarvis_agent/agent_manager.py +19 -12
  4. jarvis/jarvis_agent/builtin_input_handler.py +79 -11
  5. jarvis/jarvis_agent/config_editor.py +7 -2
  6. jarvis/jarvis_agent/event_bus.py +24 -13
  7. jarvis/jarvis_agent/events.py +19 -1
  8. jarvis/jarvis_agent/file_context_handler.py +67 -64
  9. jarvis/jarvis_agent/file_methodology_manager.py +38 -24
  10. jarvis/jarvis_agent/jarvis.py +186 -114
  11. jarvis/jarvis_agent/language_extractors/__init__.py +8 -1
  12. jarvis/jarvis_agent/language_extractors/c_extractor.py +7 -4
  13. jarvis/jarvis_agent/language_extractors/cpp_extractor.py +9 -4
  14. jarvis/jarvis_agent/language_extractors/go_extractor.py +7 -4
  15. jarvis/jarvis_agent/language_extractors/java_extractor.py +27 -20
  16. jarvis/jarvis_agent/language_extractors/javascript_extractor.py +22 -17
  17. jarvis/jarvis_agent/language_extractors/python_extractor.py +7 -4
  18. jarvis/jarvis_agent/language_extractors/rust_extractor.py +7 -4
  19. jarvis/jarvis_agent/language_extractors/typescript_extractor.py +22 -17
  20. jarvis/jarvis_agent/language_support_info.py +250 -219
  21. jarvis/jarvis_agent/main.py +19 -23
  22. jarvis/jarvis_agent/memory_manager.py +9 -6
  23. jarvis/jarvis_agent/methodology_share_manager.py +21 -15
  24. jarvis/jarvis_agent/output_handler.py +4 -2
  25. jarvis/jarvis_agent/prompt_builder.py +7 -6
  26. jarvis/jarvis_agent/prompt_manager.py +113 -8
  27. jarvis/jarvis_agent/prompts.py +317 -85
  28. jarvis/jarvis_agent/protocols.py +5 -2
  29. jarvis/jarvis_agent/run_loop.py +192 -32
  30. jarvis/jarvis_agent/session_manager.py +7 -3
  31. jarvis/jarvis_agent/share_manager.py +23 -13
  32. jarvis/jarvis_agent/shell_input_handler.py +12 -8
  33. jarvis/jarvis_agent/stdio_redirect.py +25 -26
  34. jarvis/jarvis_agent/task_analyzer.py +29 -23
  35. jarvis/jarvis_agent/task_list.py +869 -0
  36. jarvis/jarvis_agent/task_manager.py +26 -23
  37. jarvis/jarvis_agent/tool_executor.py +6 -5
  38. jarvis/jarvis_agent/tool_share_manager.py +24 -14
  39. jarvis/jarvis_agent/user_interaction.py +3 -3
  40. jarvis/jarvis_agent/utils.py +9 -1
  41. jarvis/jarvis_agent/web_bridge.py +37 -17
  42. jarvis/jarvis_agent/web_output_sink.py +5 -2
  43. jarvis/jarvis_agent/web_server.py +165 -36
  44. jarvis/jarvis_c2rust/__init__.py +1 -1
  45. jarvis/jarvis_c2rust/cli.py +260 -141
  46. jarvis/jarvis_c2rust/collector.py +37 -18
  47. jarvis/jarvis_c2rust/constants.py +60 -0
  48. jarvis/jarvis_c2rust/library_replacer.py +242 -1010
  49. jarvis/jarvis_c2rust/library_replacer_checkpoint.py +133 -0
  50. jarvis/jarvis_c2rust/library_replacer_llm.py +287 -0
  51. jarvis/jarvis_c2rust/library_replacer_loader.py +191 -0
  52. jarvis/jarvis_c2rust/library_replacer_output.py +134 -0
  53. jarvis/jarvis_c2rust/library_replacer_prompts.py +124 -0
  54. jarvis/jarvis_c2rust/library_replacer_utils.py +188 -0
  55. jarvis/jarvis_c2rust/llm_module_agent.py +98 -1044
  56. jarvis/jarvis_c2rust/llm_module_agent_apply.py +170 -0
  57. jarvis/jarvis_c2rust/llm_module_agent_executor.py +288 -0
  58. jarvis/jarvis_c2rust/llm_module_agent_loader.py +170 -0
  59. jarvis/jarvis_c2rust/llm_module_agent_prompts.py +268 -0
  60. jarvis/jarvis_c2rust/llm_module_agent_types.py +57 -0
  61. jarvis/jarvis_c2rust/llm_module_agent_utils.py +150 -0
  62. jarvis/jarvis_c2rust/llm_module_agent_validator.py +119 -0
  63. jarvis/jarvis_c2rust/loaders.py +28 -10
  64. jarvis/jarvis_c2rust/models.py +5 -2
  65. jarvis/jarvis_c2rust/optimizer.py +192 -1974
  66. jarvis/jarvis_c2rust/optimizer_build_fix.py +286 -0
  67. jarvis/jarvis_c2rust/optimizer_clippy.py +766 -0
  68. jarvis/jarvis_c2rust/optimizer_config.py +49 -0
  69. jarvis/jarvis_c2rust/optimizer_docs.py +183 -0
  70. jarvis/jarvis_c2rust/optimizer_options.py +48 -0
  71. jarvis/jarvis_c2rust/optimizer_progress.py +469 -0
  72. jarvis/jarvis_c2rust/optimizer_report.py +52 -0
  73. jarvis/jarvis_c2rust/optimizer_unsafe.py +309 -0
  74. jarvis/jarvis_c2rust/optimizer_utils.py +469 -0
  75. jarvis/jarvis_c2rust/optimizer_visibility.py +185 -0
  76. jarvis/jarvis_c2rust/scanner.py +229 -166
  77. jarvis/jarvis_c2rust/transpiler.py +531 -2732
  78. jarvis/jarvis_c2rust/transpiler_agents.py +503 -0
  79. jarvis/jarvis_c2rust/transpiler_build.py +1294 -0
  80. jarvis/jarvis_c2rust/transpiler_codegen.py +204 -0
  81. jarvis/jarvis_c2rust/transpiler_compile.py +146 -0
  82. jarvis/jarvis_c2rust/transpiler_config.py +178 -0
  83. jarvis/jarvis_c2rust/transpiler_context.py +122 -0
  84. jarvis/jarvis_c2rust/transpiler_executor.py +516 -0
  85. jarvis/jarvis_c2rust/transpiler_generation.py +278 -0
  86. jarvis/jarvis_c2rust/transpiler_git.py +163 -0
  87. jarvis/jarvis_c2rust/transpiler_mod_utils.py +225 -0
  88. jarvis/jarvis_c2rust/transpiler_modules.py +336 -0
  89. jarvis/jarvis_c2rust/transpiler_planning.py +394 -0
  90. jarvis/jarvis_c2rust/transpiler_review.py +1196 -0
  91. jarvis/jarvis_c2rust/transpiler_symbols.py +176 -0
  92. jarvis/jarvis_c2rust/utils.py +269 -79
  93. jarvis/jarvis_code_agent/after_change.py +233 -0
  94. jarvis/jarvis_code_agent/build_validation_config.py +37 -30
  95. jarvis/jarvis_code_agent/builtin_rules.py +68 -0
  96. jarvis/jarvis_code_agent/code_agent.py +976 -1517
  97. jarvis/jarvis_code_agent/code_agent_build.py +227 -0
  98. jarvis/jarvis_code_agent/code_agent_diff.py +246 -0
  99. jarvis/jarvis_code_agent/code_agent_git.py +525 -0
  100. jarvis/jarvis_code_agent/code_agent_impact.py +177 -0
  101. jarvis/jarvis_code_agent/code_agent_lint.py +283 -0
  102. jarvis/jarvis_code_agent/code_agent_llm.py +159 -0
  103. jarvis/jarvis_code_agent/code_agent_postprocess.py +105 -0
  104. jarvis/jarvis_code_agent/code_agent_prompts.py +46 -0
  105. jarvis/jarvis_code_agent/code_agent_rules.py +305 -0
  106. jarvis/jarvis_code_agent/code_analyzer/__init__.py +52 -48
  107. jarvis/jarvis_code_agent/code_analyzer/base_language.py +12 -10
  108. jarvis/jarvis_code_agent/code_analyzer/build_validator/__init__.py +12 -11
  109. jarvis/jarvis_code_agent/code_analyzer/build_validator/base.py +16 -12
  110. jarvis/jarvis_code_agent/code_analyzer/build_validator/cmake.py +26 -17
  111. jarvis/jarvis_code_agent/code_analyzer/build_validator/detector.py +558 -104
  112. jarvis/jarvis_code_agent/code_analyzer/build_validator/fallback.py +27 -16
  113. jarvis/jarvis_code_agent/code_analyzer/build_validator/go.py +22 -18
  114. jarvis/jarvis_code_agent/code_analyzer/build_validator/java_gradle.py +21 -16
  115. jarvis/jarvis_code_agent/code_analyzer/build_validator/java_maven.py +20 -16
  116. jarvis/jarvis_code_agent/code_analyzer/build_validator/makefile.py +27 -16
  117. jarvis/jarvis_code_agent/code_analyzer/build_validator/nodejs.py +47 -23
  118. jarvis/jarvis_code_agent/code_analyzer/build_validator/python.py +71 -37
  119. jarvis/jarvis_code_agent/code_analyzer/build_validator/rust.py +162 -35
  120. jarvis/jarvis_code_agent/code_analyzer/build_validator/validator.py +111 -57
  121. jarvis/jarvis_code_agent/code_analyzer/build_validator.py +18 -12
  122. jarvis/jarvis_code_agent/code_analyzer/context_manager.py +185 -183
  123. jarvis/jarvis_code_agent/code_analyzer/context_recommender.py +2 -1
  124. jarvis/jarvis_code_agent/code_analyzer/dependency_analyzer.py +24 -15
  125. jarvis/jarvis_code_agent/code_analyzer/file_ignore.py +227 -141
  126. jarvis/jarvis_code_agent/code_analyzer/impact_analyzer.py +321 -247
  127. jarvis/jarvis_code_agent/code_analyzer/language_registry.py +37 -29
  128. jarvis/jarvis_code_agent/code_analyzer/language_support.py +21 -13
  129. jarvis/jarvis_code_agent/code_analyzer/languages/__init__.py +15 -9
  130. jarvis/jarvis_code_agent/code_analyzer/languages/c_cpp_language.py +75 -45
  131. jarvis/jarvis_code_agent/code_analyzer/languages/go_language.py +87 -52
  132. jarvis/jarvis_code_agent/code_analyzer/languages/java_language.py +84 -51
  133. jarvis/jarvis_code_agent/code_analyzer/languages/javascript_language.py +94 -64
  134. jarvis/jarvis_code_agent/code_analyzer/languages/python_language.py +109 -71
  135. jarvis/jarvis_code_agent/code_analyzer/languages/rust_language.py +97 -63
  136. jarvis/jarvis_code_agent/code_analyzer/languages/typescript_language.py +103 -69
  137. jarvis/jarvis_code_agent/code_analyzer/llm_context_recommender.py +271 -268
  138. jarvis/jarvis_code_agent/code_analyzer/symbol_extractor.py +76 -64
  139. jarvis/jarvis_code_agent/code_analyzer/tree_sitter_extractor.py +92 -19
  140. jarvis/jarvis_code_agent/diff_visualizer.py +998 -0
  141. jarvis/jarvis_code_agent/lint.py +223 -524
  142. jarvis/jarvis_code_agent/rule_share_manager.py +158 -0
  143. jarvis/jarvis_code_agent/rules/clean_code.md +144 -0
  144. jarvis/jarvis_code_agent/rules/code_review.md +115 -0
  145. jarvis/jarvis_code_agent/rules/documentation.md +165 -0
  146. jarvis/jarvis_code_agent/rules/generate_rules.md +52 -0
  147. jarvis/jarvis_code_agent/rules/performance.md +158 -0
  148. jarvis/jarvis_code_agent/rules/refactoring.md +139 -0
  149. jarvis/jarvis_code_agent/rules/security.md +160 -0
  150. jarvis/jarvis_code_agent/rules/tdd.md +78 -0
  151. jarvis/jarvis_code_agent/test_rules/cpp_test.md +118 -0
  152. jarvis/jarvis_code_agent/test_rules/go_test.md +98 -0
  153. jarvis/jarvis_code_agent/test_rules/java_test.md +99 -0
  154. jarvis/jarvis_code_agent/test_rules/javascript_test.md +113 -0
  155. jarvis/jarvis_code_agent/test_rules/php_test.md +117 -0
  156. jarvis/jarvis_code_agent/test_rules/python_test.md +91 -0
  157. jarvis/jarvis_code_agent/test_rules/ruby_test.md +102 -0
  158. jarvis/jarvis_code_agent/test_rules/rust_test.md +86 -0
  159. jarvis/jarvis_code_agent/utils.py +36 -26
  160. jarvis/jarvis_code_analysis/checklists/loader.py +21 -21
  161. jarvis/jarvis_code_analysis/code_review.py +64 -33
  162. jarvis/jarvis_data/config_schema.json +285 -192
  163. jarvis/jarvis_git_squash/main.py +8 -6
  164. jarvis/jarvis_git_utils/git_commiter.py +53 -76
  165. jarvis/jarvis_mcp/__init__.py +5 -2
  166. jarvis/jarvis_mcp/sse_mcp_client.py +40 -30
  167. jarvis/jarvis_mcp/stdio_mcp_client.py +27 -19
  168. jarvis/jarvis_mcp/streamable_mcp_client.py +35 -26
  169. jarvis/jarvis_memory_organizer/memory_organizer.py +78 -55
  170. jarvis/jarvis_methodology/main.py +48 -39
  171. jarvis/jarvis_multi_agent/__init__.py +56 -23
  172. jarvis/jarvis_multi_agent/main.py +15 -18
  173. jarvis/jarvis_platform/base.py +179 -111
  174. jarvis/jarvis_platform/human.py +27 -16
  175. jarvis/jarvis_platform/kimi.py +52 -45
  176. jarvis/jarvis_platform/openai.py +101 -40
  177. jarvis/jarvis_platform/registry.py +51 -33
  178. jarvis/jarvis_platform/tongyi.py +68 -38
  179. jarvis/jarvis_platform/yuanbao.py +59 -43
  180. jarvis/jarvis_platform_manager/main.py +68 -76
  181. jarvis/jarvis_platform_manager/service.py +24 -14
  182. jarvis/jarvis_rag/README_CONFIG.md +314 -0
  183. jarvis/jarvis_rag/README_DYNAMIC_LOADING.md +311 -0
  184. jarvis/jarvis_rag/README_ONLINE_MODELS.md +230 -0
  185. jarvis/jarvis_rag/__init__.py +57 -4
  186. jarvis/jarvis_rag/cache.py +3 -1
  187. jarvis/jarvis_rag/cli.py +48 -68
  188. jarvis/jarvis_rag/embedding_interface.py +39 -0
  189. jarvis/jarvis_rag/embedding_manager.py +7 -230
  190. jarvis/jarvis_rag/embeddings/__init__.py +41 -0
  191. jarvis/jarvis_rag/embeddings/base.py +114 -0
  192. jarvis/jarvis_rag/embeddings/cohere.py +66 -0
  193. jarvis/jarvis_rag/embeddings/edgefn.py +117 -0
  194. jarvis/jarvis_rag/embeddings/local.py +260 -0
  195. jarvis/jarvis_rag/embeddings/openai.py +62 -0
  196. jarvis/jarvis_rag/embeddings/registry.py +293 -0
  197. jarvis/jarvis_rag/llm_interface.py +8 -6
  198. jarvis/jarvis_rag/query_rewriter.py +8 -9
  199. jarvis/jarvis_rag/rag_pipeline.py +61 -52
  200. jarvis/jarvis_rag/reranker.py +7 -75
  201. jarvis/jarvis_rag/reranker_interface.py +32 -0
  202. jarvis/jarvis_rag/rerankers/__init__.py +41 -0
  203. jarvis/jarvis_rag/rerankers/base.py +109 -0
  204. jarvis/jarvis_rag/rerankers/cohere.py +67 -0
  205. jarvis/jarvis_rag/rerankers/edgefn.py +140 -0
  206. jarvis/jarvis_rag/rerankers/jina.py +79 -0
  207. jarvis/jarvis_rag/rerankers/local.py +89 -0
  208. jarvis/jarvis_rag/rerankers/registry.py +293 -0
  209. jarvis/jarvis_rag/retriever.py +58 -43
  210. jarvis/jarvis_sec/__init__.py +66 -141
  211. jarvis/jarvis_sec/agents.py +21 -17
  212. jarvis/jarvis_sec/analysis.py +80 -33
  213. jarvis/jarvis_sec/checkers/__init__.py +7 -13
  214. jarvis/jarvis_sec/checkers/c_checker.py +356 -164
  215. jarvis/jarvis_sec/checkers/rust_checker.py +47 -29
  216. jarvis/jarvis_sec/cli.py +43 -21
  217. jarvis/jarvis_sec/clustering.py +430 -272
  218. jarvis/jarvis_sec/file_manager.py +99 -55
  219. jarvis/jarvis_sec/parsers.py +9 -6
  220. jarvis/jarvis_sec/prompts.py +4 -3
  221. jarvis/jarvis_sec/report.py +44 -22
  222. jarvis/jarvis_sec/review.py +180 -107
  223. jarvis/jarvis_sec/status.py +50 -41
  224. jarvis/jarvis_sec/types.py +3 -0
  225. jarvis/jarvis_sec/utils.py +160 -83
  226. jarvis/jarvis_sec/verification.py +411 -181
  227. jarvis/jarvis_sec/workflow.py +132 -21
  228. jarvis/jarvis_smart_shell/main.py +28 -41
  229. jarvis/jarvis_stats/cli.py +14 -12
  230. jarvis/jarvis_stats/stats.py +28 -19
  231. jarvis/jarvis_stats/storage.py +14 -8
  232. jarvis/jarvis_stats/visualizer.py +12 -7
  233. jarvis/jarvis_tools/base.py +5 -2
  234. jarvis/jarvis_tools/clear_memory.py +13 -9
  235. jarvis/jarvis_tools/cli/main.py +23 -18
  236. jarvis/jarvis_tools/edit_file.py +572 -873
  237. jarvis/jarvis_tools/execute_script.py +10 -7
  238. jarvis/jarvis_tools/file_analyzer.py +7 -8
  239. jarvis/jarvis_tools/meta_agent.py +287 -0
  240. jarvis/jarvis_tools/methodology.py +5 -3
  241. jarvis/jarvis_tools/read_code.py +305 -1438
  242. jarvis/jarvis_tools/read_symbols.py +50 -17
  243. jarvis/jarvis_tools/read_webpage.py +19 -18
  244. jarvis/jarvis_tools/registry.py +435 -156
  245. jarvis/jarvis_tools/retrieve_memory.py +16 -11
  246. jarvis/jarvis_tools/save_memory.py +8 -6
  247. jarvis/jarvis_tools/search_web.py +31 -31
  248. jarvis/jarvis_tools/sub_agent.py +32 -28
  249. jarvis/jarvis_tools/sub_code_agent.py +44 -60
  250. jarvis/jarvis_tools/task_list_manager.py +1811 -0
  251. jarvis/jarvis_tools/virtual_tty.py +29 -19
  252. jarvis/jarvis_utils/__init__.py +4 -0
  253. jarvis/jarvis_utils/builtin_replace_map.py +2 -1
  254. jarvis/jarvis_utils/clipboard.py +9 -8
  255. jarvis/jarvis_utils/collections.py +331 -0
  256. jarvis/jarvis_utils/config.py +699 -194
  257. jarvis/jarvis_utils/dialogue_recorder.py +294 -0
  258. jarvis/jarvis_utils/embedding.py +6 -3
  259. jarvis/jarvis_utils/file_processors.py +7 -1
  260. jarvis/jarvis_utils/fzf.py +9 -3
  261. jarvis/jarvis_utils/git_utils.py +71 -42
  262. jarvis/jarvis_utils/globals.py +116 -32
  263. jarvis/jarvis_utils/http.py +6 -2
  264. jarvis/jarvis_utils/input.py +318 -83
  265. jarvis/jarvis_utils/jsonnet_compat.py +119 -104
  266. jarvis/jarvis_utils/methodology.py +37 -28
  267. jarvis/jarvis_utils/output.py +201 -44
  268. jarvis/jarvis_utils/utils.py +986 -628
  269. {jarvis_ai_assistant-0.7.8.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/METADATA +49 -33
  270. jarvis_ai_assistant-1.0.2.dist-info/RECORD +304 -0
  271. jarvis/jarvis_code_agent/code_analyzer/structured_code.py +0 -556
  272. jarvis/jarvis_tools/generate_new_tool.py +0 -205
  273. jarvis/jarvis_tools/lsp_client.py +0 -1552
  274. jarvis/jarvis_tools/rewrite_file.py +0 -105
  275. jarvis_ai_assistant-0.7.8.dist-info/RECORD +0 -218
  276. {jarvis_ai_assistant-0.7.8.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/WHEEL +0 -0
  277. {jarvis_ai_assistant-0.7.8.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/entry_points.txt +0 -0
  278. {jarvis_ai_assistant-0.7.8.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/licenses/LICENSE +0 -0
  279. {jarvis_ai_assistant-0.7.8.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/top_level.txt +0 -0
@@ -1,556 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- """结构化代码提取和查找工具
3
-
4
- 提供从源代码文件中提取结构化单元(函数、类、导入语句等)的功能,
5
- 以及根据块id定位代码块的功能。
6
- """
7
- import os
8
- from typing import Any, Dict, List, Optional, Tuple
9
-
10
# Try to import the optional language-support helpers. When they are missing,
# fall back to inert stand-ins so that this module can still be imported and
# callers transparently degrade to the blank-line / line-number grouping modes.
try:
    from jarvis.jarvis_code_agent.code_analyzer.language_support import (
        detect_language,
        get_symbol_extractor,
        get_dependency_analyzer,
    )
    from jarvis.jarvis_code_agent.code_analyzer.symbol_extractor import Symbol

    LANGUAGE_SUPPORT_AVAILABLE = True
except ImportError:
    LANGUAGE_SUPPORT_AVAILABLE = False

    # ``Symbol`` appears in a method signature annotation, and annotations are
    # evaluated when the ``def`` statement runs. Without a binding here the
    # fallback path would fail with NameError while the class body executes.
    Symbol = Any  # type: ignore[assignment,misc]

    def detect_language(filepath: str):
        """Fallback stub: no language can be detected without language support."""
        return None

    def get_symbol_extractor(language: str):
        """Fallback stub: no symbol extractor is available."""
        return None

    def get_dependency_analyzer(language: str):
        """Fallback stub: no dependency analyzer is available."""
        return None
23
-
24
-
25
class StructuredCodeExtractor:
    """Extract structured units from source text and locate them by id.

    Every public method is a static method operating on plain strings and
    1-based, inclusive line numbers. The supported unit kinds are:

    - syntax units (functions, classes, ...), via the language-support helpers
    - import / include statements
    - groups of lines separated by blank lines
    - fixed-size groups of lines

    Each unit is a dict with the keys ``id``, ``start_line``, ``end_line``
    and ``content``.
    """

    # Symbol kinds that extract_syntax_units reports as syntax units.
    _SYNTAX_KINDS = frozenset({
        'function', 'method', 'class', 'struct', 'enum', 'union',
        'interface', 'trait', 'impl', 'module', 'attribute', 'const',
        'static', 'type', 'extern', 'macro', 'typedef', 'template',
        'namespace', 'var', 'constructor', 'field', 'annotation',
        'decorator',
    })

    # Upper bound on how far past a symbol's first line the brace scan looks.
    _MAX_BODY_SCAN_LINES = 100

    @staticmethod
    def get_full_definition_range(
        symbol: "Symbol", content: str, language: Optional[str]
    ) -> Tuple[int, int]:
        """Return the full line range of a definition, including its body.

        For ``language == 'python'`` the symbol's own range is returned
        unchanged. For every other language, a symbol spanning fewer than
        three lines is assumed to cover only the declaration, and a simple
        brace-matching scan is used to find where its body ends.

        Args:
            symbol: Object exposing ``line_start`` and ``line_end`` (1-based).
            content: Full text of the file.
            language: Language name, or None.

        Returns:
            A ``(start_line, end_line)`` tuple; ``end_line`` never exceeds the
            number of lines in ``content``.
        """
        if language == 'python':
            return symbol.line_start, symbol.line_end

        lines = content.split('\n')
        start_line = symbol.line_start
        end_line = symbol.line_end

        # Heuristic only: braces inside strings or comments are counted too.
        if end_line - start_line < 2:
            depth = 0
            seen_open = False
            scan_from = max(0, start_line - 1)
            scan_to = min(
                len(lines),
                start_line + StructuredCodeExtractor._MAX_BODY_SCAN_LINES,
            )
            for idx in range(scan_from, scan_to):
                text = lines[idx]
                if '{' in text:
                    depth += text.count('{')
                    seen_open = True
                if seen_open and '}' in text:
                    depth -= text.count('}')
                    # ``<=`` so that a stray extra '}' on the closing line
                    # cannot push the counter negative and make the scan run
                    # on into the following definitions.
                    if depth <= 0:
                        end_line = idx + 1
                        break

        # Never report a range that runs past the end of the file.
        end_line = min(end_line, len(lines))
        return start_line, end_line

    @staticmethod
    def extract_syntax_units(
        filepath: str, content: str, start_line: int, end_line: int
    ) -> List[Dict[str, Any]]:
        """Extract the syntax units (functions, classes, ...) touching a range.

        A unit is reported when its symbol range overlaps
        ``[start_line, end_line]``; it is returned whole, not clipped to the
        requested range. When one unit strictly contains others, the lines
        covered by the contained units are removed from the outer unit.

        Args:
            filepath: Path of the file, used for language detection.
            content: Full text of the file.
            start_line: First requested line (1-based, inclusive).
            end_line: Last requested line (1-based, inclusive).

        Returns:
            A list of unit dicts, or an empty list when language support is
            unavailable, the language is not recognised, no symbols are found,
            or extraction fails for any reason.
        """
        if not LANGUAGE_SUPPORT_AVAILABLE:
            return []

        try:
            language = detect_language(filepath)
            if not language:
                return []

            extractor = get_symbol_extractor(language)
            if not extractor:
                return []

            symbols = extractor.extract_symbols(filepath, content)
            if not symbols:
                return []

            # Keep symbols of a known kind whose range overlaps the request,
            # ordered by their first line.
            selected = sorted(
                (
                    s for s in symbols
                    if s.kind in StructuredCodeExtractor._SYNTAX_KINDS
                    and s.line_start <= end_line
                    and s.line_end >= start_line
                ),
                key=lambda s: s.line_start,
            )

            lines = content.split('\n')
            units_info: List[Dict[str, Any]] = []
            used_ids = set()

            for symbol in selected:
                unit_start, unit_end = StructuredCodeExtractor.get_full_definition_range(
                    symbol, content, language
                )
                first_idx = max(0, unit_start - 1)  # to 0-based index
                last_idx = min(len(lines), unit_end)

                # The id reflects the scope: "<parent>.<name>" when nested.
                if symbol.parent:
                    unit_id = f"{symbol.parent}.{symbol.name}"
                else:
                    unit_id = symbol.name
                # Disambiguate a repeated id with the unit's first line.
                if unit_id in used_ids:
                    unit_id = f"{unit_id}_{unit_start}"
                used_ids.add(unit_id)

                units_info.append({
                    'id': unit_id,
                    'start_line': unit_start,
                    'end_line': unit_end,
                    'content': '\n'.join(lines[first_idx:last_idx]),
                })

            return StructuredCodeExtractor._exclude_child_lines(units_info)
        except Exception:
            # Deliberate best effort: on any failure return no units so the
            # caller can fall back to the other grouping modes.
            return []

    @staticmethod
    def _exclude_child_lines(units_info: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Remove from each unit the lines covered by units nested inside it.

        A unit counts as nested when its range lies within the outer unit's
        range and is not identical to it. An outer unit that is left with no
        lines at all is dropped.
        """
        units: List[Dict[str, Any]] = []
        for unit in units_info:
            outer = (unit['start_line'], unit['end_line'])
            child_ranges = sorted(
                (other['start_line'], other['end_line'])
                for other in units_info
                if other is not unit
                and outer[0] <= other['start_line']
                and other['end_line'] <= outer[1]
                and (other['start_line'], other['end_line']) != outer
            )

            if not child_ranges:
                units.append(unit)
                continue

            # Merge overlapping or directly adjacent child ranges.
            merged: List[Tuple[int, int]] = []
            for start, end in child_ranges:
                if merged and start <= merged[-1][1] + 1:
                    merged[-1] = (merged[-1][0], max(merged[-1][1], end))
                else:
                    merged.append((start, end))

            # Keep only the lines no child covers, remembering the last one.
            kept_lines = []
            last_kept = unit['start_line']
            numbered = enumerate(unit['content'].split('\n'), start=unit['start_line'])
            for line_num, text in numbered:
                if any(start <= line_num <= end for start, end in merged):
                    continue
                kept_lines.append(text)
                last_kept = line_num

            if kept_lines:
                units.append({
                    'id': unit['id'],
                    'start_line': unit['start_line'],
                    'end_line': last_kept,
                    'content': '\n'.join(kept_lines),
                })
        return units

    @staticmethod
    def extract_blank_line_groups(
        content: str, start_line: int, end_line: int
    ) -> List[Dict[str, Any]]:
        """Split a line range into groups separated by blank lines.

        A blank line is one that is empty after stripping whitespace. Blank
        lines act purely as separators and never belong to a group.

        Args:
            content: Full text of the file.
            start_line: First line to consider (1-based, inclusive).
            end_line: Last line to consider (1-based, inclusive).

        Returns:
            A list of group dicts whose id is ``"<start>-<end>"``. If the
            range lies outside the file the list is empty; if the range
            contains only blank lines, a single group covering the whole
            range is returned.
        """
        lines = content.split('\n')
        first = max(1, start_line)  # guard against a negative slice index
        window = lines[first - 1:end_line]
        groups: List[Dict[str, Any]] = []
        if not window:
            return groups

        def make_group(from_idx: int, to_idx: int) -> Dict[str, Any]:
            # Indices are positions inside ``window`` (inclusive).
            group_start = first + from_idx
            group_end = first + to_idx
            return {
                'id': f"{group_start}-{group_end}",
                'start_line': group_start,
                'end_line': group_end,
                'content': '\n'.join(window[from_idx:to_idx + 1]),
            }

        run_start = None  # window index where the current non-blank run began
        for idx, text in enumerate(window):
            if text.strip():
                if run_start is None:
                    run_start = idx
            elif run_start is not None:
                groups.append(make_group(run_start, idx - 1))
                # Reset unconditionally, so trailing blank lines cannot cause
                # the group just emitted to be emitted a second time.
                run_start = None

        if run_start is not None:
            groups.append(make_group(run_start, len(window) - 1))

        if not groups:
            # Only blank lines in range: report the range as a single group.
            groups.append({
                'id': f"{first}-{end_line}",
                'start_line': first,
                'end_line': end_line,
                'content': '\n'.join(window),
            })

        return groups

    @staticmethod
    def extract_line_groups(
        content: str, start_line: int, end_line: int, group_size: int = 20
    ) -> List[Dict[str, Any]]:
        """Split a line range into consecutive fixed-size groups.

        Args:
            content: Full text of the file.
            start_line: First line to consider (1-based, inclusive).
            end_line: Last line to consider (1-based, inclusive).
            group_size: Number of lines per group (default 20).

        Returns:
            A list of group dicts whose id is ``"<start>-<end>"``; the last
            group may be shorter than ``group_size``.

        Raises:
            ValueError: If ``group_size`` is smaller than 1 (which would
                otherwise never advance and loop forever).
        """
        if group_size < 1:
            raise ValueError("group_size must be a positive integer")

        lines = content.split('\n')
        groups: List[Dict[str, Any]] = []

        current_start = max(1, start_line)  # guard against a negative index
        while current_start <= end_line:
            current_end = min(current_start + group_size - 1, end_line)
            groups.append({
                'id': f"{current_start}-{current_end}",
                'start_line': current_start,
                'end_line': current_end,
                # 1-based inclusive range -> 0-based half-open slice
                'content': '\n'.join(lines[current_start - 1:current_end]),
            })
            current_start = current_end + 1

        return groups

    @staticmethod
    def ensure_unique_ids(units: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return copies of the units with every ``id`` made unique.

        The first occurrence of an id is kept; later occurrences receive a
        ``_<counter>`` suffix. The input dicts are not modified.

        Args:
            units: List of unit dicts, each with an ``id`` key.

        Returns:
            A new list of shallow copies with unique ids, in the same order.
        """
        seen_ids = set()
        result = []

        for unit in units:
            original_id = unit['id']
            candidate = original_id
            counter = 1
            while candidate in seen_ids:
                candidate = f"{original_id}_{counter}"
                counter += 1
            seen_ids.add(candidate)

            renamed = unit.copy()
            renamed['id'] = candidate
            result.append(renamed)

        return result

    @staticmethod
    def extract_imports(filepath: str, content: str, start_line: int, end_line: int) -> List[Dict[str, Any]]:
        """Extract import / include statements as structured units.

        Imports on consecutive lines are merged into one unit. Several
        dependencies reported for the same physical line contribute that line
        only once.

        Args:
            filepath: Path of the file, used for language detection.
            content: Full text of the file.
            start_line: First requested line (1-based, inclusive).
            end_line: Last requested line (1-based, inclusive).

        Returns:
            A list of import unit dicts, or an empty list when language
            support is unavailable, nothing is found, or analysis fails.
        """
        if not LANGUAGE_SUPPORT_AVAILABLE:
            return []

        try:
            language = detect_language(filepath)
            if not language:
                return []

            analyzer = get_dependency_analyzer(language)
            if not analyzer:
                return []

            dependencies = analyzer.analyze_imports(filepath, content)
            if not dependencies:
                return []

            lines = content.split('\n')
            import_units = []
            current_group: List[Dict[str, Any]] = []

            for dep in sorted(dependencies, key=lambda d: d.line):
                line_num = dep.line
                in_request = start_line <= line_num <= end_line
                in_file = 1 <= line_num <= len(lines)
                if not (in_request and in_file):
                    continue

                if current_group:
                    previous_line = current_group[-1]['line']
                    if line_num == previous_line:
                        # e.g. "import os, sys": same line, already recorded
                        continue
                    if line_num != previous_line + 1:
                        # Gap between imports: close the current group.
                        import_units.append(
                            StructuredCodeExtractor.create_import_unit(current_group)
                        )
                        current_group = []

                current_group.append({
                    'line': line_num,
                    'content': lines[line_num - 1]
                })

            if current_group:
                import_units.append(StructuredCodeExtractor.create_import_unit(current_group))

            return import_units
        except Exception:
            # Deliberate best effort: a failing analyzer yields no imports.
            return []

    @staticmethod
    def create_import_unit(import_group: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Build one unit dict from a group of consecutive import lines.

        Args:
            import_group: Non-empty list of dicts with ``line`` and
                ``content`` keys, ordered by line number.

        Returns:
            A unit dict whose id is ``import_<line>`` for a single line and
            ``imports_<first>_<last>`` for several lines.
        """
        start_line = import_group[0]['line']
        end_line = import_group[-1]['line']

        if len(import_group) == 1:
            unit_id = f"import_{start_line}"
        else:
            unit_id = f"imports_{start_line}_{end_line}"

        return {
            'id': unit_id,
            'start_line': start_line,
            'end_line': end_line,
            'content': '\n'.join(item['content'] for item in import_group),
        }

    @staticmethod
    def _pick_block(units: List[Dict[str, Any]], block_id: str) -> Optional[Dict[str, Any]]:
        """Return the location dict of the first unit whose id matches, else None."""
        for unit in units:
            if unit['id'] == block_id:
                return {
                    'start_line': unit['start_line'],
                    'end_line': unit['end_line'],
                    'content': unit['content']
                }
        return None

    @staticmethod
    def find_block_by_id(filepath: str, block_id: str, raw_mode: bool = False) -> Optional[Dict[str, Any]]:
        """Locate a code block in a file by its block id.

        Args:
            filepath: Path of the file to read (decoded as UTF-8).
            block_id: Id of the block to find.
            raw_mode: When True, only fixed groups of 20 lines are searched.
                When False (default), import and syntax units are searched
                first, then the blank-line groups.

        Returns:
            A dict with ``start_line``, ``end_line`` and ``content``, or None
            when the file does not exist, cannot be read, or no block has the
            requested id.
        """
        try:
            abs_path = os.path.abspath(filepath)
            if not os.path.exists(abs_path):
                return None

            with open(abs_path, 'r', encoding='utf-8') as f:
                content = f.read()

            total_lines = len(content.split('\n'))

            if raw_mode:
                line_groups = StructuredCodeExtractor.extract_line_groups(
                    content, 1, total_lines, group_size=20
                )
                return StructuredCodeExtractor._pick_block(line_groups, block_id)

            # Structured mode: imports first, then syntax units, ids made unique.
            syntax_units = StructuredCodeExtractor.extract_syntax_units(abs_path, content, 1, total_lines)
            import_units = StructuredCodeExtractor.extract_imports(abs_path, content, 1, total_lines)
            all_units = StructuredCodeExtractor.ensure_unique_ids(import_units + syntax_units)

            found = StructuredCodeExtractor._pick_block(all_units, block_id)
            if found is not None:
                return found

            # Nothing matched: fall back to the blank-line groups.
            blank_line_groups = StructuredCodeExtractor.extract_blank_line_groups(content, 1, total_lines)
            return StructuredCodeExtractor._pick_block(blank_line_groups, block_id)

        except Exception:
            # Deliberate best effort: unreadable or undecodable files, and
            # any other failure, are reported as "not found".
            return None
556
-