jarvis-ai-assistant 0.7.16__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279) hide show
  1. jarvis/__init__.py +1 -1
  2. jarvis/jarvis_agent/__init__.py +567 -222
  3. jarvis/jarvis_agent/agent_manager.py +19 -12
  4. jarvis/jarvis_agent/builtin_input_handler.py +79 -11
  5. jarvis/jarvis_agent/config_editor.py +7 -2
  6. jarvis/jarvis_agent/event_bus.py +24 -13
  7. jarvis/jarvis_agent/events.py +19 -1
  8. jarvis/jarvis_agent/file_context_handler.py +67 -64
  9. jarvis/jarvis_agent/file_methodology_manager.py +38 -24
  10. jarvis/jarvis_agent/jarvis.py +186 -114
  11. jarvis/jarvis_agent/language_extractors/__init__.py +8 -1
  12. jarvis/jarvis_agent/language_extractors/c_extractor.py +7 -4
  13. jarvis/jarvis_agent/language_extractors/cpp_extractor.py +9 -4
  14. jarvis/jarvis_agent/language_extractors/go_extractor.py +7 -4
  15. jarvis/jarvis_agent/language_extractors/java_extractor.py +27 -20
  16. jarvis/jarvis_agent/language_extractors/javascript_extractor.py +22 -17
  17. jarvis/jarvis_agent/language_extractors/python_extractor.py +7 -4
  18. jarvis/jarvis_agent/language_extractors/rust_extractor.py +7 -4
  19. jarvis/jarvis_agent/language_extractors/typescript_extractor.py +22 -17
  20. jarvis/jarvis_agent/language_support_info.py +250 -219
  21. jarvis/jarvis_agent/main.py +19 -23
  22. jarvis/jarvis_agent/memory_manager.py +9 -6
  23. jarvis/jarvis_agent/methodology_share_manager.py +21 -15
  24. jarvis/jarvis_agent/output_handler.py +4 -2
  25. jarvis/jarvis_agent/prompt_builder.py +7 -6
  26. jarvis/jarvis_agent/prompt_manager.py +113 -8
  27. jarvis/jarvis_agent/prompts.py +317 -85
  28. jarvis/jarvis_agent/protocols.py +5 -2
  29. jarvis/jarvis_agent/run_loop.py +192 -32
  30. jarvis/jarvis_agent/session_manager.py +7 -3
  31. jarvis/jarvis_agent/share_manager.py +23 -13
  32. jarvis/jarvis_agent/shell_input_handler.py +12 -8
  33. jarvis/jarvis_agent/stdio_redirect.py +25 -26
  34. jarvis/jarvis_agent/task_analyzer.py +29 -23
  35. jarvis/jarvis_agent/task_list.py +869 -0
  36. jarvis/jarvis_agent/task_manager.py +26 -23
  37. jarvis/jarvis_agent/tool_executor.py +6 -5
  38. jarvis/jarvis_agent/tool_share_manager.py +24 -14
  39. jarvis/jarvis_agent/user_interaction.py +3 -3
  40. jarvis/jarvis_agent/utils.py +9 -1
  41. jarvis/jarvis_agent/web_bridge.py +37 -17
  42. jarvis/jarvis_agent/web_output_sink.py +5 -2
  43. jarvis/jarvis_agent/web_server.py +165 -36
  44. jarvis/jarvis_c2rust/__init__.py +1 -1
  45. jarvis/jarvis_c2rust/cli.py +260 -141
  46. jarvis/jarvis_c2rust/collector.py +37 -18
  47. jarvis/jarvis_c2rust/constants.py +60 -0
  48. jarvis/jarvis_c2rust/library_replacer.py +242 -1010
  49. jarvis/jarvis_c2rust/library_replacer_checkpoint.py +133 -0
  50. jarvis/jarvis_c2rust/library_replacer_llm.py +287 -0
  51. jarvis/jarvis_c2rust/library_replacer_loader.py +191 -0
  52. jarvis/jarvis_c2rust/library_replacer_output.py +134 -0
  53. jarvis/jarvis_c2rust/library_replacer_prompts.py +124 -0
  54. jarvis/jarvis_c2rust/library_replacer_utils.py +188 -0
  55. jarvis/jarvis_c2rust/llm_module_agent.py +98 -1044
  56. jarvis/jarvis_c2rust/llm_module_agent_apply.py +170 -0
  57. jarvis/jarvis_c2rust/llm_module_agent_executor.py +288 -0
  58. jarvis/jarvis_c2rust/llm_module_agent_loader.py +170 -0
  59. jarvis/jarvis_c2rust/llm_module_agent_prompts.py +268 -0
  60. jarvis/jarvis_c2rust/llm_module_agent_types.py +57 -0
  61. jarvis/jarvis_c2rust/llm_module_agent_utils.py +150 -0
  62. jarvis/jarvis_c2rust/llm_module_agent_validator.py +119 -0
  63. jarvis/jarvis_c2rust/loaders.py +28 -10
  64. jarvis/jarvis_c2rust/models.py +5 -2
  65. jarvis/jarvis_c2rust/optimizer.py +192 -1974
  66. jarvis/jarvis_c2rust/optimizer_build_fix.py +286 -0
  67. jarvis/jarvis_c2rust/optimizer_clippy.py +766 -0
  68. jarvis/jarvis_c2rust/optimizer_config.py +49 -0
  69. jarvis/jarvis_c2rust/optimizer_docs.py +183 -0
  70. jarvis/jarvis_c2rust/optimizer_options.py +48 -0
  71. jarvis/jarvis_c2rust/optimizer_progress.py +469 -0
  72. jarvis/jarvis_c2rust/optimizer_report.py +52 -0
  73. jarvis/jarvis_c2rust/optimizer_unsafe.py +309 -0
  74. jarvis/jarvis_c2rust/optimizer_utils.py +469 -0
  75. jarvis/jarvis_c2rust/optimizer_visibility.py +185 -0
  76. jarvis/jarvis_c2rust/scanner.py +229 -166
  77. jarvis/jarvis_c2rust/transpiler.py +531 -2732
  78. jarvis/jarvis_c2rust/transpiler_agents.py +503 -0
  79. jarvis/jarvis_c2rust/transpiler_build.py +1294 -0
  80. jarvis/jarvis_c2rust/transpiler_codegen.py +204 -0
  81. jarvis/jarvis_c2rust/transpiler_compile.py +146 -0
  82. jarvis/jarvis_c2rust/transpiler_config.py +178 -0
  83. jarvis/jarvis_c2rust/transpiler_context.py +122 -0
  84. jarvis/jarvis_c2rust/transpiler_executor.py +516 -0
  85. jarvis/jarvis_c2rust/transpiler_generation.py +278 -0
  86. jarvis/jarvis_c2rust/transpiler_git.py +163 -0
  87. jarvis/jarvis_c2rust/transpiler_mod_utils.py +225 -0
  88. jarvis/jarvis_c2rust/transpiler_modules.py +336 -0
  89. jarvis/jarvis_c2rust/transpiler_planning.py +394 -0
  90. jarvis/jarvis_c2rust/transpiler_review.py +1196 -0
  91. jarvis/jarvis_c2rust/transpiler_symbols.py +176 -0
  92. jarvis/jarvis_c2rust/utils.py +269 -79
  93. jarvis/jarvis_code_agent/after_change.py +233 -0
  94. jarvis/jarvis_code_agent/build_validation_config.py +37 -30
  95. jarvis/jarvis_code_agent/builtin_rules.py +68 -0
  96. jarvis/jarvis_code_agent/code_agent.py +976 -1517
  97. jarvis/jarvis_code_agent/code_agent_build.py +227 -0
  98. jarvis/jarvis_code_agent/code_agent_diff.py +246 -0
  99. jarvis/jarvis_code_agent/code_agent_git.py +525 -0
  100. jarvis/jarvis_code_agent/code_agent_impact.py +177 -0
  101. jarvis/jarvis_code_agent/code_agent_lint.py +283 -0
  102. jarvis/jarvis_code_agent/code_agent_llm.py +159 -0
  103. jarvis/jarvis_code_agent/code_agent_postprocess.py +105 -0
  104. jarvis/jarvis_code_agent/code_agent_prompts.py +46 -0
  105. jarvis/jarvis_code_agent/code_agent_rules.py +305 -0
  106. jarvis/jarvis_code_agent/code_analyzer/__init__.py +52 -48
  107. jarvis/jarvis_code_agent/code_analyzer/base_language.py +12 -10
  108. jarvis/jarvis_code_agent/code_analyzer/build_validator/__init__.py +12 -11
  109. jarvis/jarvis_code_agent/code_analyzer/build_validator/base.py +16 -12
  110. jarvis/jarvis_code_agent/code_analyzer/build_validator/cmake.py +26 -17
  111. jarvis/jarvis_code_agent/code_analyzer/build_validator/detector.py +558 -104
  112. jarvis/jarvis_code_agent/code_analyzer/build_validator/fallback.py +27 -16
  113. jarvis/jarvis_code_agent/code_analyzer/build_validator/go.py +22 -18
  114. jarvis/jarvis_code_agent/code_analyzer/build_validator/java_gradle.py +21 -16
  115. jarvis/jarvis_code_agent/code_analyzer/build_validator/java_maven.py +20 -16
  116. jarvis/jarvis_code_agent/code_analyzer/build_validator/makefile.py +27 -16
  117. jarvis/jarvis_code_agent/code_analyzer/build_validator/nodejs.py +47 -23
  118. jarvis/jarvis_code_agent/code_analyzer/build_validator/python.py +71 -37
  119. jarvis/jarvis_code_agent/code_analyzer/build_validator/rust.py +162 -35
  120. jarvis/jarvis_code_agent/code_analyzer/build_validator/validator.py +111 -57
  121. jarvis/jarvis_code_agent/code_analyzer/build_validator.py +18 -12
  122. jarvis/jarvis_code_agent/code_analyzer/context_manager.py +185 -183
  123. jarvis/jarvis_code_agent/code_analyzer/context_recommender.py +2 -1
  124. jarvis/jarvis_code_agent/code_analyzer/dependency_analyzer.py +24 -15
  125. jarvis/jarvis_code_agent/code_analyzer/file_ignore.py +227 -141
  126. jarvis/jarvis_code_agent/code_analyzer/impact_analyzer.py +321 -247
  127. jarvis/jarvis_code_agent/code_analyzer/language_registry.py +37 -29
  128. jarvis/jarvis_code_agent/code_analyzer/language_support.py +21 -13
  129. jarvis/jarvis_code_agent/code_analyzer/languages/__init__.py +15 -9
  130. jarvis/jarvis_code_agent/code_analyzer/languages/c_cpp_language.py +75 -45
  131. jarvis/jarvis_code_agent/code_analyzer/languages/go_language.py +87 -52
  132. jarvis/jarvis_code_agent/code_analyzer/languages/java_language.py +84 -51
  133. jarvis/jarvis_code_agent/code_analyzer/languages/javascript_language.py +94 -64
  134. jarvis/jarvis_code_agent/code_analyzer/languages/python_language.py +109 -71
  135. jarvis/jarvis_code_agent/code_analyzer/languages/rust_language.py +97 -63
  136. jarvis/jarvis_code_agent/code_analyzer/languages/typescript_language.py +103 -69
  137. jarvis/jarvis_code_agent/code_analyzer/llm_context_recommender.py +271 -268
  138. jarvis/jarvis_code_agent/code_analyzer/symbol_extractor.py +76 -64
  139. jarvis/jarvis_code_agent/code_analyzer/tree_sitter_extractor.py +92 -19
  140. jarvis/jarvis_code_agent/diff_visualizer.py +998 -0
  141. jarvis/jarvis_code_agent/lint.py +223 -524
  142. jarvis/jarvis_code_agent/rule_share_manager.py +158 -0
  143. jarvis/jarvis_code_agent/rules/clean_code.md +144 -0
  144. jarvis/jarvis_code_agent/rules/code_review.md +115 -0
  145. jarvis/jarvis_code_agent/rules/documentation.md +165 -0
  146. jarvis/jarvis_code_agent/rules/generate_rules.md +52 -0
  147. jarvis/jarvis_code_agent/rules/performance.md +158 -0
  148. jarvis/jarvis_code_agent/rules/refactoring.md +139 -0
  149. jarvis/jarvis_code_agent/rules/security.md +160 -0
  150. jarvis/jarvis_code_agent/rules/tdd.md +78 -0
  151. jarvis/jarvis_code_agent/test_rules/cpp_test.md +118 -0
  152. jarvis/jarvis_code_agent/test_rules/go_test.md +98 -0
  153. jarvis/jarvis_code_agent/test_rules/java_test.md +99 -0
  154. jarvis/jarvis_code_agent/test_rules/javascript_test.md +113 -0
  155. jarvis/jarvis_code_agent/test_rules/php_test.md +117 -0
  156. jarvis/jarvis_code_agent/test_rules/python_test.md +91 -0
  157. jarvis/jarvis_code_agent/test_rules/ruby_test.md +102 -0
  158. jarvis/jarvis_code_agent/test_rules/rust_test.md +86 -0
  159. jarvis/jarvis_code_agent/utils.py +36 -26
  160. jarvis/jarvis_code_analysis/checklists/loader.py +21 -21
  161. jarvis/jarvis_code_analysis/code_review.py +64 -33
  162. jarvis/jarvis_data/config_schema.json +285 -192
  163. jarvis/jarvis_git_squash/main.py +8 -6
  164. jarvis/jarvis_git_utils/git_commiter.py +53 -76
  165. jarvis/jarvis_mcp/__init__.py +5 -2
  166. jarvis/jarvis_mcp/sse_mcp_client.py +40 -30
  167. jarvis/jarvis_mcp/stdio_mcp_client.py +27 -19
  168. jarvis/jarvis_mcp/streamable_mcp_client.py +35 -26
  169. jarvis/jarvis_memory_organizer/memory_organizer.py +78 -55
  170. jarvis/jarvis_methodology/main.py +48 -39
  171. jarvis/jarvis_multi_agent/__init__.py +56 -23
  172. jarvis/jarvis_multi_agent/main.py +15 -18
  173. jarvis/jarvis_platform/base.py +179 -111
  174. jarvis/jarvis_platform/human.py +27 -16
  175. jarvis/jarvis_platform/kimi.py +52 -45
  176. jarvis/jarvis_platform/openai.py +101 -40
  177. jarvis/jarvis_platform/registry.py +51 -33
  178. jarvis/jarvis_platform/tongyi.py +68 -38
  179. jarvis/jarvis_platform/yuanbao.py +59 -43
  180. jarvis/jarvis_platform_manager/main.py +68 -76
  181. jarvis/jarvis_platform_manager/service.py +24 -14
  182. jarvis/jarvis_rag/README_CONFIG.md +314 -0
  183. jarvis/jarvis_rag/README_DYNAMIC_LOADING.md +311 -0
  184. jarvis/jarvis_rag/README_ONLINE_MODELS.md +230 -0
  185. jarvis/jarvis_rag/__init__.py +57 -4
  186. jarvis/jarvis_rag/cache.py +3 -1
  187. jarvis/jarvis_rag/cli.py +48 -68
  188. jarvis/jarvis_rag/embedding_interface.py +39 -0
  189. jarvis/jarvis_rag/embedding_manager.py +7 -230
  190. jarvis/jarvis_rag/embeddings/__init__.py +41 -0
  191. jarvis/jarvis_rag/embeddings/base.py +114 -0
  192. jarvis/jarvis_rag/embeddings/cohere.py +66 -0
  193. jarvis/jarvis_rag/embeddings/edgefn.py +117 -0
  194. jarvis/jarvis_rag/embeddings/local.py +260 -0
  195. jarvis/jarvis_rag/embeddings/openai.py +62 -0
  196. jarvis/jarvis_rag/embeddings/registry.py +293 -0
  197. jarvis/jarvis_rag/llm_interface.py +8 -6
  198. jarvis/jarvis_rag/query_rewriter.py +8 -9
  199. jarvis/jarvis_rag/rag_pipeline.py +61 -52
  200. jarvis/jarvis_rag/reranker.py +7 -75
  201. jarvis/jarvis_rag/reranker_interface.py +32 -0
  202. jarvis/jarvis_rag/rerankers/__init__.py +41 -0
  203. jarvis/jarvis_rag/rerankers/base.py +109 -0
  204. jarvis/jarvis_rag/rerankers/cohere.py +67 -0
  205. jarvis/jarvis_rag/rerankers/edgefn.py +140 -0
  206. jarvis/jarvis_rag/rerankers/jina.py +79 -0
  207. jarvis/jarvis_rag/rerankers/local.py +89 -0
  208. jarvis/jarvis_rag/rerankers/registry.py +293 -0
  209. jarvis/jarvis_rag/retriever.py +58 -43
  210. jarvis/jarvis_sec/__init__.py +66 -141
  211. jarvis/jarvis_sec/agents.py +21 -17
  212. jarvis/jarvis_sec/analysis.py +80 -33
  213. jarvis/jarvis_sec/checkers/__init__.py +7 -13
  214. jarvis/jarvis_sec/checkers/c_checker.py +356 -164
  215. jarvis/jarvis_sec/checkers/rust_checker.py +47 -29
  216. jarvis/jarvis_sec/cli.py +43 -21
  217. jarvis/jarvis_sec/clustering.py +430 -272
  218. jarvis/jarvis_sec/file_manager.py +99 -55
  219. jarvis/jarvis_sec/parsers.py +9 -6
  220. jarvis/jarvis_sec/prompts.py +4 -3
  221. jarvis/jarvis_sec/report.py +44 -22
  222. jarvis/jarvis_sec/review.py +180 -107
  223. jarvis/jarvis_sec/status.py +50 -41
  224. jarvis/jarvis_sec/types.py +3 -0
  225. jarvis/jarvis_sec/utils.py +160 -83
  226. jarvis/jarvis_sec/verification.py +411 -181
  227. jarvis/jarvis_sec/workflow.py +132 -21
  228. jarvis/jarvis_smart_shell/main.py +28 -41
  229. jarvis/jarvis_stats/cli.py +14 -12
  230. jarvis/jarvis_stats/stats.py +28 -19
  231. jarvis/jarvis_stats/storage.py +14 -8
  232. jarvis/jarvis_stats/visualizer.py +12 -7
  233. jarvis/jarvis_tools/base.py +5 -2
  234. jarvis/jarvis_tools/clear_memory.py +13 -9
  235. jarvis/jarvis_tools/cli/main.py +23 -18
  236. jarvis/jarvis_tools/edit_file.py +572 -873
  237. jarvis/jarvis_tools/execute_script.py +10 -7
  238. jarvis/jarvis_tools/file_analyzer.py +7 -8
  239. jarvis/jarvis_tools/meta_agent.py +287 -0
  240. jarvis/jarvis_tools/methodology.py +5 -3
  241. jarvis/jarvis_tools/read_code.py +305 -1438
  242. jarvis/jarvis_tools/read_symbols.py +50 -17
  243. jarvis/jarvis_tools/read_webpage.py +19 -18
  244. jarvis/jarvis_tools/registry.py +435 -156
  245. jarvis/jarvis_tools/retrieve_memory.py +16 -11
  246. jarvis/jarvis_tools/save_memory.py +8 -6
  247. jarvis/jarvis_tools/search_web.py +31 -31
  248. jarvis/jarvis_tools/sub_agent.py +32 -28
  249. jarvis/jarvis_tools/sub_code_agent.py +44 -60
  250. jarvis/jarvis_tools/task_list_manager.py +1811 -0
  251. jarvis/jarvis_tools/virtual_tty.py +29 -19
  252. jarvis/jarvis_utils/__init__.py +4 -0
  253. jarvis/jarvis_utils/builtin_replace_map.py +2 -1
  254. jarvis/jarvis_utils/clipboard.py +9 -8
  255. jarvis/jarvis_utils/collections.py +331 -0
  256. jarvis/jarvis_utils/config.py +699 -194
  257. jarvis/jarvis_utils/dialogue_recorder.py +294 -0
  258. jarvis/jarvis_utils/embedding.py +6 -3
  259. jarvis/jarvis_utils/file_processors.py +7 -1
  260. jarvis/jarvis_utils/fzf.py +9 -3
  261. jarvis/jarvis_utils/git_utils.py +71 -42
  262. jarvis/jarvis_utils/globals.py +116 -32
  263. jarvis/jarvis_utils/http.py +6 -2
  264. jarvis/jarvis_utils/input.py +318 -83
  265. jarvis/jarvis_utils/jsonnet_compat.py +119 -104
  266. jarvis/jarvis_utils/methodology.py +37 -28
  267. jarvis/jarvis_utils/output.py +201 -44
  268. jarvis/jarvis_utils/utils.py +986 -628
  269. {jarvis_ai_assistant-0.7.16.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/METADATA +49 -33
  270. jarvis_ai_assistant-1.0.2.dist-info/RECORD +304 -0
  271. jarvis/jarvis_code_agent/code_analyzer/structured_code.py +0 -556
  272. jarvis/jarvis_tools/generate_new_tool.py +0 -205
  273. jarvis/jarvis_tools/lsp_client.py +0 -1552
  274. jarvis/jarvis_tools/rewrite_file.py +0 -105
  275. jarvis_ai_assistant-0.7.16.dist-info/RECORD +0 -218
  276. {jarvis_ai_assistant-0.7.16.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/WHEEL +0 -0
  277. {jarvis_ai_assistant-0.7.16.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/entry_points.txt +0 -0
  278. {jarvis_ai_assistant-0.7.16.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/licenses/LICENSE +0 -0
  279. {jarvis_ai_assistant-0.7.16.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/top_level.txt +0 -0
@@ -4,12 +4,12 @@ Library-based dependency replacer for C→Rust migration (LLM-only subtree evalu
4
4
 
5
5
  要点:
6
6
  - 不依赖 pruner,仅复用 scanner 的通用工具函数
7
- - 将“依赖子树(根函数及其可达的函数集合)”的摘要与局部源码片段提供给 LLM,由 LLM 评估该子树是否可由“指定标准库/第三方 crate 的一个或多个成熟 API(可组合,多库协同)”整体替代
7
+ - 将"依赖子树(根函数及其可达的函数集合)"的摘要与局部源码片段提供给 LLM,由 LLM 评估该子树是否可由"指定标准库/第三方 crate 的一个或多个成熟 API(可组合,多库协同)"整体替代
8
8
  - 若可替代:将根函数的 ref 替换为该库 API(以 lib::<name> 形式的占位符,支持多库组合),并删除其所有子孙函数节点(类型不受影响)
9
9
  - 支持禁用库约束:可传入 disabled_libraries(list[str]),若 LLM 建议命中禁用库,则强制判定为不可替代并记录备注
10
10
  - 断点恢复(checkpoint/resume):可启用 resume,使用 library_replacer_checkpoint.json 记录 eval_counter/processed/pruned/selected 等信息,基于关键输入组合键进行匹配恢复;落盘采用原子写以防损坏
11
11
  - 主库字段回退策略:当存在 libraries 列表优先选择第一个作为 primary;否则回退到单一 library 字段;均为空则置空
12
- - 入口保护:默认跳过 main(可通过环境变量 JARVIS_C2RUST_DELAY_ENTRY_SYMBOLS/JARVIS_C2RUST_DELAY_ENTRIES/C2RUST_DELAY_ENTRIES 配置多个入口名)
12
+ - 入口保护:默认跳过 main(可通过环境变量 c2rust_delay_entry_symbols/c2rust_delay_entries/C2RUST_DELAY_ENTRIES 配置多个入口名)
13
13
 
14
14
  输入数据:
15
15
  - symbols.jsonl(或传入的 .jsonl 路径):由 scanner 生成的统一符号表,字段参见 scanner.py
@@ -30,900 +30,37 @@ from __future__ import annotations
30
30
 
31
31
  import json
32
32
  import shutil
33
- import time
34
33
  from pathlib import Path
35
- from typing import Any, Callable, Dict, List, Optional, Set, Tuple
36
-
37
- import typer
38
-
39
- # 依赖:仅使用 scanner 的工具函数,避免循环导入
40
- from jarvis.jarvis_c2rust.scanner import (
41
- compute_translation_order_jsonl,
42
- find_root_function_ids,
43
- )
44
-
45
- # ============================================================================
46
- # 常量定义
47
- # ============================================================================
48
-
49
- # LLM评估重试配置
50
- MAX_LLM_RETRIES = 3 # LLM评估最大重试次数
51
-
52
- # 源码片段读取配置
53
- DEFAULT_SOURCE_SNIPPET_MAX_LINES = 200 # 默认源码片段最大行数
54
- SUBTREE_SOURCE_SNIPPET_MAX_LINES = 120 # 子树提示词中源码片段最大行数
55
-
56
- # 子树提示词构建配置
57
- MAX_SUBTREE_NODES_META = 200 # 子树节点元数据列表最大长度
58
- MAX_SUBTREE_EDGES = 400 # 子树边列表最大长度
59
- MAX_DOT_EDGES = 200 # DOT图边数阈值(超过此值不生成DOT)
60
- MAX_CHILD_SAMPLES = 2 # 子节点采样数量
61
- MAX_SOURCE_SAMPLES = 3 # 代表性源码样本最大数量(注释说明)
62
-
63
- # 显示配置
64
- MAX_NOTES_DISPLAY_LENGTH = 200 # 备注显示最大长度
65
-
66
- # 输出文件路径配置
67
- DEFAULT_SYMBOLS_OUTPUT = "symbols_library_pruned.jsonl" # 默认符号表输出文件名
68
- DEFAULT_MAPPING_OUTPUT = "library_replacements.jsonl" # 默认替代映射输出文件名
69
- SYMBOLS_PRUNE_OUTPUT = "symbols_prune.jsonl" # 兼容符号表输出文件名
70
- ORDER_PRUNE_OUTPUT = "translation_order_prune.jsonl" # 剪枝阶段转译顺序输出文件名
71
- ORDER_ALIAS_OUTPUT = "translation_order.jsonl" # 通用转译顺序输出文件名
72
- DEFAULT_CHECKPOINT_FILE = "library_replacer_checkpoint.json" # 默认检查点文件名
73
-
74
- # Checkpoint配置
75
- DEFAULT_CHECKPOINT_INTERVAL = 1 # 默认检查点保存间隔(每评估N个节点保存一次)
76
-
77
- # JSON格式化配置
78
- JSON_INDENT = 2 # JSON格式化缩进空格数
79
-
80
-
81
- def _resolve_symbols_jsonl_path(hint: Path) -> Path:
82
- """解析symbols.jsonl路径"""
83
- p = Path(hint)
84
- if p.is_file() and p.suffix.lower() == ".jsonl":
85
- return p
86
- if p.is_dir():
87
- return p / ".jarvis" / "c2rust" / "symbols.jsonl"
88
- return Path(".") / ".jarvis" / "c2rust" / "symbols.jsonl"
89
-
90
-
91
- def _setup_output_paths(
92
- data_dir: Path,
93
- out_symbols_path: Optional[Path],
94
- out_mapping_path: Optional[Path],
95
- ) -> tuple[Path, Path, Path, Path, Path]:
96
- """设置输出路径,返回(符号表路径, 映射路径, 兼容符号表路径, 顺序路径, 别名顺序路径)"""
97
- if out_symbols_path is None:
98
- out_symbols_path = data_dir / DEFAULT_SYMBOLS_OUTPUT
99
- else:
100
- out_symbols_path = Path(out_symbols_path)
101
- if out_mapping_path is None:
102
- out_mapping_path = data_dir / DEFAULT_MAPPING_OUTPUT
103
- else:
104
- out_mapping_path = Path(out_mapping_path)
105
-
106
- # 兼容输出
107
- out_symbols_prune_path = data_dir / SYMBOLS_PRUNE_OUTPUT
108
- order_prune_path = data_dir / ORDER_PRUNE_OUTPUT
109
- alias_order_path = data_dir / ORDER_ALIAS_OUTPUT
110
-
111
- return out_symbols_path, out_mapping_path, out_symbols_prune_path, order_prune_path, alias_order_path
112
-
113
-
114
- def _load_symbols(sjsonl: Path) -> tuple[List[Dict[str, Any]], Dict[int, Dict[str, Any]], Dict[str, int], Set[int], Dict[int, List[str]]]:
115
- """加载符号表,返回(所有记录, id到记录映射, 名称到id映射, 函数id集合, id到引用名称映射)"""
116
- all_records: List[Dict[str, Any]] = []
117
- by_id: Dict[int, Dict[str, Any]] = {}
118
- name_to_id: Dict[str, int] = {}
119
- func_ids: Set[int] = set()
120
- id_refs_names: Dict[int, List[str]] = {}
121
-
122
- with open(sjsonl, "r", encoding="utf-8") as f:
123
- idx = 0
124
- for line in f:
125
- line = line.strip()
126
- if not line:
127
- continue
128
- try:
129
- obj = json.loads(line)
130
- except Exception:
131
- continue
132
- idx += 1
133
- fid = int(obj.get("id") or idx)
134
- obj["id"] = fid
135
- nm = obj.get("name") or ""
136
- qn = obj.get("qualified_name") or ""
137
- cat = obj.get("category") or "" # "function" | "type"
138
- refs = obj.get("ref")
139
- if not isinstance(refs, list):
140
- refs = []
141
- refs = [r for r in refs if isinstance(r, str) and r]
142
-
143
- all_records.append(obj)
144
- by_id[fid] = obj
145
- id_refs_names[fid] = refs
146
- if nm:
147
- name_to_id.setdefault(nm, fid)
148
- if qn:
149
- name_to_id.setdefault(qn, fid)
150
- if cat == "function":
151
- func_ids.add(fid)
152
-
153
- return all_records, by_id, name_to_id, func_ids, id_refs_names
154
-
155
-
156
- def _build_function_graph(
157
- func_ids: Set[int],
158
- id_refs_names: Dict[int, List[str]],
159
- name_to_id: Dict[str, int],
160
- ) -> Dict[int, List[int]]:
161
- """构建函数依赖图,返回id到依赖id列表的映射"""
162
- adj_func: Dict[int, List[int]] = {}
163
- for fid in func_ids:
164
- internal: List[int] = []
165
- for target in id_refs_names.get(fid, []):
166
- tid = name_to_id.get(target)
167
- if tid is not None and tid in func_ids and tid != fid:
168
- internal.append(tid)
169
- try:
170
- internal = list(dict.fromkeys(internal))
171
- except Exception:
172
- internal = sorted(list(set(internal)))
173
- adj_func[fid] = internal
174
- return adj_func
175
-
176
-
177
- def _build_evaluation_order(
178
- sjsonl: Path,
179
- func_ids: Set[int],
180
- adj_func: Dict[int, List[int]],
181
- ) -> List[int]:
182
- """构建评估顺序(广度优先,父先子后)"""
183
- # 评估队列:从所有无入边函数作为种子开始,按层次遍历整个图,使"父先于子"被评估;
184
- # 若不存在无入边节点(如强连通环),则回退为全量函数集合。
185
- try:
186
- roots_all = find_root_function_ids(sjsonl)
187
- except Exception:
188
- roots_all = []
189
- seeds = [rid for rid in roots_all if rid in func_ids]
190
- if not seeds:
191
- seeds = sorted(list(func_ids))
192
-
193
- visited: Set[int] = set()
194
- order: List[int] = []
195
- q: List[int] = list(seeds)
196
- qi = 0
197
- while qi < len(q):
198
- u = q[qi]
199
- qi += 1
200
- if u in visited or u not in func_ids:
201
- continue
202
- visited.add(u)
203
- order.append(u)
204
- for v in adj_func.get(u, []):
205
- if v not in visited and v in func_ids:
206
- q.append(v)
207
- # 若存在未覆盖的孤立/循环组件,补充其节点(确保每个函数节点都将被作为"候选根"参与评估)
208
- if len(visited) < len(func_ids):
209
- leftovers = [fid for fid in sorted(func_ids) if fid not in visited]
210
- order.extend(leftovers)
211
-
212
- return order
213
-
214
-
215
- def _collect_descendants(
216
- start: int,
217
- adj_func: Dict[int, List[int]],
218
- desc_cache: Dict[int, Set[int]],
219
- ) -> Set[int]:
220
- """收集从start开始的所有后代节点(使用缓存)"""
221
- if start in desc_cache:
222
- return desc_cache[start]
223
- visited: Set[int] = set()
224
- stack: List[int] = [start]
225
- visited.add(start)
226
- while stack:
227
- u = stack.pop()
228
- for v in adj_func.get(u, []):
229
- if v not in visited:
230
- visited.add(v)
231
- stack.append(v)
232
- desc_cache[start] = visited
233
- return visited
234
-
235
-
236
- def _process_candidate_scope(
237
- candidates: Optional[List[str]],
238
- all_records: List[Dict[str, Any]],
239
- root_funcs: List[int],
240
- func_ids: Set[int],
241
- adj_func: Dict[int, List[int]],
242
- desc_cache: Dict[int, Set[int]],
243
- ) -> tuple[List[int], Set[int]]:
244
- """处理候选根和作用域,返回(过滤后的根函数列表, 不可达函数集合)"""
245
- scope_unreachable_funcs: Set[int] = set()
246
- if not candidates:
247
- return root_funcs, scope_unreachable_funcs
248
-
249
- cand_ids: Set[int] = set()
250
- # 支持重载:同名/同限定名可能对应多个函数ID,需全部纳入候选
251
- key_set = set(candidates)
252
- for rec in all_records:
253
- if (rec.get("category") or "") != "function":
254
- continue
255
- nm = rec.get("name") or ""
256
- qn = rec.get("qualified_name") or ""
257
- if nm in key_set or qn in key_set:
258
- try:
259
- cand_ids.add(int(rec.get("id")))
260
- except Exception:
261
- continue
262
-
263
- if not cand_ids:
264
- return root_funcs, scope_unreachable_funcs
265
-
266
- filtered_roots = [rid for rid in root_funcs if rid in cand_ids]
267
- # 计算从候选根出发的可达函数集合(含根)
268
- reachable_all: Set[int] = set()
269
- for rid in filtered_roots:
270
- reachable_all.update(_collect_descendants(rid, adj_func, desc_cache))
271
- # 不可达函数(仅限函数类别)将被直接删除
272
- scope_unreachable_funcs = {fid for fid in func_ids if fid not in reachable_all}
273
- if scope_unreachable_funcs:
274
- typer.secho(
275
- f"[c2rust-library] 根据根列表,标记不可达函数删除: {len(scope_unreachable_funcs)} 个",
276
- fg=typer.colors.YELLOW,
277
- err=True,
278
- )
279
-
280
- return filtered_roots, scope_unreachable_funcs
281
-
282
-
283
- def _read_source_snippet(rec: Dict[str, Any], max_lines: int = DEFAULT_SOURCE_SNIPPET_MAX_LINES) -> str:
284
- """读取源码片段"""
285
- path = rec.get("file") or ""
286
- try:
287
- if not path:
288
- return ""
289
- p = Path(path)
290
- if not p.exists():
291
- return ""
292
- sl = int(rec.get("start_line") or 1)
293
- el = int(rec.get("end_line") or sl)
294
- if el < sl:
295
- el = sl
296
- lines = p.read_text(encoding="utf-8", errors="replace").splitlines()
297
- start_idx = max(sl - 1, 0)
298
- end_idx = min(el, len(lines))
299
- snippet_lines = lines[start_idx:end_idx]
300
- if len(snippet_lines) > max_lines:
301
- snippet_lines = snippet_lines[:max_lines]
302
- return "\n".join(snippet_lines)
303
- except Exception:
304
- return ""
305
-
306
-
307
- def _check_llm_availability() -> tuple[bool, Any, Any, Any]:
308
- """检查LLM可用性,返回(是否可用, PlatformRegistry, get_smart_platform_name, get_smart_model_name)
309
- 使用smart平台,适用于代码生成等复杂场景
310
- """
311
- try:
312
- from jarvis.jarvis_platform.registry import PlatformRegistry # type: ignore
313
- from jarvis.jarvis_utils.config import get_smart_platform_name, get_smart_model_name # type: ignore
314
- return True, PlatformRegistry, get_smart_platform_name, get_smart_model_name
315
- except Exception:
316
- return False, None, None, None
317
-
318
-
319
- def _normalize_disabled_libraries(disabled_libraries: Optional[List[str]]) -> tuple[List[str], str]:
320
- """规范化禁用库列表,返回(规范化列表, 显示字符串)"""
321
- disabled_norm: List[str] = []
322
- disabled_display: str = ""
323
- if isinstance(disabled_libraries, list):
324
- disabled_norm = [str(x).strip().lower() for x in disabled_libraries if str(x).strip()]
325
- disabled_display = ", ".join([str(x).strip() for x in disabled_libraries if str(x).strip()])
326
- return disabled_norm, disabled_display
327
-
328
-
329
- def _load_additional_notes(data_dir: Path) -> str:
330
- """从配置文件加载附加说明"""
331
- try:
332
- from jarvis.jarvis_c2rust.constants import CONFIG_JSON
333
- config_path = data_dir / CONFIG_JSON
334
- if config_path.exists():
335
- with config_path.open("r", encoding="utf-8") as f:
336
- config = json.load(f)
337
- if isinstance(config, dict):
338
- return str(config.get("additional_notes", "") or "").strip()
339
- except Exception:
340
- pass
341
- return ""
342
-
343
-
344
- def _normalize_list(items: Optional[List[str]]) -> List[str]:
345
- """规范化列表,去重并排序"""
346
- if not isinstance(items, list):
347
- return []
348
- vals: List[str] = []
349
- for x in items:
350
- try:
351
- s = str(x).strip()
352
- except Exception:
353
- continue
354
- if s:
355
- vals.append(s)
356
- try:
357
- vals = list(dict.fromkeys(vals))
358
- except Exception:
359
- vals = sorted(set(vals))
360
- return vals
361
-
362
-
363
- def _normalize_list_lower(items: Optional[List[str]]) -> List[str]:
364
- """规范化列表并转为小写"""
365
- return [s.lower() for s in _normalize_list(items)]
366
-
367
-
368
- def _make_checkpoint_key(
369
- sjsonl: Path,
370
- library_name: str,
371
- llm_group: Optional[str],
372
- candidates: Optional[List[str]],
373
- disabled_libraries: Optional[List[str]],
374
- max_funcs: Optional[int],
375
- ) -> Dict[str, Any]:
376
- """构建检查点键"""
377
- try:
378
- abs_sym = str(Path(sjsonl).resolve())
379
- except Exception:
380
- abs_sym = str(sjsonl)
381
- key: Dict[str, Any] = {
382
- "symbols": abs_sym,
383
- "library_name": str(library_name),
384
- "llm_group": str(llm_group or ""),
385
- "candidates": _normalize_list(candidates),
386
- "disabled_libraries": _normalize_list_lower(disabled_libraries),
387
- "max_funcs": (int(max_funcs) if isinstance(max_funcs, int) or (isinstance(max_funcs, float) and float(max_funcs).is_integer()) else None),
388
- }
389
- return key
390
-
391
-
392
- def _load_checkpoint_if_match(
393
- ckpt_path: Path,
394
- resume: bool,
395
- checkpoint_key: Dict[str, Any],
396
- ) -> Optional[Dict[str, Any]]:
397
- """加载匹配的检查点"""
398
- try:
399
- if not resume:
400
- return None
401
- if not ckpt_path.exists():
402
- return None
403
- obj = json.loads(ckpt_path.read_text(encoding="utf-8"))
404
- if not isinstance(obj, dict):
405
- return None
406
- if obj.get("key") != checkpoint_key:
407
- return None
408
- return obj
409
- except Exception:
410
- return None
411
-
412
-
413
- def _atomic_write(path: Path, content: str) -> None:
414
- """原子写入文件"""
415
- try:
416
- tmp = path.with_suffix(path.suffix + ".tmp")
417
- tmp.write_text(content, encoding="utf-8")
418
- tmp.replace(path)
419
- except Exception:
420
- try:
421
- path.write_text(content, encoding="utf-8")
422
- except Exception:
423
- pass
424
-
425
-
426
- def _create_llm_model(
427
- llm_group: Optional[str],
428
- disabled_display: str,
429
- _model_available: bool,
430
- PlatformRegistry: Any,
431
- get_smart_platform_name: Any,
432
- get_smart_model_name: Any,
433
- ) -> Optional[Any]:
434
- """创建LLM模型,使用smart平台,适用于代码生成等复杂场景"""
435
- if not _model_available:
436
- return None
437
- try:
438
- registry = PlatformRegistry.get_global_platform_registry() # type: ignore
439
- model = None
440
- if llm_group:
441
- try:
442
- platform_name = get_smart_platform_name(llm_group) # type: ignore
443
- if platform_name:
444
- model = registry.create_platform(platform_name) # type: ignore
445
- except Exception:
446
- model = None
447
- if model is None:
448
- model = registry.get_smart_platform() # type: ignore
449
- try:
450
- model.set_model_group(llm_group) # type: ignore
451
- except Exception:
452
- pass
453
- if llm_group:
454
- try:
455
- mn = get_smart_model_name(llm_group) # type: ignore
456
- if mn:
457
- model.set_model_name(mn) # type: ignore
458
- except Exception:
459
- pass
460
- model.set_system_prompt( # type: ignore
461
- "你是资深 C→Rust 迁移专家。任务:给定一个函数及其调用子树(依赖图摘要、函数签名、源码片段),"
462
- "判断是否可以使用一个或多个成熟的 Rust 库整体替代该子树的功能(允许库内多个 API 协同,允许多个库组合;不允许使用不成熟/不常见库)。"
463
- "如可替代,请给出 libraries 列表(库名),可选给出代表性 API/模块与实现备注 notes(如何用这些库协作实现)。"
464
- "输出格式:仅输出一个 <SUMMARY> 块,块内直接包含 JSON 对象(不需要额外的标签),字段: replaceable(bool), libraries(list[str]), confidence(float 0..1),可选 library(str,首选主库), api(str) 或 apis(list),notes(str)。"
465
- )
466
- return model
467
- except Exception as e:
468
- typer.secho(
469
- f"[c2rust-library] 初始化 LLM 平台失败,将回退为保守策略: {e}",
470
- fg=typer.colors.YELLOW,
471
- err=True,
472
- )
473
- return None
474
-
475
-
476
- def _parse_agent_json_summary(text: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
477
- """
478
- 解析Agent返回的JSON摘要
479
- 返回(解析结果, 错误信息)
480
- 如果解析成功,返回(data, None)
481
- 如果解析失败,返回(None, 错误信息)
482
- """
483
- if not isinstance(text, str) or not text.strip():
484
- return None, "摘要文本为空"
485
- import re as _re
486
- from jarvis.jarvis_utils.jsonnet_compat import loads as _json_loads
487
-
488
- # 提取 <SUMMARY> 块
489
- m_sum = _re.search(r"<SUMMARY>([\s\S]*?)</SUMMARY>", text, flags=_re.IGNORECASE)
490
- block = (m_sum.group(1) if m_sum else text).strip()
491
-
492
- if not block:
493
- return None, "未找到 <SUMMARY> 或 </SUMMARY> 标签,或标签内容为空"
494
-
495
- # 直接解析 <SUMMARY> 块内的内容为 JSON
496
- # jsonnet_compat.loads 会自动处理 markdown 代码块标记(如 ```json5、```json、``` 等)
497
- try:
498
- data = _json_loads(block)
499
- if isinstance(data, dict):
500
- return data, None
501
- return None, f"JSON 解析结果不是字典,而是 {type(data).__name__}"
502
- except Exception as json_err:
503
- return None, f"JSON 解析失败: {str(json_err)}"
504
-
505
-
506
def _build_subtree_prompt(
    fid: int,
    desc: Set[int],
    by_id: Dict[int, Dict[str, Any]],
    adj_func: Dict[int, List[int]],
    disabled_display: str,
    additional_notes: str = "",
) -> str:
    """Build the prompt asking the LLM to evaluate one function subtree.

    Args:
        fid: Id of the subtree's root function.
        desc: Ids of all functions in the subtree.
        by_id: Symbol records keyed by id.
        adj_func: Call edges (caller id -> callee ids).
        disabled_display: Human-readable list of banned libraries; when
            non-empty a constraint line is added to the prompt.
        additional_notes: Optional user-supplied notes appended at the end.

    Returns:
        The full prompt text: instructions, root function details, the
        (possibly truncated) node list, call edges, an optional DOT graph
        and a few source samples.
    """

    def _label(nid: int) -> str:
        node = by_id.get(nid, {})
        return node.get("qualified_name") or node.get("name") or f"sym_{nid}"

    root_rec = by_id.get(fid, {})
    root_name = root_rec.get("qualified_name") or root_rec.get("name") or f"sym_{fid}"
    root_sig = root_rec.get("signature") or ""
    root_lang = root_rec.get("language") or ""
    root_src = _read_source_snippet(root_rec)

    ordered_ids = sorted(desc)

    # Node list ("name | signature"), capped to keep the prompt bounded.
    nodes_meta: List[str] = []
    for nid in ordered_ids:
        node = by_id.get(nid, {})
        node_name = node.get("qualified_name") or node.get("name") or f"sym_{nid}"
        node_sig = node.get("signature") or ""
        nodes_meta.append(
            f"- {node_name} | {node_sig}" if node_sig and node_sig != node_name else f"- {node_name}"
        )
    if len(nodes_meta) > MAX_SUBTREE_NODES_META:
        nodes_meta = nodes_meta[:MAX_SUBTREE_NODES_META] + [f"...({len(desc)-MAX_SUBTREE_NODES_META} more)"]

    # Source samples: the root plus its first MAX_CHILD_SAMPLES direct callees.
    samples: List[str] = []
    for sid in [fid, *adj_func.get(fid, [])[:MAX_CHILD_SAMPLES]]:
        rec = by_id.get(sid, {})
        sample_name = rec.get("qualified_name") or rec.get("name") or f"sym_{sid}"
        sample_sig = rec.get("signature") or ""
        sample_src = _read_source_snippet(rec, max_lines=SUBTREE_SOURCE_SNIPPET_MAX_LINES)
        samples.append(f"--- BEGIN {sample_name} ---\n{sample_sig}\n{sample_src}\n--- END {sample_name} ---")

    # Call edges restricted to the subtree, computed once and shared by the
    # plain-text listing and the DOT rendering.
    edge_pairs = [
        (_label(u), _label(v))
        for u in ordered_ids
        for v in adj_func.get(u, [])
        if v in desc
    ]
    edges_list: List[str] = [f"{src} -> {dst}" for src, dst in edge_pairs]
    if len(edges_list) > MAX_SUBTREE_EDGES:
        shown_edges = edges_list[:MAX_SUBTREE_EDGES] + [f"...({len(edges_list) - MAX_SUBTREE_EDGES} more edges)"]
    else:
        shown_edges = edges_list
    edges_text: str = "\n".join(shown_edges)

    # A DOT graph is only included when the edge count is small.
    dot_text = ""
    if len(edges_list) <= MAX_DOT_EDGES:
        dot_lines: List[str] = ["digraph subtree {", " rankdir=LR;"]
        dot_lines.extend(f' "{src}" -> "{dst}";' for src, dst in edge_pairs)
        dot_lines.append("}")
        dot_text = "\n".join(dot_lines)

    if disabled_display:
        disabled_hint = f"重要约束:禁止使用以下库(若这些库为唯一可行选项则判定为不可替代):{disabled_display}\n"
    else:
        disabled_hint = ""

    sections: List[str] = [
        "请评估以下 C/C++ 函数子树是否可以由一个或多个成熟的 Rust 库整体替代(语义等价或更强)。",
        "允许库内多个 API 协同,允许多个库组合;如果必须依赖尚不成熟/冷门库或非 Rust 库,则判定为不可替代。\n",
        f"{disabled_hint}",
        "输出格式:仅输出一个 <SUMMARY> 块,块内直接包含 JSON 对象(不需要额外的标签),字段: replaceable(bool), libraries(list[str]), confidence(float 0..1),",
        "可选字段: library(str,首选主库), api(str) 或 apis(list), notes(str: 简述如何由这些库协作实现的思路)。\n\n",
        f"根函数(被评估子树的根): {root_name}\n",
        f"签名: {root_sig}\n",
        f"语言: {root_lang}\n",
        "根函数源码片段(可能截断):\n",
        f"{root_src}\n\n",
        f"子树规模: {len(desc)} 个函数\n",
        "子树函数列表(名称|签名):\n",
        "\n".join(nodes_meta),
        "\n\n",
        "依赖图(调用边,caller -> callee):\n",
        f"{edges_text}\n\n",
    ]
    if dot_text:
        sections.append(f"DOT 表示(边数较少时提供):\n```dot\n{dot_text}\n```\n\n")
    sections.append("代表性源码样本(部分节点,可能截断,仅供辅助判断):\n")
    sections.append("\n".join(samples))
    sections.append("\n")
    if additional_notes:
        sections.append(f"\n【附加说明(用户自定义)】\n{additional_notes}\n")
    return "".join(sections)
601
-
602
-
603
- def _llm_evaluate_subtree(
604
- fid: int,
605
- desc: Set[int],
606
- by_id: Dict[int, Dict[str, Any]],
607
- adj_func: Dict[int, List[int]],
608
- disabled_norm: List[str],
609
- disabled_display: str,
610
- _model_available: bool,
611
- _new_model_func: Callable,
612
- additional_notes: str = "",
613
- ) -> Dict[str, Any]:
614
- """使用LLM评估子树是否可替代,支持最多3次重试"""
615
- if not _model_available:
616
- return {"replaceable": False}
617
- model = _new_model_func()
618
- if not model:
619
- return {"replaceable": False}
620
-
621
- base_prompt = _build_subtree_prompt(fid, desc, by_id, adj_func, disabled_display, additional_notes)
622
- last_parse_error = None
623
-
624
- for attempt in range(1, MAX_LLM_RETRIES + 1):
625
- try:
626
- # 构建当前尝试的提示词
627
- if attempt == 1:
628
- prompt = base_prompt
629
- else:
630
- # 重试时包含之前的错误信息
631
- error_hint = ""
632
- if last_parse_error:
633
- error_hint = (
634
- f"\n\n**格式错误详情(请根据以下错误修复输出格式):**\n- {last_parse_error}\n\n"
635
- + "请确保输出的JSON格式正确,包括正确的引号、逗号、大括号等。仅输出一个 <SUMMARY> 块,块内直接包含 JSON 对象(不需要额外的标签)。"
636
- )
637
- prompt = base_prompt + error_hint
638
-
639
- # 调用LLM
640
- result = model.chat_until_success(prompt) # type: ignore
641
- parsed, parse_error = _parse_agent_json_summary(result or "")
642
-
643
- if parse_error:
644
- # JSON解析失败,记录错误并准备重试
645
- last_parse_error = parse_error
646
- typer.secho(
647
- f"[c2rust-library] 第 {attempt}/{MAX_LLM_RETRIES} 次尝试:JSON解析失败: {parse_error}",
648
- fg=typer.colors.YELLOW,
649
- err=True,
650
- )
651
- # 打印原始内容以便调试
652
- result_text = str(result or "").strip()
653
- if result_text:
654
- typer.secho(
655
- f"[c2rust-library] 原始LLM响应内容(前1000字符):\n{result_text[:1000]}",
656
- fg=typer.colors.RED,
657
- err=True,
658
- )
659
- if len(result_text) > 1000:
660
- typer.secho(
661
- f"[c2rust-library] ... (还有 {len(result_text) - 1000} 个字符未显示)",
662
- fg=typer.colors.RED,
663
- err=True,
664
- )
665
- if attempt < MAX_LLM_RETRIES:
666
- continue # 继续重试
667
- else:
668
- # 最后一次尝试也失败,使用默认值
669
- typer.secho(
670
- f"[c2rust-library] 重试 {MAX_LLM_RETRIES} 次后JSON解析仍然失败: {parse_error},使用默认值",
671
- fg=typer.colors.YELLOW,
672
- err=True,
673
- )
674
- return {"replaceable": False}
675
-
676
- # 解析成功,检查是否为字典
677
- if not isinstance(parsed, dict):
678
- last_parse_error = f"解析结果不是字典,而是 {type(parsed).__name__}"
679
- typer.secho(
680
- f"[c2rust-library] 第 {attempt}/{MAX_LLM_RETRIES} 次尝试:{last_parse_error}",
681
- fg=typer.colors.YELLOW,
682
- err=True,
683
- )
684
- # 打印解析结果和原始内容以便调试
685
- typer.secho(
686
- f"[c2rust-library] 解析结果类型: {type(parsed).__name__}, 值: {repr(parsed)[:500]}",
687
- fg=typer.colors.RED,
688
- err=True,
689
- )
690
- result_text = str(result or "").strip()
691
- if result_text:
692
- typer.secho(
693
- f"[c2rust-library] 原始LLM响应内容(前1000字符):\n{result_text[:1000]}",
694
- fg=typer.colors.RED,
695
- err=True,
696
- )
697
- if attempt < MAX_LLM_RETRIES:
698
- continue # 继续重试
699
- else:
700
- typer.secho(
701
- f"[c2rust-library] 重试 {MAX_LLM_RETRIES} 次后结果格式仍然不正确,视为不可替代。",
702
- fg=typer.colors.YELLOW,
703
- err=True,
704
- )
705
- return {"replaceable": False}
706
-
707
- # 成功解析为字典,处理结果
708
- rep = bool(parsed.get("replaceable") is True)
709
- lib = str(parsed.get("library") or "").strip()
710
- api = str(parsed.get("api") or parsed.get("function") or "").strip()
711
- apis = parsed.get("apis")
712
- libs_raw = parsed.get("libraries")
713
- notes = str(parsed.get("notes") or "").strip()
714
- # 归一化 libraries
715
- libraries: List[str] = []
716
- if isinstance(libs_raw, list):
717
- libraries = [str(x).strip() for x in libs_raw if str(x).strip()]
718
- elif isinstance(libs_raw, str):
719
- libraries = [s.strip() for s in libs_raw.split(",") if s.strip()]
720
- conf = parsed.get("confidence")
721
- try:
722
- conf = float(conf)
723
- except Exception:
724
- conf = 0.0
725
- # 不强制要求具体 API 或特定库名;若缺省且存在 library 字段,则纳入 libraries
726
- if not libraries and lib:
727
- libraries = [lib]
728
-
729
- # 禁用库命中时,强制视为不可替代
730
- if disabled_norm:
731
- libs_lower = [lib_name.lower() for lib_name in libraries]
732
- lib_single_lower = lib.lower() if lib else ""
733
- banned_hit = any(lower_lib in disabled_norm for lower_lib in libs_lower) or (lib_single_lower and lib_single_lower in disabled_norm)
734
- if banned_hit:
735
- rep = False
736
- warn_libs = ", ".join(sorted(set([lib] + libraries))) if (libraries or lib) else "(未提供库名)"
737
- root_rec = by_id.get(fid, {})
738
- root_name = root_rec.get("qualified_name") or root_rec.get("name") or f"sym_{fid}"
739
- typer.secho(
740
- f"[c2rust-library] 评估结果包含禁用库,强制判定为不可替代: {root_name} | 命中库: {warn_libs}",
741
- fg=typer.colors.YELLOW,
742
- err=True,
743
- )
744
- if notes:
745
- notes = notes + f" | 禁用库命中: {warn_libs}"
746
- else:
747
- notes = f"禁用库命中: {warn_libs}"
748
-
749
- result_obj: Dict[str, Any] = {
750
- "replaceable": rep,
751
- "library": lib,
752
- "libraries": libraries,
753
- "api": api,
754
- "confidence": conf,
755
- }
756
- if isinstance(apis, list):
757
- result_obj["apis"] = apis
758
- if notes:
759
- result_obj["notes"] = notes
760
-
761
- # 成功获取结果,返回
762
- if attempt > 1:
763
- typer.secho(
764
- f"[c2rust-library] 第 {attempt} 次尝试成功获取评估结果",
765
- fg=typer.colors.GREEN,
766
- err=True,
767
- )
768
- return result_obj
769
-
770
- except Exception as e:
771
- # LLM调用异常,记录并准备重试
772
- last_parse_error = f"LLM调用异常: {str(e)}"
773
- typer.secho(
774
- f"[c2rust-library] 第 {attempt}/{MAX_LLM_RETRIES} 次尝试:LLM评估失败: {e}",
775
- fg=typer.colors.YELLOW,
776
- err=True,
777
- )
778
- if attempt < MAX_LLM_RETRIES:
779
- continue # 继续重试
780
- else:
781
- # 最后一次尝试也失败,返回默认值
782
- typer.secho(
783
- f"[c2rust-library] 重试 {MAX_LLM_RETRIES} 次后LLM评估仍然失败: {e},视为不可替代",
784
- fg=typer.colors.YELLOW,
785
- err=True,
786
- )
787
- return {"replaceable": False}
788
-
789
- # 理论上不会到达这里,但作为保险
790
- return {"replaceable": False}
791
-
792
-
793
- def _is_entry_function(
794
- rec_meta: Dict[str, Any],
795
- ) -> bool:
796
- """判断是否为入口函数"""
797
- nm = str(rec_meta.get("name") or "")
798
- qn = str(rec_meta.get("qualified_name") or "")
799
- # Configurable entry detection (avoid hard-coding 'main'):
800
- # Honor env vars: JARVIS_C2RUST_DELAY_ENTRY_SYMBOLS / JARVIS_C2RUST_DELAY_ENTRIES / C2RUST_DELAY_ENTRIES
801
- import os
802
- entries_env = os.environ.get("JARVIS_C2RUST_DELAY_ENTRY_SYMBOLS") or \
803
- os.environ.get("JARVIS_C2RUST_DELAY_ENTRIES") or \
804
- os.environ.get("C2RUST_DELAY_ENTRIES") or ""
805
- entries_set = set()
806
- if entries_env:
807
- try:
808
- import re as _re
809
- parts = _re.split(r"[,\s;]+", entries_env.strip())
810
- except Exception:
811
- parts = [p.strip() for p in entries_env.replace(";", ",").split(",")]
812
- entries_set = {p.strip().lower() for p in parts if p and p.strip()}
813
- if entries_set:
814
- is_entry = (nm.lower() in entries_set) or (qn.lower() in entries_set) or any(qn.lower().endswith(f"::{e}") for e in entries_set)
815
- else:
816
- is_entry = (nm.lower() == "main") or (qn.lower() == "main") or qn.lower().endswith("::main")
817
- return is_entry
818
-
819
-
820
- def _write_output_symbols(
821
- all_records: List[Dict[str, Any]],
822
- pruned_funcs: Set[int],
823
- selected_roots: List[Tuple[int, Dict[str, Any]]],
824
- out_symbols_path: Path,
825
- out_symbols_prune_path: Path,
826
- ) -> List[Dict[str, Any]]:
827
- """写出新符号表,返回替代映射列表"""
828
- now_ts = time.strftime("%Y-%m-%dT%H:%M:%S", time.localtime())
829
- kept_ids: Set[int] = set()
830
- for rec in all_records:
831
- fid = int(rec.get("id"))
832
- cat = rec.get("category") or ""
833
- if cat == "function":
834
- if fid in pruned_funcs:
835
- continue
836
- kept_ids.add(fid)
837
- else:
838
- kept_ids.add(fid)
839
-
840
- sel_root_ids = set(fid for fid, _ in selected_roots)
841
- replacements: List[Dict[str, Any]] = []
842
-
843
- with open(out_symbols_path, "w", encoding="utf-8") as fo, \
844
- open(out_symbols_prune_path, "w", encoding="utf-8") as fo2:
845
-
846
- for rec in all_records:
847
- fid = int(rec.get("id"))
848
- if fid not in kept_ids:
849
- continue
850
-
851
- rec_out = dict(rec)
852
- if (rec.get("category") or "") == "function" and fid in sel_root_ids:
853
- # 以库级替代为语义:不要求具体 API;将根 ref 设置为库占位符(支持多库组合)
854
- conf = 0.0
855
- api = ""
856
- apis = None
857
- libraries_out: List[str] = []
858
- notes_out: str = ""
859
- lib_single: str = ""
860
- for rf, rres in selected_roots:
861
- if rf == fid:
862
- api = str(rres.get("api") or rres.get("function") or "")
863
- apis = rres.get("apis")
864
- libs_val = rres.get("libraries")
865
- if isinstance(libs_val, list):
866
- libraries_out = [str(x) for x in libs_val if str(x)]
867
- lib_single = str(rres.get("library") or "").strip()
868
- try:
869
- conf = float(rres.get("confidence") or 0.0)
870
- except Exception:
871
- conf = 0.0
872
- notes_val = rres.get("notes")
873
- if isinstance(notes_val, str):
874
- notes_out = notes_val
875
- break
876
- # 若 libraries 存在则使用多库占位;否则若存在单个 library 字段则使用之;否则置空
877
- if libraries_out:
878
- lib_markers = [f"lib::{lb}" for lb in libraries_out]
879
- elif lib_single:
880
- lib_markers = [f"lib::{lib_single}"]
881
- else:
882
- lib_markers = []
883
- rec_out["ref"] = lib_markers
884
- try:
885
- rec_out["updated_at"] = now_ts
886
- except Exception:
887
- pass
888
- # 保存库替代元数据到符号表,供后续转译阶段作为上下文使用
889
- try:
890
- meta_apis = apis if isinstance(apis, list) else ([api] if api else [])
891
- lib_primary = libraries_out[0] if libraries_out else lib_single
892
- rec_out["lib_replacement"] = {
893
- "libraries": libraries_out,
894
- "library": lib_primary or "",
895
- "apis": meta_apis,
896
- "api": api,
897
- "confidence": float(conf) if isinstance(conf, (int, float)) else 0.0,
898
- "notes": notes_out,
899
- "mode": "llm",
900
- "updated_at": now_ts,
901
- }
902
- except Exception:
903
- # 忽略写入元数据失败,不阻塞主流程
904
- pass
905
- rep_obj: Dict[str, Any] = {
906
- "id": fid,
907
- "name": rec.get("name") or "",
908
- "qualified_name": rec.get("qualified_name") or "",
909
- "library": (libraries_out[0] if libraries_out else lib_single),
910
- "libraries": libraries_out,
911
- "function": api,
912
- "confidence": conf,
913
- "mode": "llm",
914
- }
915
- if isinstance(apis, list):
916
- rep_obj["apis"] = apis
917
- if notes_out:
918
- rep_obj["notes"] = notes_out
919
- replacements.append(rep_obj)
920
-
921
- line = json.dumps(rec_out, ensure_ascii=False) + "\n"
922
- fo.write(line)
923
- fo2.write(line)
924
- # 不覆写 symbols.jsonl(保留原始扫描/整理结果作为基线)
925
-
926
- return replacements
34
+ from typing import Any
35
+ from typing import Dict
36
+ from typing import List
37
+ from typing import Optional
38
+ from typing import Set
39
+ from typing import Tuple
40
+
41
+ from jarvis.jarvis_utils.output import PrettyOutput
42
+ from jarvis.jarvis_c2rust.constants import DEFAULT_CHECKPOINT_FILE
43
+ from jarvis.jarvis_c2rust.constants import DEFAULT_CHECKPOINT_INTERVAL
44
+ from jarvis.jarvis_c2rust.constants import MAX_NOTES_DISPLAY_LENGTH
45
+ from jarvis.jarvis_c2rust.library_replacer_checkpoint import create_checkpoint_state
46
+ from jarvis.jarvis_c2rust.library_replacer_checkpoint import load_checkpoint_if_match
47
+ from jarvis.jarvis_c2rust.library_replacer_checkpoint import make_checkpoint_key
48
+ from jarvis.jarvis_c2rust.library_replacer_checkpoint import periodic_checkpoint_save
49
+ from jarvis.jarvis_c2rust.library_replacer_llm import check_llm_availability
50
+ from jarvis.jarvis_c2rust.library_replacer_llm import create_llm_model
51
+ from jarvis.jarvis_c2rust.library_replacer_llm import llm_evaluate_subtree
52
+ from jarvis.jarvis_c2rust.library_replacer_loader import build_evaluation_order
53
+ from jarvis.jarvis_c2rust.library_replacer_loader import build_function_graph
54
+ from jarvis.jarvis_c2rust.library_replacer_loader import collect_descendants
55
+ from jarvis.jarvis_c2rust.library_replacer_loader import load_symbols
56
+ from jarvis.jarvis_c2rust.library_replacer_loader import process_candidate_scope
57
+ from jarvis.jarvis_c2rust.library_replacer_output import write_output_symbols
58
+ from jarvis.jarvis_c2rust.library_replacer_utils import is_entry_function
59
+ from jarvis.jarvis_c2rust.library_replacer_utils import load_additional_notes
60
+ from jarvis.jarvis_c2rust.library_replacer_utils import normalize_disabled_libraries
61
+ from jarvis.jarvis_c2rust.library_replacer_utils import resolve_symbols_jsonl_path
62
+ from jarvis.jarvis_c2rust.library_replacer_utils import setup_output_paths
63
+ from jarvis.jarvis_c2rust.scanner import compute_translation_order_jsonl
927
64
 
928
65
 
929
66
  def apply_library_replacement(
@@ -955,128 +92,140 @@ def apply_library_replacement(
955
92
  返回:
956
93
  Dict[str, Path]: {"symbols": 新符号表路径, "mapping": 替代映射路径, "symbols_prune": 兼容符号表路径, "order": 通用顺序路径, "order_prune": 剪枝阶段顺序路径}
957
94
  """
958
- sjsonl = _resolve_symbols_jsonl_path(db_path)
95
+ sjsonl = resolve_symbols_jsonl_path(db_path)
959
96
  if not sjsonl.exists():
960
97
  raise FileNotFoundError(f"未找到 symbols.jsonl: {sjsonl}")
961
98
 
962
99
  data_dir = sjsonl.parent
963
- out_symbols_path, out_mapping_path, out_symbols_prune_path, order_prune_path, alias_order_path = _setup_output_paths(
964
- data_dir, out_symbols_path, out_mapping_path
965
- )
100
+ (
101
+ out_symbols_path,
102
+ out_mapping_path,
103
+ out_symbols_prune_path,
104
+ order_prune_path,
105
+ alias_order_path,
106
+ ) = setup_output_paths(data_dir, out_symbols_path, out_mapping_path)
966
107
 
967
108
  # Checkpoint 默认路径
968
109
  if checkpoint_path is None:
969
110
  checkpoint_path = data_dir / DEFAULT_CHECKPOINT_FILE
970
111
 
971
112
  # 读取符号
972
- all_records, by_id, name_to_id, func_ids, id_refs_names = _load_symbols(sjsonl)
113
+ all_records, by_id, name_to_id, func_ids, id_refs_names = load_symbols(sjsonl)
973
114
 
974
115
  # 构造函数内边(id→id)
975
- adj_func = _build_function_graph(func_ids, id_refs_names, name_to_id)
116
+ adj_func = build_function_graph(func_ids, id_refs_names, name_to_id)
976
117
 
977
118
  # 构建评估顺序
978
- root_funcs = _build_evaluation_order(sjsonl, func_ids, adj_func)
119
+ root_funcs = build_evaluation_order(sjsonl, func_ids, adj_func)
979
120
 
980
121
  # 可达缓存(需在 candidates 使用前定义,避免前向引用)
981
122
  desc_cache: Dict[int, Set[int]] = {}
982
123
 
983
124
  # 如果传入 candidates,则仅评估这些节点(按上面的顺序过滤),并限定作用域:
984
125
  # - 仅保留从这些根可达的函数;对不可达函数直接删除(类型记录保留)
985
- root_funcs, scope_unreachable_funcs = _process_candidate_scope(
126
+ root_funcs, scope_unreachable_funcs = process_candidate_scope(
986
127
  candidates, all_records, root_funcs, func_ids, adj_func, desc_cache
987
128
  )
988
129
 
989
130
  # LLM 可用性
990
- _model_available, PlatformRegistry, get_normal_platform_name, get_normal_model_name = _check_llm_availability()
131
+ (
132
+ model_available,
133
+ PlatformRegistry,
134
+ get_smart_platform_name,
135
+ get_smart_model_name,
136
+ ) = check_llm_availability()
991
137
 
992
138
  # 预处理禁用库
993
- disabled_norm, disabled_display = _normalize_disabled_libraries(disabled_libraries)
139
+ disabled_norm, disabled_display = normalize_disabled_libraries(disabled_libraries)
994
140
 
995
141
  # 读取附加说明
996
- additional_notes = _load_additional_notes(data_dir)
142
+ additional_notes = load_additional_notes(data_dir)
997
143
 
998
144
  # 断点恢复支持:工具函数与关键键构造
999
- ckpt_path: Path = Path(checkpoint_path) if checkpoint_path is not None else (data_dir / DEFAULT_CHECKPOINT_FILE)
1000
- checkpoint_key = _make_checkpoint_key(sjsonl, library_name, llm_group, candidates, disabled_libraries, max_funcs)
145
+ ckpt_path: Path = (
146
+ Path(checkpoint_path)
147
+ if checkpoint_path is not None
148
+ else (data_dir / DEFAULT_CHECKPOINT_FILE)
149
+ )
150
+ checkpoint_key = make_checkpoint_key(
151
+ sjsonl, library_name, llm_group, candidates, disabled_libraries, max_funcs
152
+ )
1001
153
 
1002
- def _new_model() -> Optional[Any]:
1003
- return _create_llm_model(llm_group, disabled_display, _model_available, PlatformRegistry, get_normal_platform_name, get_normal_model_name)
154
+ def new_model() -> Optional[Any]:
155
+ return create_llm_model(
156
+ llm_group,
157
+ disabled_display,
158
+ model_available,
159
+ PlatformRegistry,
160
+ get_smart_platform_name,
161
+ get_smart_model_name,
162
+ )
1004
163
 
1005
164
  # 评估阶段:若某节点评估不可替代,则继续评估其子节点(递归/深度优先)
1006
165
  eval_counter = 0
1007
166
  pruned_dynamic: Set[int] = set() # 动态累计的"将被剪除"的函数集合(不含选中根)
1008
- selected_roots: List[Tuple[int, Dict[str, Any]]] = [] # 实时选中的可替代根(fid, LLM结果)
167
+ selected_roots: List[
168
+ Tuple[int, Dict[str, Any]]
169
+ ] = [] # 实时选中的可替代根(fid, LLM结果)
1009
170
  processed_roots: Set[int] = set() # 已处理(评估或跳过)的根集合
1010
171
  root_funcs_processed: Set[int] = set() # 已处理的初始根函数集合(用于进度显示)
1011
172
  last_ckpt_saved = 0 # 上次保存的计数
1012
173
 
1013
174
  # 若存在匹配的断点文件,则加载恢复
1014
- _loaded_ckpt = _load_checkpoint_if_match(ckpt_path, resume, checkpoint_key)
1015
- if resume and _loaded_ckpt:
175
+ loaded_ckpt = load_checkpoint_if_match(ckpt_path, resume, checkpoint_key)
176
+ if resume and loaded_ckpt:
1016
177
  try:
1017
- eval_counter = int(_loaded_ckpt.get("eval_counter") or 0)
178
+ eval_counter = int(loaded_ckpt.get("eval_counter") or 0)
1018
179
  except Exception:
1019
180
  pass
1020
181
  try:
1021
- processed_roots = set(int(x) for x in (_loaded_ckpt.get("processed_roots") or []))
182
+ processed_roots = set(
183
+ int(x) for x in (loaded_ckpt.get("processed_roots") or [])
184
+ )
1022
185
  except Exception:
1023
186
  processed_roots = set()
1024
187
  try:
1025
- pruned_dynamic = set(int(x) for x in (_loaded_ckpt.get("pruned_dynamic") or []))
188
+ pruned_dynamic = set(
189
+ int(x) for x in (loaded_ckpt.get("pruned_dynamic") or [])
190
+ )
1026
191
  except Exception:
1027
192
  pruned_dynamic = set()
1028
193
  try:
1029
194
  sr_list = []
1030
- for it in (_loaded_ckpt.get("selected_roots") or []):
195
+ for it in loaded_ckpt.get("selected_roots") or []:
1031
196
  if isinstance(it, dict) and "fid" in it and "res" in it:
1032
- sr_list.append((int(it["fid"]), it["res"]))
197
+ try:
198
+ fid_val = int(it["fid"])
199
+ res_val = it["res"]
200
+ if isinstance(res_val, dict):
201
+ sr_list.append((fid_val, res_val))
202
+ except (ValueError, TypeError, KeyError):
203
+ continue
1033
204
  selected_roots = sr_list
1034
- except Exception:
205
+ if selected_roots:
206
+ PrettyOutput.auto_print(
207
+ f"[c2rust-library] 从断点恢复 selected_roots: {len(selected_roots)} 个替代根"
208
+ )
209
+ else:
210
+ PrettyOutput.auto_print(
211
+ "[c2rust-library] 警告: 从断点恢复时 selected_roots 为空,可能导致 library_replacements.jsonl 为空"
212
+ )
213
+ except Exception as e:
1035
214
  selected_roots = []
215
+ PrettyOutput.auto_print(
216
+ f"[c2rust-library] 从断点恢复 selected_roots 时出错: {e},将使用空列表"
217
+ )
1036
218
  # 恢复已处理的初始根函数集合(从 processed_roots 中筛选出在 root_funcs 中的)
1037
219
  try:
1038
220
  root_funcs_processed = {fid for fid in processed_roots if fid in root_funcs}
1039
221
  except Exception:
1040
222
  root_funcs_processed = set()
1041
- typer.secho(
1042
- f"[c2rust-library] 已从断点恢复: 已评估={eval_counter}, 已处理根={len(processed_roots)}, 已剪除={len(pruned_dynamic)}, 已选中替代根={len(selected_roots)}",
1043
- fg=typer.colors.YELLOW,
1044
- err=True,
223
+ PrettyOutput.auto_print(
224
+ f"[c2rust-library] 已从断点恢复: 已评估={eval_counter}, 已处理根={len(processed_roots)}, 已剪除={len(pruned_dynamic)}, 已选中替代根={len(selected_roots)}"
1045
225
  )
1046
226
 
1047
- def _current_checkpoint_state() -> Dict[str, Any]:
1048
- try:
1049
- ts = time.strftime("%Y-%m-%dT%H:%M:%S", time.localtime())
1050
- except Exception:
1051
- ts = ""
1052
- return {
1053
- "key": checkpoint_key,
1054
- "eval_counter": eval_counter,
1055
- "processed_roots": sorted(list(processed_roots)),
1056
- "pruned_dynamic": sorted(list(pruned_dynamic)),
1057
- "selected_roots": [{"fid": fid, "res": res} for fid, res in selected_roots],
1058
- "timestamp": ts,
1059
- }
1060
-
1061
- def _periodic_checkpoint_save(force: bool = False) -> None:
1062
- nonlocal last_ckpt_saved
1063
- if not resume:
1064
- return
1065
- try:
1066
- interval = int(checkpoint_interval)
1067
- except Exception:
1068
- interval = DEFAULT_CHECKPOINT_INTERVAL
1069
- need_save = force or (interval <= 0) or ((eval_counter - last_ckpt_saved) >= interval)
1070
- if not need_save:
1071
- return
1072
- try:
1073
- _atomic_write(ckpt_path, json.dumps(_current_checkpoint_state(), ensure_ascii=False, indent=JSON_INDENT))
1074
- last_ckpt_saved = eval_counter
1075
- except Exception:
1076
- pass
1077
-
1078
- def _evaluate_node(fid: int, is_root_func: bool = False) -> None:
1079
- nonlocal eval_counter
227
+ def evaluate_node(fid: int, is_root_func: bool = False) -> None:
228
+ nonlocal eval_counter, last_ckpt_saved
1080
229
  # 限流
1081
230
  if max_funcs is not None and eval_counter >= max_funcs:
1082
231
  return
@@ -1085,7 +234,7 @@ def apply_library_replacement(
1085
234
  return
1086
235
 
1087
236
  # 构造子树并打印进度
1088
- desc = _collect_descendants(fid, adj_func, desc_cache)
237
+ desc = collect_descendants(fid, adj_func, desc_cache)
1089
238
  rec_meta = by_id.get(fid, {})
1090
239
  label = rec_meta.get("qualified_name") or rec_meta.get("name") or f"sym_{fid}"
1091
240
  # 计算进度:区分初始根函数和递归评估的子节点
@@ -1094,59 +243,94 @@ def apply_library_replacement(
1094
243
  if is_root_func:
1095
244
  # 初始根函数:显示 (当前根函数索引/总根函数数)
1096
245
  root_progress = len(root_funcs_processed) + 1
1097
- progress_info = f"({root_progress}/{total_roots})" if total_roots > 0 else ""
246
+ progress_info = (
247
+ f"({root_progress}/{total_roots})" if total_roots > 0 else ""
248
+ )
1098
249
  else:
1099
250
  # 递归评估的子节点:显示 (当前根函数索引/总根函数数, 总评估节点数)
1100
251
  root_progress = len(root_funcs_processed)
1101
252
  if total_roots > 0:
1102
- progress_info = f"({root_progress}/{total_roots}, 总评估={total_evaluated})"
253
+ progress_info = (
254
+ f"({root_progress}/{total_roots}, 总评估={total_evaluated})"
255
+ )
1103
256
  else:
1104
257
  progress_info = f"(总评估={total_evaluated})"
1105
- typer.secho(
1106
- f"[c2rust-library] {progress_info} 正在评估: {label} (ID: {fid}), 子树函数数={len(desc)}",
1107
- fg=typer.colors.CYAN,
1108
- err=True,
258
+ PrettyOutput.auto_print(
259
+ f"[c2rust-library] {progress_info} 正在评估: {label} (ID: {fid}), 子树函数数={len(desc)}"
1109
260
  )
1110
261
 
1111
262
  # 执行 LLM 评估
1112
- res = _llm_evaluate_subtree(
1113
- fid, desc, by_id, adj_func, disabled_norm, disabled_display,
1114
- _model_available, _new_model, additional_notes
263
+ res = llm_evaluate_subtree(
264
+ fid,
265
+ desc,
266
+ by_id,
267
+ adj_func,
268
+ disabled_norm,
269
+ disabled_display,
270
+ model_available,
271
+ new_model,
272
+ additional_notes,
1115
273
  )
1116
274
  eval_counter += 1
1117
275
  processed_roots.add(fid)
1118
276
  if is_root_func:
1119
277
  root_funcs_processed.add(fid)
1120
278
  res["mode"] = "llm"
1121
- _periodic_checkpoint_save()
279
+
280
+ # 更新检查点
281
+ checkpoint_state = create_checkpoint_state(
282
+ checkpoint_key,
283
+ eval_counter,
284
+ processed_roots,
285
+ pruned_dynamic,
286
+ selected_roots,
287
+ )
288
+ last_ckpt_saved = periodic_checkpoint_save(
289
+ ckpt_path,
290
+ checkpoint_state,
291
+ eval_counter,
292
+ last_ckpt_saved,
293
+ checkpoint_interval,
294
+ resume,
295
+ )
1122
296
 
1123
297
  # 若可替代,打印评估结果摘要(库/参考API/置信度/备注),并即时标记子孙剪除与后续跳过
1124
298
  try:
1125
299
  if res.get("replaceable") is True:
1126
- libs = res.get("libraries") or ([res.get("library")] if res.get("library") else [])
300
+ libs = res.get("libraries") or (
301
+ [res.get("library")] if res.get("library") else []
302
+ )
1127
303
  libs = [str(x) for x in libs if str(x)]
1128
304
  api = str(res.get("api") or "")
1129
305
  apis = res.get("apis")
1130
306
  notes = str(res.get("notes") or "")
1131
307
  conf = res.get("confidence")
1132
308
  try:
1133
- conf = float(conf)
309
+ conf = float(conf) if conf is not None else 0.0
1134
310
  except Exception:
1135
311
  conf = 0.0
1136
312
  libs_str = ", ".join(libs) if libs else "(未指定库)"
1137
- apis_str = ", ".join([str(a) for a in apis]) if isinstance(apis, list) else (api if api else "")
313
+ apis_str = (
314
+ ", ".join([str(a) for a in apis])
315
+ if isinstance(apis, list)
316
+ else (api if api else "")
317
+ )
1138
318
  # 计算进度:区分初始根函数和递归评估的子节点
1139
319
  total_roots = len(root_funcs)
1140
320
  if is_root_func:
1141
321
  # 初始根函数:显示 (当前根函数索引/总根函数数)
1142
322
  root_progress = len(root_funcs_processed)
1143
- progress_info = f"({root_progress}/{total_roots})" if total_roots > 0 else ""
323
+ progress_info = (
324
+ f"({root_progress}/{total_roots})" if total_roots > 0 else ""
325
+ )
1144
326
  else:
1145
327
  # 递归评估的子节点:显示 (当前根函数索引/总根函数数, 总评估节点数)
1146
328
  root_progress = len(root_funcs_processed)
1147
329
  total_evaluated = len(processed_roots)
1148
330
  if total_roots > 0:
1149
- progress_info = f"({root_progress}/{total_roots}, 总评估={total_evaluated})"
331
+ progress_info = (
332
+ f"({root_progress}/{total_roots}, 总评估={total_evaluated})"
333
+ )
1150
334
  else:
1151
335
  progress_info = f"(总评估={total_evaluated})"
1152
336
  msg = f"[c2rust-library] {progress_info} 可替换: {label} -> 库: {libs_str}"
@@ -1155,41 +339,68 @@ def apply_library_replacement(
1155
339
  msg += f"; 置信度: {conf:.2f}"
1156
340
  if notes:
1157
341
  msg += f"; 备注: {notes[:MAX_NOTES_DISPLAY_LENGTH]}"
1158
- typer.secho(msg, fg=typer.colors.GREEN, err=True)
1159
-
1160
- # 入口函数保护:不替代 main(保留进行转译),改为深入评估其子节点
1161
- if _is_entry_function(rec_meta):
1162
- typer.secho(
1163
- "[c2rust-library] 入口函数保护:跳过对 main 的库替代,继续评估其子节点。",
1164
- fg=typer.colors.YELLOW,
1165
- err=True,
342
+ PrettyOutput.auto_print(msg)
343
+
344
+ # 如果节点可替代,无论是否最终替代(如入口函数保护),都不评估其子节点
345
+ # 入口函数保护:不替代 main(保留进行转译),但需要剪除其子节点(因为功能可由库实现)
346
+ # 即时剪枝(不含根):无论是否为入口函数,只要可替代就剪除子节点
347
+ to_prune = set(desc)
348
+ to_prune.discard(fid)
349
+
350
+ newly = len(to_prune - pruned_dynamic)
351
+ pruned_dynamic.update(to_prune)
352
+
353
+ # 标记是否为入口函数,用于后续输出阶段判断是否修改 ref 字段
354
+ is_entry = is_entry_function(rec_meta)
355
+ if is_entry:
356
+ res["is_entry_function"] = True
357
+ PrettyOutput.auto_print(
358
+ f"[c2rust-library] 入口函数保护:{label} 保留转译(不修改 ref),但剪除其子节点(功能可由库实现)。"
359
+ f"替代信息将记录到 library_replacements.jsonl 供转译参考。"
1166
360
  )
1167
- for ch in adj_func.get(fid, []):
1168
- _evaluate_node(ch, is_root_func=False)
1169
361
  else:
1170
- # 即时剪枝(不含根)
1171
- to_prune = set(desc)
1172
- to_prune.discard(fid)
362
+ res["is_entry_function"] = False
363
+
364
+ # 无论是否为入口函数,都添加到 selected_roots(入口函数的替代信息需要记录供转译参考)
365
+ selected_roots.append((fid, res))
366
+
367
+ # 更新检查点
368
+ checkpoint_state = create_checkpoint_state(
369
+ checkpoint_key,
370
+ eval_counter,
371
+ processed_roots,
372
+ pruned_dynamic,
373
+ selected_roots,
374
+ )
375
+ last_ckpt_saved = periodic_checkpoint_save(
376
+ ckpt_path,
377
+ checkpoint_state,
378
+ eval_counter,
379
+ last_ckpt_saved,
380
+ checkpoint_interval,
381
+ resume,
382
+ )
1173
383
 
1174
- newly = len(to_prune - pruned_dynamic)
1175
- pruned_dynamic.update(to_prune)
1176
- selected_roots.append((fid, res))
1177
- _periodic_checkpoint_save()
1178
- typer.secho(
1179
- f"[c2rust-library] 即时标记剪除子节点(本次新增): +{newly} 个 (累计={len(pruned_dynamic)})",
1180
- fg=typer.colors.MAGENTA,
1181
- err=True,
1182
- )
384
+ PrettyOutput.auto_print(
385
+ f"[c2rust-library] 即时标记剪除子节点(本次新增): +{newly} 个 (累计={len(pruned_dynamic)})"
386
+ )
387
+ # 注意:无论是否入口函数,只要 replaceable 为 True,都不评估子节点
1183
388
  else:
1184
389
  # 若不可替代,继续评估其子节点(深度优先)
1185
390
  for ch in adj_func.get(fid, []):
1186
- _evaluate_node(ch, is_root_func=False)
1187
- except Exception:
1188
- pass
391
+ evaluate_node(ch, is_root_func=False)
392
+ except Exception as e:
393
+ PrettyOutput.auto_print(
394
+ f"[c2rust-library] 评估节点 {fid} ({label}) 时出错: {e}"
395
+ )
396
+ # 即使出错,也标记为已处理,避免无限循环
397
+ processed_roots.add(fid)
398
+ if is_root_func:
399
+ root_funcs_processed.add(fid)
1189
400
 
1190
401
  # 对每个候选根进行评估;若根不可替代将递归评估其子节点
1191
402
  for fid in root_funcs:
1192
- _evaluate_node(fid, is_root_func=True)
403
+ evaluate_node(fid, is_root_func=True)
1193
404
 
1194
405
  # 剪枝集合来自动态评估阶段的累计结果
1195
406
  pruned_funcs: Set[int] = set(pruned_dynamic)
@@ -1200,34 +411,56 @@ def apply_library_replacement(
1200
411
  pass
1201
412
 
1202
413
  # 写出新符号表
1203
- replacements = _write_output_symbols(
1204
- all_records, pruned_funcs, selected_roots,
1205
- out_symbols_path, out_symbols_prune_path
414
+ replacements = write_output_symbols(
415
+ all_records,
416
+ pruned_funcs,
417
+ selected_roots,
418
+ out_symbols_path,
419
+ out_symbols_prune_path,
1206
420
  )
1207
421
 
1208
422
  # 写出替代映射
1209
423
  with open(out_mapping_path, "w", encoding="utf-8") as fm:
1210
- for m in replacements:
1211
- fm.write(json.dumps(m, ensure_ascii=False) + "\n")
424
+ if replacements:
425
+ for m in replacements:
426
+ fm.write(json.dumps(m, ensure_ascii=False) + "\n")
427
+ else:
428
+ # 即使没有替代项,也记录统计信息,帮助调试
429
+ summary = {
430
+ "summary": {
431
+ "total_evaluated": eval_counter,
432
+ "total_processed_roots": len(processed_roots),
433
+ "total_selected_roots": len(selected_roots),
434
+ "total_pruned_funcs": len(pruned_funcs),
435
+ "note": "没有找到可替代的函数。可能原因:1) 所有函数都不可替代;2) 所有可替代的函数都是入口函数(被保护);3) 从断点恢复时 selected_roots 为空。",
436
+ }
437
+ }
438
+ fm.write(json.dumps(summary, ensure_ascii=False) + "\n")
439
+ PrettyOutput.auto_print(
440
+ f"[c2rust-library] 警告: 没有找到可替代的函数,library_replacements.jsonl 仅包含统计信息。"
441
+ f"已评估={eval_counter}, 已处理根={len(processed_roots)}, 已选中替代根={len(selected_roots)}"
442
+ )
1212
443
 
1213
444
  # 生成转译顺序(剪枝阶段与别名)
1214
445
  order_path = None
1215
446
  try:
1216
- compute_translation_order_jsonl(Path(out_symbols_path), out_path=order_prune_path)
447
+ compute_translation_order_jsonl(
448
+ Path(out_symbols_path), out_path=order_prune_path
449
+ )
1217
450
  shutil.copy2(order_prune_path, alias_order_path)
1218
451
  order_path = alias_order_path
1219
452
  except Exception as e:
1220
- typer.secho(f"[c2rust-library] 基于剪枝符号表生成翻译顺序失败: {e}", fg=typer.colors.YELLOW, err=True)
453
+ PrettyOutput.auto_print(f"[c2rust-library] 基于剪枝符号表生成翻译顺序失败: {e}")
1221
454
 
1222
455
  # 完成后清理断点(可选)
1223
456
  try:
1224
457
  if resume and clear_checkpoint_on_done and ckpt_path.exists():
1225
458
  ckpt_path.unlink()
1226
- typer.secho(f"[c2rust-library] 已清理断点文件: {ckpt_path}", fg=typer.colors.BLUE, err=True)
459
+ PrettyOutput.auto_print(f"[c2rust-library] 已清理断点文件: {ckpt_path}")
1227
460
  except Exception:
1228
461
  pass
1229
462
 
1230
- typer.secho(
463
+ PrettyOutput.auto_print(
1231
464
  "[c2rust-library] 库替代剪枝完成(LLM 子树评估):\n"
1232
465
  f"- 选中替代根: {len(selected_roots)} 个\n"
1233
466
  f"- 剪除函数: {len(pruned_funcs)} 个\n"
@@ -1235,8 +468,7 @@ def apply_library_replacement(
1235
468
  f"- 替代映射: {out_mapping_path}\n"
1236
469
  f"- 兼容符号表输出: {out_symbols_prune_path}\n"
1237
470
  + (f"- 转译顺序: {order_path}\n" if order_path else "")
1238
- + f"- 兼容顺序输出: {order_prune_path}",
1239
- fg=typer.colors.GREEN,
471
+ + f"- 兼容顺序输出: {order_prune_path}"
1240
472
  )
1241
473
 
1242
474
  result: Dict[str, Path] = {
@@ -1251,4 +483,4 @@ def apply_library_replacement(
1251
483
  return result
1252
484
 
1253
485
 
1254
- __all__ = ["apply_library_replacement"]
486
+ __all__ = ["apply_library_replacement"]