jarvis-ai-assistant 0.1.222__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jarvis/__init__.py +1 -1
- jarvis/jarvis_agent/__init__.py +1143 -245
- jarvis/jarvis_agent/agent_manager.py +97 -0
- jarvis/jarvis_agent/builtin_input_handler.py +12 -10
- jarvis/jarvis_agent/config_editor.py +57 -0
- jarvis/jarvis_agent/edit_file_handler.py +392 -99
- jarvis/jarvis_agent/event_bus.py +48 -0
- jarvis/jarvis_agent/events.py +157 -0
- jarvis/jarvis_agent/file_context_handler.py +79 -0
- jarvis/jarvis_agent/file_methodology_manager.py +117 -0
- jarvis/jarvis_agent/jarvis.py +1117 -147
- jarvis/jarvis_agent/main.py +78 -34
- jarvis/jarvis_agent/memory_manager.py +195 -0
- jarvis/jarvis_agent/methodology_share_manager.py +174 -0
- jarvis/jarvis_agent/prompt_manager.py +82 -0
- jarvis/jarvis_agent/prompts.py +46 -9
- jarvis/jarvis_agent/protocols.py +4 -1
- jarvis/jarvis_agent/rewrite_file_handler.py +141 -0
- jarvis/jarvis_agent/run_loop.py +146 -0
- jarvis/jarvis_agent/session_manager.py +9 -9
- jarvis/jarvis_agent/share_manager.py +228 -0
- jarvis/jarvis_agent/shell_input_handler.py +23 -3
- jarvis/jarvis_agent/stdio_redirect.py +295 -0
- jarvis/jarvis_agent/task_analyzer.py +212 -0
- jarvis/jarvis_agent/task_manager.py +154 -0
- jarvis/jarvis_agent/task_planner.py +496 -0
- jarvis/jarvis_agent/tool_executor.py +8 -4
- jarvis/jarvis_agent/tool_share_manager.py +139 -0
- jarvis/jarvis_agent/user_interaction.py +42 -0
- jarvis/jarvis_agent/utils.py +54 -0
- jarvis/jarvis_agent/web_bridge.py +189 -0
- jarvis/jarvis_agent/web_output_sink.py +53 -0
- jarvis/jarvis_agent/web_server.py +751 -0
- jarvis/jarvis_c2rust/__init__.py +26 -0
- jarvis/jarvis_c2rust/cli.py +613 -0
- jarvis/jarvis_c2rust/collector.py +258 -0
- jarvis/jarvis_c2rust/library_replacer.py +1122 -0
- jarvis/jarvis_c2rust/llm_module_agent.py +1300 -0
- jarvis/jarvis_c2rust/optimizer.py +960 -0
- jarvis/jarvis_c2rust/scanner.py +1681 -0
- jarvis/jarvis_c2rust/transpiler.py +2325 -0
- jarvis/jarvis_code_agent/build_validation_config.py +133 -0
- jarvis/jarvis_code_agent/code_agent.py +1605 -178
- jarvis/jarvis_code_agent/code_analyzer/__init__.py +62 -0
- jarvis/jarvis_code_agent/code_analyzer/base_language.py +74 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/__init__.py +44 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/base.py +102 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/cmake.py +59 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/detector.py +125 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/fallback.py +69 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/go.py +38 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/java_gradle.py +44 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/java_maven.py +38 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/makefile.py +50 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/nodejs.py +93 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/python.py +129 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/rust.py +54 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/validator.py +154 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator.py +43 -0
- jarvis/jarvis_code_agent/code_analyzer/context_manager.py +363 -0
- jarvis/jarvis_code_agent/code_analyzer/context_recommender.py +18 -0
- jarvis/jarvis_code_agent/code_analyzer/dependency_analyzer.py +132 -0
- jarvis/jarvis_code_agent/code_analyzer/file_ignore.py +330 -0
- jarvis/jarvis_code_agent/code_analyzer/impact_analyzer.py +781 -0
- jarvis/jarvis_code_agent/code_analyzer/language_registry.py +185 -0
- jarvis/jarvis_code_agent/code_analyzer/language_support.py +89 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/__init__.py +31 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/c_cpp_language.py +231 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/go_language.py +183 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/python_language.py +219 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/rust_language.py +209 -0
- jarvis/jarvis_code_agent/code_analyzer/llm_context_recommender.py +451 -0
- jarvis/jarvis_code_agent/code_analyzer/symbol_extractor.py +77 -0
- jarvis/jarvis_code_agent/code_analyzer/tree_sitter_extractor.py +48 -0
- jarvis/jarvis_code_agent/lint.py +275 -13
- jarvis/jarvis_code_agent/utils.py +142 -0
- jarvis/jarvis_code_analysis/checklists/loader.py +20 -6
- jarvis/jarvis_code_analysis/code_review.py +583 -548
- jarvis/jarvis_data/config_schema.json +339 -28
- jarvis/jarvis_git_squash/main.py +22 -13
- jarvis/jarvis_git_utils/git_commiter.py +171 -55
- jarvis/jarvis_mcp/sse_mcp_client.py +22 -15
- jarvis/jarvis_mcp/stdio_mcp_client.py +4 -4
- jarvis/jarvis_mcp/streamable_mcp_client.py +36 -16
- jarvis/jarvis_memory_organizer/memory_organizer.py +753 -0
- jarvis/jarvis_methodology/main.py +48 -63
- jarvis/jarvis_multi_agent/__init__.py +302 -43
- jarvis/jarvis_multi_agent/main.py +70 -24
- jarvis/jarvis_platform/ai8.py +40 -23
- jarvis/jarvis_platform/base.py +210 -49
- jarvis/jarvis_platform/human.py +11 -1
- jarvis/jarvis_platform/kimi.py +82 -76
- jarvis/jarvis_platform/openai.py +73 -1
- jarvis/jarvis_platform/registry.py +8 -15
- jarvis/jarvis_platform/tongyi.py +115 -101
- jarvis/jarvis_platform/yuanbao.py +89 -63
- jarvis/jarvis_platform_manager/main.py +194 -132
- jarvis/jarvis_platform_manager/service.py +122 -86
- jarvis/jarvis_rag/cli.py +156 -53
- jarvis/jarvis_rag/embedding_manager.py +155 -12
- jarvis/jarvis_rag/llm_interface.py +10 -13
- jarvis/jarvis_rag/query_rewriter.py +63 -12
- jarvis/jarvis_rag/rag_pipeline.py +222 -40
- jarvis/jarvis_rag/reranker.py +26 -3
- jarvis/jarvis_rag/retriever.py +270 -14
- jarvis/jarvis_sec/__init__.py +3605 -0
- jarvis/jarvis_sec/checkers/__init__.py +32 -0
- jarvis/jarvis_sec/checkers/c_checker.py +2680 -0
- jarvis/jarvis_sec/checkers/rust_checker.py +1108 -0
- jarvis/jarvis_sec/cli.py +116 -0
- jarvis/jarvis_sec/report.py +257 -0
- jarvis/jarvis_sec/status.py +264 -0
- jarvis/jarvis_sec/types.py +20 -0
- jarvis/jarvis_sec/workflow.py +219 -0
- jarvis/jarvis_smart_shell/main.py +405 -137
- jarvis/jarvis_stats/__init__.py +13 -0
- jarvis/jarvis_stats/cli.py +387 -0
- jarvis/jarvis_stats/stats.py +711 -0
- jarvis/jarvis_stats/storage.py +612 -0
- jarvis/jarvis_stats/visualizer.py +282 -0
- jarvis/jarvis_tools/ask_user.py +1 -0
- jarvis/jarvis_tools/base.py +18 -2
- jarvis/jarvis_tools/clear_memory.py +239 -0
- jarvis/jarvis_tools/cli/main.py +220 -144
- jarvis/jarvis_tools/execute_script.py +52 -12
- jarvis/jarvis_tools/file_analyzer.py +17 -12
- jarvis/jarvis_tools/generate_new_tool.py +46 -24
- jarvis/jarvis_tools/read_code.py +277 -18
- jarvis/jarvis_tools/read_symbols.py +141 -0
- jarvis/jarvis_tools/read_webpage.py +86 -13
- jarvis/jarvis_tools/registry.py +294 -90
- jarvis/jarvis_tools/retrieve_memory.py +227 -0
- jarvis/jarvis_tools/save_memory.py +194 -0
- jarvis/jarvis_tools/search_web.py +62 -28
- jarvis/jarvis_tools/sub_agent.py +205 -0
- jarvis/jarvis_tools/sub_code_agent.py +217 -0
- jarvis/jarvis_tools/virtual_tty.py +330 -62
- jarvis/jarvis_utils/builtin_replace_map.py +4 -5
- jarvis/jarvis_utils/clipboard.py +90 -0
- jarvis/jarvis_utils/config.py +607 -50
- jarvis/jarvis_utils/embedding.py +3 -0
- jarvis/jarvis_utils/fzf.py +57 -0
- jarvis/jarvis_utils/git_utils.py +251 -29
- jarvis/jarvis_utils/globals.py +174 -17
- jarvis/jarvis_utils/http.py +58 -79
- jarvis/jarvis_utils/input.py +899 -153
- jarvis/jarvis_utils/methodology.py +210 -83
- jarvis/jarvis_utils/output.py +220 -137
- jarvis/jarvis_utils/utils.py +1906 -135
- jarvis_ai_assistant-0.7.0.dist-info/METADATA +465 -0
- jarvis_ai_assistant-0.7.0.dist-info/RECORD +192 -0
- {jarvis_ai_assistant-0.1.222.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/entry_points.txt +8 -2
- jarvis/jarvis_git_details/main.py +0 -265
- jarvis/jarvis_platform/oyi.py +0 -357
- jarvis/jarvis_tools/edit_file.py +0 -255
- jarvis/jarvis_tools/rewrite_file.py +0 -195
- jarvis_ai_assistant-0.1.222.dist-info/METADATA +0 -767
- jarvis_ai_assistant-0.1.222.dist-info/RECORD +0 -110
- /jarvis/{jarvis_git_details → jarvis_memory_organizer}/__init__.py +0 -0
- {jarvis_ai_assistant-0.1.222.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/WHEEL +0 -0
- {jarvis_ai_assistant-0.1.222.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/licenses/LICENSE +0 -0
- {jarvis_ai_assistant-0.1.222.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1122 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
Library-based dependency replacer for C→Rust migration (LLM-only subtree evaluation).
|
|
4
|
+
|
|
5
|
+
要点:
|
|
6
|
+
- 不依赖 pruner,仅复用 scanner 的通用工具函数
|
|
7
|
+
- 将“依赖子树(根函数及其可达的函数集合)”的摘要与局部源码片段提供给 LLM,由 LLM 评估该子树是否可由“指定标准库/第三方 crate 的一个或多个成熟 API(可组合,多库协同)”整体替代
|
|
8
|
+
- 若可替代:将根函数的 ref 替换为该库 API(以 lib::<name> 形式的占位符,支持多库组合),并删除其所有子孙函数节点(类型不受影响)
|
|
9
|
+
- 支持禁用库约束:可传入 disabled_libraries(list[str]),若 LLM 建议命中禁用库,则强制判定为不可替代并记录备注
|
|
10
|
+
- 断点恢复(checkpoint/resume):可启用 resume,使用 library_replacer_checkpoint.json 记录 eval_counter/processed/pruned/selected 等信息,基于关键输入组合键进行匹配恢复;落盘采用原子写以防损坏
|
|
11
|
+
- 主库字段回退策略:当存在 libraries 列表优先选择第一个作为 primary;否则回退到单一 library 字段;均为空则置空
|
|
12
|
+
- 入口保护:默认跳过 main(可通过环境变量 JARVIS_C2RUST_DELAY_ENTRY_SYMBOLS/JARVIS_C2RUST_DELAY_ENTRIES/C2RUST_DELAY_ENTRIES 配置多个入口名)
|
|
13
|
+
|
|
14
|
+
输入数据:
|
|
15
|
+
- symbols.jsonl(或传入的 .jsonl 路径):由 scanner 生成的统一符号表,字段参见 scanner.py
|
|
16
|
+
- 可选 candidates(名称或限定名列表):仅评估这些符号作为根,作用域限定为其可达子树
|
|
17
|
+
- 可选 disabled_libraries(list[str]):评估时禁止使用的库名(命中则视为不可替代)
|
|
18
|
+
|
|
19
|
+
输出:
|
|
20
|
+
- symbols_library_pruned.jsonl:剪枝后的符号表(默认名,可通过参数自定义)
|
|
21
|
+
- library_replacements.jsonl:替代根到库信息的映射(JSONL,每行一个 {id,name,qualified_name,library,libraries,function,apis?,confidence,notes?,mode})
|
|
22
|
+
- 兼容输出:
|
|
23
|
+
- symbols_prune.jsonl:与主输出等价
|
|
24
|
+
- symbols.jsonl:通用别名(用于后续流程统一读取)
|
|
25
|
+
- translation_order_prune.jsonl:剪枝阶段的转译顺序
|
|
26
|
+
- translation_order.jsonl:通用别名(与剪枝阶段一致)
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
import json
|
|
32
|
+
import shutil
|
|
33
|
+
import time
|
|
34
|
+
from pathlib import Path
|
|
35
|
+
from typing import Any, Callable, Dict, List, Optional, Set, Tuple
|
|
36
|
+
|
|
37
|
+
import typer
|
|
38
|
+
|
|
39
|
+
# 依赖:仅使用 scanner 的工具函数,避免循环导入
|
|
40
|
+
from jarvis.jarvis_c2rust.scanner import (
|
|
41
|
+
compute_translation_order_jsonl,
|
|
42
|
+
find_root_function_ids,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _resolve_symbols_jsonl_path(hint: Path) -> Path:
|
|
47
|
+
"""解析symbols.jsonl路径"""
|
|
48
|
+
p = Path(hint)
|
|
49
|
+
if p.is_file() and p.suffix.lower() == ".jsonl":
|
|
50
|
+
return p
|
|
51
|
+
if p.is_dir():
|
|
52
|
+
return p / ".jarvis" / "c2rust" / "symbols.jsonl"
|
|
53
|
+
return Path(".") / ".jarvis" / "c2rust" / "symbols.jsonl"
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _setup_output_paths(
|
|
57
|
+
data_dir: Path,
|
|
58
|
+
out_symbols_path: Optional[Path],
|
|
59
|
+
out_mapping_path: Optional[Path],
|
|
60
|
+
) -> tuple[Path, Path, Path, Path, Path]:
|
|
61
|
+
"""设置输出路径,返回(符号表路径, 映射路径, 兼容符号表路径, 顺序路径, 别名顺序路径)"""
|
|
62
|
+
if out_symbols_path is None:
|
|
63
|
+
out_symbols_path = data_dir / "symbols_library_pruned.jsonl"
|
|
64
|
+
else:
|
|
65
|
+
out_symbols_path = Path(out_symbols_path)
|
|
66
|
+
if out_mapping_path is None:
|
|
67
|
+
out_mapping_path = data_dir / "library_replacements.jsonl"
|
|
68
|
+
else:
|
|
69
|
+
out_mapping_path = Path(out_mapping_path)
|
|
70
|
+
|
|
71
|
+
# 兼容输出
|
|
72
|
+
out_symbols_prune_path = data_dir / "symbols_prune.jsonl"
|
|
73
|
+
order_prune_path = data_dir / "translation_order_prune.jsonl"
|
|
74
|
+
alias_order_path = data_dir / "translation_order.jsonl"
|
|
75
|
+
|
|
76
|
+
return out_symbols_path, out_mapping_path, out_symbols_prune_path, order_prune_path, alias_order_path
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _load_symbols(sjsonl: Path) -> tuple[List[Dict[str, Any]], Dict[int, Dict[str, Any]], Dict[str, int], Set[int], Dict[int, List[str]]]:
|
|
80
|
+
"""加载符号表,返回(所有记录, id到记录映射, 名称到id映射, 函数id集合, id到引用名称映射)"""
|
|
81
|
+
all_records: List[Dict[str, Any]] = []
|
|
82
|
+
by_id: Dict[int, Dict[str, Any]] = {}
|
|
83
|
+
name_to_id: Dict[str, int] = {}
|
|
84
|
+
func_ids: Set[int] = set()
|
|
85
|
+
id_refs_names: Dict[int, List[str]] = {}
|
|
86
|
+
|
|
87
|
+
with open(sjsonl, "r", encoding="utf-8") as f:
|
|
88
|
+
idx = 0
|
|
89
|
+
for line in f:
|
|
90
|
+
line = line.strip()
|
|
91
|
+
if not line:
|
|
92
|
+
continue
|
|
93
|
+
try:
|
|
94
|
+
obj = json.loads(line)
|
|
95
|
+
except Exception:
|
|
96
|
+
continue
|
|
97
|
+
idx += 1
|
|
98
|
+
fid = int(obj.get("id") or idx)
|
|
99
|
+
obj["id"] = fid
|
|
100
|
+
nm = obj.get("name") or ""
|
|
101
|
+
qn = obj.get("qualified_name") or ""
|
|
102
|
+
cat = obj.get("category") or "" # "function" | "type"
|
|
103
|
+
refs = obj.get("ref")
|
|
104
|
+
if not isinstance(refs, list):
|
|
105
|
+
refs = []
|
|
106
|
+
refs = [r for r in refs if isinstance(r, str) and r]
|
|
107
|
+
|
|
108
|
+
all_records.append(obj)
|
|
109
|
+
by_id[fid] = obj
|
|
110
|
+
id_refs_names[fid] = refs
|
|
111
|
+
if nm:
|
|
112
|
+
name_to_id.setdefault(nm, fid)
|
|
113
|
+
if qn:
|
|
114
|
+
name_to_id.setdefault(qn, fid)
|
|
115
|
+
if cat == "function":
|
|
116
|
+
func_ids.add(fid)
|
|
117
|
+
|
|
118
|
+
return all_records, by_id, name_to_id, func_ids, id_refs_names
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _build_function_graph(
|
|
122
|
+
func_ids: Set[int],
|
|
123
|
+
id_refs_names: Dict[int, List[str]],
|
|
124
|
+
name_to_id: Dict[str, int],
|
|
125
|
+
) -> Dict[int, List[int]]:
|
|
126
|
+
"""构建函数依赖图,返回id到依赖id列表的映射"""
|
|
127
|
+
adj_func: Dict[int, List[int]] = {}
|
|
128
|
+
for fid in func_ids:
|
|
129
|
+
internal: List[int] = []
|
|
130
|
+
for target in id_refs_names.get(fid, []):
|
|
131
|
+
tid = name_to_id.get(target)
|
|
132
|
+
if tid is not None and tid in func_ids and tid != fid:
|
|
133
|
+
internal.append(tid)
|
|
134
|
+
try:
|
|
135
|
+
internal = list(dict.fromkeys(internal))
|
|
136
|
+
except Exception:
|
|
137
|
+
internal = sorted(list(set(internal)))
|
|
138
|
+
adj_func[fid] = internal
|
|
139
|
+
return adj_func
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _build_evaluation_order(
|
|
143
|
+
sjsonl: Path,
|
|
144
|
+
func_ids: Set[int],
|
|
145
|
+
adj_func: Dict[int, List[int]],
|
|
146
|
+
) -> List[int]:
|
|
147
|
+
"""构建评估顺序(广度优先,父先子后)"""
|
|
148
|
+
# 评估队列:从所有无入边函数作为种子开始,按层次遍历整个图,使"父先于子"被评估;
|
|
149
|
+
# 若不存在无入边节点(如强连通环),则回退为全量函数集合。
|
|
150
|
+
try:
|
|
151
|
+
roots_all = find_root_function_ids(sjsonl)
|
|
152
|
+
except Exception:
|
|
153
|
+
roots_all = []
|
|
154
|
+
seeds = [rid for rid in roots_all if rid in func_ids]
|
|
155
|
+
if not seeds:
|
|
156
|
+
seeds = sorted(list(func_ids))
|
|
157
|
+
|
|
158
|
+
visited: Set[int] = set()
|
|
159
|
+
order: List[int] = []
|
|
160
|
+
q: List[int] = list(seeds)
|
|
161
|
+
qi = 0
|
|
162
|
+
while qi < len(q):
|
|
163
|
+
u = q[qi]
|
|
164
|
+
qi += 1
|
|
165
|
+
if u in visited or u not in func_ids:
|
|
166
|
+
continue
|
|
167
|
+
visited.add(u)
|
|
168
|
+
order.append(u)
|
|
169
|
+
for v in adj_func.get(u, []):
|
|
170
|
+
if v not in visited and v in func_ids:
|
|
171
|
+
q.append(v)
|
|
172
|
+
# 若存在未覆盖的孤立/循环组件,补充其节点(确保每个函数节点都将被作为"候选根"参与评估)
|
|
173
|
+
if len(visited) < len(func_ids):
|
|
174
|
+
leftovers = [fid for fid in sorted(func_ids) if fid not in visited]
|
|
175
|
+
order.extend(leftovers)
|
|
176
|
+
|
|
177
|
+
return order
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _collect_descendants(
|
|
181
|
+
start: int,
|
|
182
|
+
adj_func: Dict[int, List[int]],
|
|
183
|
+
desc_cache: Dict[int, Set[int]],
|
|
184
|
+
) -> Set[int]:
|
|
185
|
+
"""收集从start开始的所有后代节点(使用缓存)"""
|
|
186
|
+
if start in desc_cache:
|
|
187
|
+
return desc_cache[start]
|
|
188
|
+
visited: Set[int] = set()
|
|
189
|
+
stack: List[int] = [start]
|
|
190
|
+
visited.add(start)
|
|
191
|
+
while stack:
|
|
192
|
+
u = stack.pop()
|
|
193
|
+
for v in adj_func.get(u, []):
|
|
194
|
+
if v not in visited:
|
|
195
|
+
visited.add(v)
|
|
196
|
+
stack.append(v)
|
|
197
|
+
desc_cache[start] = visited
|
|
198
|
+
return visited
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _process_candidate_scope(
|
|
202
|
+
candidates: Optional[List[str]],
|
|
203
|
+
all_records: List[Dict[str, Any]],
|
|
204
|
+
root_funcs: List[int],
|
|
205
|
+
func_ids: Set[int],
|
|
206
|
+
adj_func: Dict[int, List[int]],
|
|
207
|
+
desc_cache: Dict[int, Set[int]],
|
|
208
|
+
) -> tuple[List[int], Set[int]]:
|
|
209
|
+
"""处理候选根和作用域,返回(过滤后的根函数列表, 不可达函数集合)"""
|
|
210
|
+
scope_unreachable_funcs: Set[int] = set()
|
|
211
|
+
if not candidates:
|
|
212
|
+
return root_funcs, scope_unreachable_funcs
|
|
213
|
+
|
|
214
|
+
cand_ids: Set[int] = set()
|
|
215
|
+
# 支持重载:同名/同限定名可能对应多个函数ID,需全部纳入候选
|
|
216
|
+
key_set = set(candidates)
|
|
217
|
+
for rec in all_records:
|
|
218
|
+
if (rec.get("category") or "") != "function":
|
|
219
|
+
continue
|
|
220
|
+
nm = rec.get("name") or ""
|
|
221
|
+
qn = rec.get("qualified_name") or ""
|
|
222
|
+
if nm in key_set or qn in key_set:
|
|
223
|
+
try:
|
|
224
|
+
cand_ids.add(int(rec.get("id")))
|
|
225
|
+
except Exception:
|
|
226
|
+
continue
|
|
227
|
+
|
|
228
|
+
if not cand_ids:
|
|
229
|
+
return root_funcs, scope_unreachable_funcs
|
|
230
|
+
|
|
231
|
+
filtered_roots = [rid for rid in root_funcs if rid in cand_ids]
|
|
232
|
+
# 计算从候选根出发的可达函数集合(含根)
|
|
233
|
+
reachable_all: Set[int] = set()
|
|
234
|
+
for rid in filtered_roots:
|
|
235
|
+
reachable_all.update(_collect_descendants(rid, adj_func, desc_cache))
|
|
236
|
+
# 不可达函数(仅限函数类别)将被直接删除
|
|
237
|
+
scope_unreachable_funcs = {fid for fid in func_ids if fid not in reachable_all}
|
|
238
|
+
if scope_unreachable_funcs:
|
|
239
|
+
typer.secho(
|
|
240
|
+
f"[c2rust-library] 根据根列表,标记不可达函数删除: {len(scope_unreachable_funcs)} 个",
|
|
241
|
+
fg=typer.colors.YELLOW,
|
|
242
|
+
err=True,
|
|
243
|
+
)
|
|
244
|
+
|
|
245
|
+
return filtered_roots, scope_unreachable_funcs
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _read_source_snippet(rec: Dict[str, Any], max_lines: int = 200) -> str:
|
|
249
|
+
"""读取源码片段"""
|
|
250
|
+
path = rec.get("file") or ""
|
|
251
|
+
try:
|
|
252
|
+
if not path:
|
|
253
|
+
return ""
|
|
254
|
+
p = Path(path)
|
|
255
|
+
if not p.exists():
|
|
256
|
+
return ""
|
|
257
|
+
sl = int(rec.get("start_line") or 1)
|
|
258
|
+
el = int(rec.get("end_line") or sl)
|
|
259
|
+
if el < sl:
|
|
260
|
+
el = sl
|
|
261
|
+
lines = p.read_text(encoding="utf-8", errors="replace").splitlines()
|
|
262
|
+
start_idx = max(sl - 1, 0)
|
|
263
|
+
end_idx = min(el, len(lines))
|
|
264
|
+
snippet_lines = lines[start_idx:end_idx]
|
|
265
|
+
if len(snippet_lines) > max_lines:
|
|
266
|
+
snippet_lines = snippet_lines[:max_lines]
|
|
267
|
+
return "\n".join(snippet_lines)
|
|
268
|
+
except Exception:
|
|
269
|
+
return ""
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _check_llm_availability() -> tuple[bool, Any, Any, Any]:
|
|
273
|
+
"""检查LLM可用性,返回(是否可用, PlatformRegistry, get_normal_platform_name, get_normal_model_name)"""
|
|
274
|
+
try:
|
|
275
|
+
from jarvis.jarvis_platform.registry import PlatformRegistry # type: ignore
|
|
276
|
+
from jarvis.jarvis_utils.config import get_normal_platform_name, get_normal_model_name # type: ignore
|
|
277
|
+
return True, PlatformRegistry, get_normal_platform_name, get_normal_model_name
|
|
278
|
+
except Exception:
|
|
279
|
+
return False, None, None, None
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def _normalize_disabled_libraries(disabled_libraries: Optional[List[str]]) -> tuple[List[str], str]:
|
|
283
|
+
"""规范化禁用库列表,返回(规范化列表, 显示字符串)"""
|
|
284
|
+
disabled_norm: List[str] = []
|
|
285
|
+
disabled_display: str = ""
|
|
286
|
+
if isinstance(disabled_libraries, list):
|
|
287
|
+
disabled_norm = [str(x).strip().lower() for x in disabled_libraries if str(x).strip()]
|
|
288
|
+
disabled_display = ", ".join([str(x).strip() for x in disabled_libraries if str(x).strip()])
|
|
289
|
+
return disabled_norm, disabled_display
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def _normalize_list(items: Optional[List[str]]) -> List[str]:
|
|
293
|
+
"""规范化列表,去重并排序"""
|
|
294
|
+
if not isinstance(items, list):
|
|
295
|
+
return []
|
|
296
|
+
vals: List[str] = []
|
|
297
|
+
for x in items:
|
|
298
|
+
try:
|
|
299
|
+
s = str(x).strip()
|
|
300
|
+
except Exception:
|
|
301
|
+
continue
|
|
302
|
+
if s:
|
|
303
|
+
vals.append(s)
|
|
304
|
+
try:
|
|
305
|
+
vals = list(dict.fromkeys(vals))
|
|
306
|
+
except Exception:
|
|
307
|
+
vals = sorted(set(vals))
|
|
308
|
+
return vals
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _normalize_list_lower(items: Optional[List[str]]) -> List[str]:
    """Normalise the list via ``_normalize_list`` and lower-case every entry."""
    normalized = _normalize_list(items)
    return list(map(str.lower, normalized))
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def _make_checkpoint_key(
    sjsonl: Path,
    library_name: str,
    llm_group: Optional[str],
    candidates: Optional[List[str]],
    disabled_libraries: Optional[List[str]],
    max_funcs: Optional[int],
) -> Dict[str, Any]:
    """Build the key that identifies a checkpoint.

    The key combines the resolved symbols path, library name, LLM group,
    normalised candidates, normalised lower-cased disabled libraries and the
    function limit. ``max_funcs`` is stored as an int when it is an int or an
    integral float, otherwise as ``None``.
    """
    try:
        symbols_abs = str(Path(sjsonl).resolve())
    except Exception:
        symbols_abs = str(sjsonl)

    if isinstance(max_funcs, int):
        limit: Optional[int] = int(max_funcs)
    elif isinstance(max_funcs, float) and float(max_funcs).is_integer():
        limit = int(max_funcs)
    else:
        limit = None

    return {
        "symbols": symbols_abs,
        "library_name": str(library_name),
        "llm_group": str(llm_group or ""),
        "candidates": _normalize_list(candidates),
        "disabled_libraries": _normalize_list_lower(disabled_libraries),
        "max_funcs": limit,
    }
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def _load_checkpoint_if_match(
|
|
341
|
+
ckpt_path: Path,
|
|
342
|
+
resume: bool,
|
|
343
|
+
checkpoint_key: Dict[str, Any],
|
|
344
|
+
) -> Optional[Dict[str, Any]]:
|
|
345
|
+
"""加载匹配的检查点"""
|
|
346
|
+
try:
|
|
347
|
+
if not resume:
|
|
348
|
+
return None
|
|
349
|
+
if not ckpt_path.exists():
|
|
350
|
+
return None
|
|
351
|
+
obj = json.loads(ckpt_path.read_text(encoding="utf-8"))
|
|
352
|
+
if not isinstance(obj, dict):
|
|
353
|
+
return None
|
|
354
|
+
if obj.get("key") != checkpoint_key:
|
|
355
|
+
return None
|
|
356
|
+
return obj
|
|
357
|
+
except Exception:
|
|
358
|
+
return None
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def _atomic_write(path: Path, content: str) -> None:
|
|
362
|
+
"""原子写入文件"""
|
|
363
|
+
try:
|
|
364
|
+
tmp = path.with_suffix(path.suffix + ".tmp")
|
|
365
|
+
tmp.write_text(content, encoding="utf-8")
|
|
366
|
+
tmp.replace(path)
|
|
367
|
+
except Exception:
|
|
368
|
+
try:
|
|
369
|
+
path.write_text(content, encoding="utf-8")
|
|
370
|
+
except Exception:
|
|
371
|
+
pass
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
def _create_llm_model(
    llm_group: Optional[str],
    disabled_display: str,
    _model_available: bool,
    PlatformRegistry: Any,
    get_normal_platform_name: Any,
    get_normal_model_name: Any,
) -> Optional[Any]:
    """Create and configure the LLM platform object used for subtree evaluation.

    Args:
        llm_group: Optional model-group name passed to the name-lookup
            callables and to ``set_model_group``.
        disabled_display: Not used by this function body.
        _model_available: When falsy, no model is created.
        PlatformRegistry: Object exposing ``get_global_platform_registry()``.
        get_normal_platform_name: Callable mapping ``llm_group`` to a platform name.
        get_normal_model_name: Callable mapping ``llm_group`` to a model name.

    Returns:
        The configured model, or ``None`` when ``_model_available`` is falsy
        or initialisation raises (a warning is then printed to stderr).
    """
    if not _model_available:
        return None
    try:
        registry = PlatformRegistry.get_global_platform_registry()  # type: ignore
        model = None
        if llm_group:
            # Prefer the platform configured for the requested group; any
            # failure here just falls through to the default platform below.
            try:
                platform_name = get_normal_platform_name(llm_group)  # type: ignore
                if platform_name:
                    model = registry.create_platform(platform_name)  # type: ignore
            except Exception:
                model = None
        if model is None:
            model = registry.get_normal_platform()  # type: ignore
        # Group and model-name selection are best-effort: errors are ignored.
        try:
            model.set_model_group(llm_group)  # type: ignore
        except Exception:
            pass
        if llm_group:
            try:
                mn = get_normal_model_name(llm_group)  # type: ignore
                if mn:
                    model.set_model_name(mn)  # type: ignore
            except Exception:
                pass
        # The system prompt fixes the evaluator role and the expected <yaml> reply format.
        model.set_system_prompt(  # type: ignore
            "你是资深 C→Rust 迁移专家。任务:给定一个函数及其调用子树(依赖图摘要、函数签名、源码片段),"
            "判断是否可以使用一个或多个成熟的 Rust 库整体替代该子树的功能(允许库内多个 API 协同,允许多个库组合;不允许使用不成熟/不常见库)。"
            "如可替代,请给出 libraries 列表(库名),可选给出代表性 API/模块与实现备注 notes(如何用这些库协作实现)。"
            "输出格式:仅输出一个 <yaml> 块,字段: replaceable(bool), libraries(list[str]), confidence(float 0..1),可选 library(str,首选主库), api(str) 或 apis(list),notes(str)。"
        )
        return model
    except Exception as e:
        # Initialisation failed: warn and let the caller fall back to "not replaceable".
        typer.secho(
            f"[c2rust-library] 初始化 LLM 平台失败,将回退为保守策略: {e}",
            fg=typer.colors.YELLOW,
            err=True,
        )
        return None
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def _parse_agent_yaml_summary(text: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
|
|
425
|
+
"""
|
|
426
|
+
解析Agent返回的YAML/JSON摘要
|
|
427
|
+
返回(解析结果, 错误信息)
|
|
428
|
+
如果解析成功,返回(data, None)
|
|
429
|
+
如果解析失败,返回(None, 错误信息)
|
|
430
|
+
"""
|
|
431
|
+
if not isinstance(text, str) or not text.strip():
|
|
432
|
+
return None, "摘要文本为空"
|
|
433
|
+
import re as _re
|
|
434
|
+
import json as _json
|
|
435
|
+
try:
|
|
436
|
+
import yaml # type: ignore
|
|
437
|
+
except Exception:
|
|
438
|
+
yaml = None # type: ignore
|
|
439
|
+
|
|
440
|
+
m_sum = _re.search(r"<SUMMARY>([\s\S]*?)</SUMMARY>", text, flags=_re.IGNORECASE)
|
|
441
|
+
block = (m_sum.group(1) if m_sum else text).strip()
|
|
442
|
+
|
|
443
|
+
m_yaml = _re.search(r"<yaml>([\s\S]*?)</yaml>", block, flags=_re.IGNORECASE)
|
|
444
|
+
if m_yaml:
|
|
445
|
+
raw = m_yaml.group(1).strip()
|
|
446
|
+
if raw and yaml:
|
|
447
|
+
try:
|
|
448
|
+
data = yaml.safe_load(raw)
|
|
449
|
+
if isinstance(data, dict):
|
|
450
|
+
return data, None
|
|
451
|
+
except Exception as yaml_err:
|
|
452
|
+
return None, f"YAML 解析失败: {str(yaml_err)}"
|
|
453
|
+
elif raw and not yaml:
|
|
454
|
+
return None, "PyYAML 未安装,无法解析 YAML"
|
|
455
|
+
|
|
456
|
+
m_code = _re.search(r"```(?:yaml|yml)\s*([\s\S]*?)```", block, flags=_re.IGNORECASE)
|
|
457
|
+
if m_code:
|
|
458
|
+
raw = m_code.group(1).strip()
|
|
459
|
+
if raw and yaml:
|
|
460
|
+
try:
|
|
461
|
+
data = yaml.safe_load(raw)
|
|
462
|
+
if isinstance(data, dict):
|
|
463
|
+
return data, None
|
|
464
|
+
except Exception as yaml_err:
|
|
465
|
+
return None, f"YAML 解析失败: {str(yaml_err)}"
|
|
466
|
+
elif raw and not yaml:
|
|
467
|
+
return None, "PyYAML 未安装,无法解析 YAML"
|
|
468
|
+
|
|
469
|
+
m_json = _re.search(r"\{[\s\S]*\}", block)
|
|
470
|
+
if m_json:
|
|
471
|
+
raw = m_json.group(0).strip()
|
|
472
|
+
try:
|
|
473
|
+
data = _json.loads(raw)
|
|
474
|
+
if isinstance(data, dict):
|
|
475
|
+
return data, None
|
|
476
|
+
except Exception as json_err:
|
|
477
|
+
return None, f"JSON 解析失败: {str(json_err)}"
|
|
478
|
+
|
|
479
|
+
# 宽松键值
|
|
480
|
+
def _kv(pattern: str) -> Optional[str]:
|
|
481
|
+
m = _re.search(pattern, block, flags=_re.IGNORECASE)
|
|
482
|
+
return m.group(1).strip() if m else None
|
|
483
|
+
|
|
484
|
+
rep_raw = _kv(r"replaceable\s*:\s*(.+)")
|
|
485
|
+
lib_raw = _kv(r"library\s*:\s*(.+)")
|
|
486
|
+
api_raw = _kv(r"(?:api|function)\s*:\s*(.+)")
|
|
487
|
+
conf_raw = _kv(r"confidence\s*:\s*([0-9\.\-eE]+)")
|
|
488
|
+
if any([rep_raw, lib_raw, api_raw, conf_raw]):
|
|
489
|
+
result: Dict[str, Any] = {}
|
|
490
|
+
if rep_raw is not None:
|
|
491
|
+
rep_s = rep_raw.strip().strip("\"'")
|
|
492
|
+
result["replaceable"] = rep_s.lower() in ("true", "yes", "y", "1")
|
|
493
|
+
if lib_raw is not None:
|
|
494
|
+
result["library"] = lib_raw.strip().strip("\"'")
|
|
495
|
+
if api_raw is not None:
|
|
496
|
+
result["api"] = api_raw.strip().strip("\"'")
|
|
497
|
+
if conf_raw is not None:
|
|
498
|
+
try:
|
|
499
|
+
result["confidence"] = float(conf_raw)
|
|
500
|
+
except Exception:
|
|
501
|
+
pass
|
|
502
|
+
return (result if result else None, None)
|
|
503
|
+
return None, "未找到有效的YAML/JSON格式或键值对"
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
def _build_subtree_prompt(
    fid: int,
    desc: Set[int],
    by_id: Dict[int, Dict[str, Any]],
    adj_func: Dict[int, List[int]],
    disabled_display: str,
) -> str:
    """Build the prompt asking the LLM to evaluate one dependency subtree.

    The prompt contains the root function's name, signature, language and
    source snippet, the list of subtree functions (first 200), the call edges
    inside the subtree (first 400, plus a DOT rendering when there are at
    most 200 edges), source samples for the root and its first two direct
    callees, and an optional disabled-library constraint.
    """

    def _label(node_id: int) -> str:
        info = by_id.get(node_id, {})
        return info.get("qualified_name") or info.get("name") or f"sym_{node_id}"

    root_rec = by_id.get(fid, {})
    root_name = _label(fid)
    root_sig = root_rec.get("signature") or ""
    root_lang = root_rec.get("language") or ""
    root_src = _read_source_snippet(root_rec)

    ordered_nodes = sorted(desc)

    # Subtree summary, capped to keep the prompt bounded.
    nodes_meta: List[str] = []
    for nid in ordered_nodes:
        display = _label(nid)
        signature = by_id.get(nid, {}).get("signature") or ""
        if signature and signature != display:
            nodes_meta.append(f"- {display} | {signature}")
        else:
            nodes_meta.append(f"- {display}")
    if len(nodes_meta) > 200:
        nodes_meta = nodes_meta[:200] + [f"...({len(desc)-200} more)"]

    # Source samples: the root plus up to two of its direct callees.
    samples: List[str] = []
    for sid in [fid, *adj_func.get(fid, [])[:2]]:
        sample_rec = by_id.get(sid, {})
        sample_name = _label(sid)
        sample_sig = sample_rec.get("signature") or ""
        sample_src = _read_source_snippet(sample_rec, max_lines=120)
        samples.append(f"--- BEGIN {sample_name} ---\n{sample_sig}\n{sample_src}\n--- END {sample_name} ---")

    # Call edges restricted to the subtree, computed once and rendered twice.
    edge_pairs = [
        (_label(u), _label(v))
        for u in ordered_nodes
        for v in adj_func.get(u, [])
        if v in desc
    ]
    edges_list = [f"{src} -> {dst}" for src, dst in edge_pairs]
    if len(edges_list) > 400:
        edges_text = "\n".join(edges_list[:400] + [f"...({len(edges_list) - 400} more edges)"])
    else:
        edges_text = "\n".join(edges_list)

    # DOT is only included for small graphs.
    dot_text = ""
    if len(edges_list) <= 200:
        dot_lines = ["digraph subtree {", "  rankdir=LR;"]
        dot_lines.extend(f'  "{src}" -> "{dst}";' for src, dst in edge_pairs)
        dot_lines.append("}")
        dot_text = "\n".join(dot_lines)

    disabled_hint = ""
    if disabled_display:
        disabled_hint = f"重要约束:禁止使用以下库(若这些库为唯一可行选项则判定为不可替代):{disabled_display}\n"

    dot_section = ""
    if dot_text:
        dot_section = f"DOT 表示(边数较少时提供):\n```dot\n{dot_text}\n```\n\n"

    sections = [
        "请评估以下 C/C++ 函数子树是否可以由一个或多个成熟的 Rust 库整体替代(语义等价或更强)。",
        "允许库内多个 API 协同,允许多个库组合;如果必须依赖尚不成熟/冷门库或非 Rust 库,则判定为不可替代。\n",
        disabled_hint,
        "输出格式:仅输出一个 <yaml> 块,字段: replaceable(bool), libraries(list[str]), confidence(float 0..1),",
        "可选字段: library(str,首选主库), api(str) 或 apis(list), notes(str: 简述如何由这些库协作实现的思路)。\n\n",
        f"根函数(被评估子树的根): {root_name}\n",
        f"签名: {root_sig}\n",
        f"语言: {root_lang}\n",
        "根函数源码片段(可能截断):\n",
        f"{root_src}\n\n",
        f"子树规模: {len(desc)} 个函数\n",
        "子树函数列表(名称|签名):\n",
        "\n".join(nodes_meta),
        "\n\n",
        "依赖图(调用边,caller -> callee):\n",
        f"{edges_text}\n\n",
        dot_section,
        "代表性源码样本(部分节点,可能截断,仅供辅助判断):\n",
        "\n".join(samples),
        "\n",
    ]
    return "".join(sections)
|
|
599
|
+
|
|
600
|
+
|
|
601
|
+
def _llm_evaluate_subtree(
|
|
602
|
+
fid: int,
|
|
603
|
+
desc: Set[int],
|
|
604
|
+
by_id: Dict[int, Dict[str, Any]],
|
|
605
|
+
adj_func: Dict[int, List[int]],
|
|
606
|
+
disabled_norm: List[str],
|
|
607
|
+
disabled_display: str,
|
|
608
|
+
_model_available: bool,
|
|
609
|
+
_new_model_func: Callable,
|
|
610
|
+
) -> Dict[str, Any]:
|
|
611
|
+
"""使用LLM评估子树是否可替代"""
|
|
612
|
+
if not _model_available:
|
|
613
|
+
return {"replaceable": False}
|
|
614
|
+
model = _new_model_func()
|
|
615
|
+
if not model:
|
|
616
|
+
return {"replaceable": False}
|
|
617
|
+
|
|
618
|
+
prompt = _build_subtree_prompt(fid, desc, by_id, adj_func, disabled_display)
|
|
619
|
+
|
|
620
|
+
try:
|
|
621
|
+
result = model.chat_until_success(prompt) # type: ignore
|
|
622
|
+
parsed, parse_error = _parse_agent_yaml_summary(result or "")
|
|
623
|
+
if parse_error:
|
|
624
|
+
# YAML解析失败,将错误信息反馈给模型
|
|
625
|
+
print(f"[c2rust-lib-replace] YAML解析失败: {parse_error}")
|
|
626
|
+
# 更新提示词,包含解析错误信息
|
|
627
|
+
prompt_with_error = (
|
|
628
|
+
prompt
|
|
629
|
+
+ f"\n\n**格式错误详情(请根据以下错误修复输出格式):**\n- {parse_error}\n\n"
|
|
630
|
+
+ "请确保输出的YAML格式正确,包括正确的缩进、引号、冒号等。"
|
|
631
|
+
)
|
|
632
|
+
result = model.chat_until_success(prompt_with_error) # type: ignore
|
|
633
|
+
parsed, parse_error = _parse_agent_yaml_summary(result or "")
|
|
634
|
+
if parse_error:
|
|
635
|
+
# 仍然失败,使用默认值
|
|
636
|
+
print(f"[c2rust-lib-replace] 重试后YAML解析仍然失败: {parse_error},使用默认值")
|
|
637
|
+
parsed = None
|
|
638
|
+
if isinstance(parsed, dict):
|
|
639
|
+
rep = bool(parsed.get("replaceable") is True)
|
|
640
|
+
lib = str(parsed.get("library") or "").strip()
|
|
641
|
+
api = str(parsed.get("api") or parsed.get("function") or "").strip()
|
|
642
|
+
apis = parsed.get("apis")
|
|
643
|
+
libs_raw = parsed.get("libraries")
|
|
644
|
+
notes = str(parsed.get("notes") or "").strip()
|
|
645
|
+
# 归一化 libraries
|
|
646
|
+
libraries: List[str] = []
|
|
647
|
+
if isinstance(libs_raw, list):
|
|
648
|
+
libraries = [str(x).strip() for x in libs_raw if str(x).strip()]
|
|
649
|
+
elif isinstance(libs_raw, str):
|
|
650
|
+
libraries = [s.strip() for s in libs_raw.split(",") if s.strip()]
|
|
651
|
+
conf = parsed.get("confidence")
|
|
652
|
+
try:
|
|
653
|
+
conf = float(conf)
|
|
654
|
+
except Exception:
|
|
655
|
+
conf = 0.0
|
|
656
|
+
# 不强制要求具体 API 或特定库名;若缺省且存在 library 字段,则纳入 libraries
|
|
657
|
+
if not libraries and lib:
|
|
658
|
+
libraries = [lib]
|
|
659
|
+
|
|
660
|
+
# 禁用库命中时,强制视为不可替代
|
|
661
|
+
if disabled_norm:
|
|
662
|
+
libs_lower = [lib_name.lower() for lib_name in libraries]
|
|
663
|
+
lib_single_lower = lib.lower() if lib else ""
|
|
664
|
+
banned_hit = any(lower_lib in disabled_norm for lower_lib in libs_lower) or (lib_single_lower and lib_single_lower in disabled_norm)
|
|
665
|
+
if banned_hit:
|
|
666
|
+
rep = False
|
|
667
|
+
warn_libs = ", ".join(sorted(set([lib] + libraries))) if (libraries or lib) else "(未提供库名)"
|
|
668
|
+
root_rec = by_id.get(fid, {})
|
|
669
|
+
root_name = root_rec.get("qualified_name") or root_rec.get("name") or f"sym_{fid}"
|
|
670
|
+
typer.secho(
|
|
671
|
+
f"[c2rust-library] 评估结果包含禁用库,强制判定为不可替代: {root_name} | 命中库: {warn_libs}",
|
|
672
|
+
fg=typer.colors.YELLOW,
|
|
673
|
+
err=True,
|
|
674
|
+
)
|
|
675
|
+
if notes:
|
|
676
|
+
notes = notes + f" | 禁用库命中: {warn_libs}"
|
|
677
|
+
else:
|
|
678
|
+
notes = f"禁用库命中: {warn_libs}"
|
|
679
|
+
|
|
680
|
+
result_obj: Dict[str, Any] = {
|
|
681
|
+
"replaceable": rep,
|
|
682
|
+
"library": lib,
|
|
683
|
+
"libraries": libraries,
|
|
684
|
+
"api": api,
|
|
685
|
+
"confidence": conf,
|
|
686
|
+
}
|
|
687
|
+
if isinstance(apis, list):
|
|
688
|
+
result_obj["apis"] = apis
|
|
689
|
+
if notes:
|
|
690
|
+
result_obj["notes"] = notes
|
|
691
|
+
return result_obj
|
|
692
|
+
typer.secho("[c2rust-library] LLM 结果解析失败,视为不可替代。", fg=typer.colors.YELLOW, err=True)
|
|
693
|
+
return {"replaceable": False}
|
|
694
|
+
except Exception as e:
|
|
695
|
+
typer.secho(f"[c2rust-library] LLM 评估失败,视为不可替代: {e}", fg=typer.colors.YELLOW, err=True)
|
|
696
|
+
return {"replaceable": False}
|
|
697
|
+
|
|
698
|
+
|
|
699
|
+
def _is_entry_function(
|
|
700
|
+
rec_meta: Dict[str, Any],
|
|
701
|
+
) -> bool:
|
|
702
|
+
"""判断是否为入口函数"""
|
|
703
|
+
nm = str(rec_meta.get("name") or "")
|
|
704
|
+
qn = str(rec_meta.get("qualified_name") or "")
|
|
705
|
+
# Configurable entry detection (avoid hard-coding 'main'):
|
|
706
|
+
# Honor env vars: JARVIS_C2RUST_DELAY_ENTRY_SYMBOLS / JARVIS_C2RUST_DELAY_ENTRIES / C2RUST_DELAY_ENTRIES
|
|
707
|
+
import os
|
|
708
|
+
entries_env = os.environ.get("JARVIS_C2RUST_DELAY_ENTRY_SYMBOLS") or \
|
|
709
|
+
os.environ.get("JARVIS_C2RUST_DELAY_ENTRIES") or \
|
|
710
|
+
os.environ.get("C2RUST_DELAY_ENTRIES") or ""
|
|
711
|
+
entries_set = set()
|
|
712
|
+
if entries_env:
|
|
713
|
+
try:
|
|
714
|
+
import re as _re
|
|
715
|
+
parts = _re.split(r"[,\s;]+", entries_env.strip())
|
|
716
|
+
except Exception:
|
|
717
|
+
parts = [p.strip() for p in entries_env.replace(";", ",").split(",")]
|
|
718
|
+
entries_set = {p.strip().lower() for p in parts if p and p.strip()}
|
|
719
|
+
if entries_set:
|
|
720
|
+
is_entry = (nm.lower() in entries_set) or (qn.lower() in entries_set) or any(qn.lower().endswith(f"::{e}") for e in entries_set)
|
|
721
|
+
else:
|
|
722
|
+
is_entry = (nm.lower() == "main") or (qn.lower() == "main") or qn.lower().endswith("::main")
|
|
723
|
+
return is_entry
|
|
724
|
+
|
|
725
|
+
|
|
726
|
+
def _write_output_symbols(
|
|
727
|
+
all_records: List[Dict[str, Any]],
|
|
728
|
+
pruned_funcs: Set[int],
|
|
729
|
+
selected_roots: List[Tuple[int, Dict[str, Any]]],
|
|
730
|
+
out_symbols_path: Path,
|
|
731
|
+
out_symbols_prune_path: Path,
|
|
732
|
+
) -> List[Dict[str, Any]]:
|
|
733
|
+
"""写出新符号表,返回替代映射列表"""
|
|
734
|
+
now_ts = time.strftime("%Y-%m-%dT%H:%M:%S", time.localtime())
|
|
735
|
+
kept_ids: Set[int] = set()
|
|
736
|
+
for rec in all_records:
|
|
737
|
+
fid = int(rec.get("id"))
|
|
738
|
+
cat = rec.get("category") or ""
|
|
739
|
+
if cat == "function":
|
|
740
|
+
if fid in pruned_funcs:
|
|
741
|
+
continue
|
|
742
|
+
kept_ids.add(fid)
|
|
743
|
+
else:
|
|
744
|
+
kept_ids.add(fid)
|
|
745
|
+
|
|
746
|
+
sel_root_ids = set(fid for fid, _ in selected_roots)
|
|
747
|
+
replacements: List[Dict[str, Any]] = []
|
|
748
|
+
|
|
749
|
+
with open(out_symbols_path, "w", encoding="utf-8") as fo, \
|
|
750
|
+
open(out_symbols_prune_path, "w", encoding="utf-8") as fo2:
|
|
751
|
+
|
|
752
|
+
for rec in all_records:
|
|
753
|
+
fid = int(rec.get("id"))
|
|
754
|
+
if fid not in kept_ids:
|
|
755
|
+
continue
|
|
756
|
+
|
|
757
|
+
rec_out = dict(rec)
|
|
758
|
+
if (rec.get("category") or "") == "function" and fid in sel_root_ids:
|
|
759
|
+
# 以库级替代为语义:不要求具体 API;将根 ref 设置为库占位符(支持多库组合)
|
|
760
|
+
conf = 0.0
|
|
761
|
+
api = ""
|
|
762
|
+
apis = None
|
|
763
|
+
libraries_out: List[str] = []
|
|
764
|
+
notes_out: str = ""
|
|
765
|
+
lib_single: str = ""
|
|
766
|
+
for rf, rres in selected_roots:
|
|
767
|
+
if rf == fid:
|
|
768
|
+
api = str(rres.get("api") or rres.get("function") or "")
|
|
769
|
+
apis = rres.get("apis")
|
|
770
|
+
libs_val = rres.get("libraries")
|
|
771
|
+
if isinstance(libs_val, list):
|
|
772
|
+
libraries_out = [str(x) for x in libs_val if str(x)]
|
|
773
|
+
lib_single = str(rres.get("library") or "").strip()
|
|
774
|
+
try:
|
|
775
|
+
conf = float(rres.get("confidence") or 0.0)
|
|
776
|
+
except Exception:
|
|
777
|
+
conf = 0.0
|
|
778
|
+
notes_val = rres.get("notes")
|
|
779
|
+
if isinstance(notes_val, str):
|
|
780
|
+
notes_out = notes_val
|
|
781
|
+
break
|
|
782
|
+
# 若 libraries 存在则使用多库占位;否则若存在单个 library 字段则使用之;否则置空
|
|
783
|
+
if libraries_out:
|
|
784
|
+
lib_markers = [f"lib::{lb}" for lb in libraries_out]
|
|
785
|
+
elif lib_single:
|
|
786
|
+
lib_markers = [f"lib::{lib_single}"]
|
|
787
|
+
else:
|
|
788
|
+
lib_markers = []
|
|
789
|
+
rec_out["ref"] = lib_markers
|
|
790
|
+
try:
|
|
791
|
+
rec_out["updated_at"] = now_ts
|
|
792
|
+
except Exception:
|
|
793
|
+
pass
|
|
794
|
+
# 保存库替代元数据到符号表,供后续转译阶段作为上下文使用
|
|
795
|
+
try:
|
|
796
|
+
meta_apis = apis if isinstance(apis, list) else ([api] if api else [])
|
|
797
|
+
lib_primary = libraries_out[0] if libraries_out else lib_single
|
|
798
|
+
rec_out["lib_replacement"] = {
|
|
799
|
+
"libraries": libraries_out,
|
|
800
|
+
"library": lib_primary or "",
|
|
801
|
+
"apis": meta_apis,
|
|
802
|
+
"api": api,
|
|
803
|
+
"confidence": float(conf) if isinstance(conf, (int, float)) else 0.0,
|
|
804
|
+
"notes": notes_out,
|
|
805
|
+
"mode": "llm",
|
|
806
|
+
"updated_at": now_ts,
|
|
807
|
+
}
|
|
808
|
+
except Exception:
|
|
809
|
+
# 忽略写入元数据失败,不阻塞主流程
|
|
810
|
+
pass
|
|
811
|
+
rep_obj: Dict[str, Any] = {
|
|
812
|
+
"id": fid,
|
|
813
|
+
"name": rec.get("name") or "",
|
|
814
|
+
"qualified_name": rec.get("qualified_name") or "",
|
|
815
|
+
"library": (libraries_out[0] if libraries_out else lib_single),
|
|
816
|
+
"libraries": libraries_out,
|
|
817
|
+
"function": api,
|
|
818
|
+
"confidence": conf,
|
|
819
|
+
"mode": "llm",
|
|
820
|
+
}
|
|
821
|
+
if isinstance(apis, list):
|
|
822
|
+
rep_obj["apis"] = apis
|
|
823
|
+
if notes_out:
|
|
824
|
+
rep_obj["notes"] = notes_out
|
|
825
|
+
replacements.append(rep_obj)
|
|
826
|
+
|
|
827
|
+
line = json.dumps(rec_out, ensure_ascii=False) + "\n"
|
|
828
|
+
fo.write(line)
|
|
829
|
+
fo2.write(line)
|
|
830
|
+
# 不覆写 symbols.jsonl(保留原始扫描/整理结果作为基线)
|
|
831
|
+
|
|
832
|
+
return replacements
|
|
833
|
+
|
|
834
|
+
|
|
835
|
+
def apply_library_replacement(
    db_path: Path,
    library_name: str,
    llm_group: Optional[str] = None,
    candidates: Optional[List[str]] = None,
    out_symbols_path: Optional[Path] = None,
    out_mapping_path: Optional[Path] = None,
    max_funcs: Optional[int] = None,
    disabled_libraries: Optional[List[str]] = None,
    resume: bool = True,
    checkpoint_path: Optional[Path] = None,
    checkpoint_interval: int = 1,
    clear_checkpoint_on_done: bool = True,
    non_interactive: bool = True,
) -> Dict[str, Path]:
    """Prune function subtrees that an LLM judges replaceable by Rust libraries.

    The symbol table is loaded, a function call graph is built and the
    candidate roots are evaluated depth-first. When a root's subtree is judged
    replaceable, its descendants are marked for pruning and the root is
    recorded as a replacement root; otherwise its callees are evaluated in
    turn. Entry functions (see ``_is_entry_function``) are never replaced
    themselves; their callees are evaluated instead.

    Args:
        db_path: Path to ``symbols.jsonl`` or to the directory containing it.
        library_name: Library name; within this function it is only used as
            part of the checkpoint key.
        llm_group: Optional model group used when creating the model; also part
            of the checkpoint key.
        candidates: Optional function names restricting which roots are
            evaluated (handled by ``_process_candidate_scope``).
        out_symbols_path: Output symbol table path; a default is chosen by
            ``_setup_output_paths`` when omitted.
        out_mapping_path: Output mapping path; defaulted the same way.
        max_funcs: Maximum number of LLM evaluations; ``None`` means unlimited.
        disabled_libraries: Libraries that must not be used; a result naming one
            is treated as not replaceable.
        resume: When true, a matching checkpoint is loaded and checkpoints are
            written during evaluation.
        checkpoint_path: Checkpoint file; defaults to
            ``library_replacer_checkpoint.json`` next to ``symbols.jsonl``.
        checkpoint_interval: Save a checkpoint every this many evaluations;
            values ``<= 0`` save after every evaluation.
        clear_checkpoint_on_done: Delete the checkpoint file after a completed
            run (only when ``resume`` is true).
        non_interactive: Not used by this function.

    Returns:
        A dict with the keys ``symbols``, ``mapping`` and ``symbols_prune``,
        plus ``order`` when the translation order could be generated and
        ``order_prune`` when an order path is configured.

    Raises:
        FileNotFoundError: If ``symbols.jsonl`` cannot be found.
    """
    sjsonl = _resolve_symbols_jsonl_path(db_path)
    if not sjsonl.exists():
        raise FileNotFoundError(f"未找到 symbols.jsonl: {sjsonl}")

    data_dir = sjsonl.parent
    out_symbols_path, out_mapping_path, out_symbols_prune_path, order_prune_path, alias_order_path = _setup_output_paths(
        data_dir, out_symbols_path, out_mapping_path
    )

    # Checkpoint file: explicit path or the default next to symbols.jsonl.
    ckpt_path: Path = (
        Path(checkpoint_path)
        if checkpoint_path is not None
        else (data_dir / "library_replacer_checkpoint.json")
    )

    # Load the symbols and build the function-to-function call edges (id -> ids).
    all_records, by_id, name_to_id, func_ids, id_refs_names = _load_symbols(sjsonl)
    adj_func = _build_function_graph(func_ids, id_refs_names, name_to_id)

    # Evaluation order of the roots.
    root_funcs = _build_evaluation_order(sjsonl, func_ids, adj_func)

    # Reachability cache, shared by candidate scoping and the evaluation below.
    desc_cache: Dict[int, Set[int]] = {}

    # With `candidates`, only those roots are evaluated and functions not
    # reachable from them are removed from the output (type records are kept).
    root_funcs, scope_unreachable_funcs = _process_candidate_scope(
        candidates, all_records, root_funcs, func_ids, adj_func, desc_cache
    )

    # LLM availability and model factory inputs.
    _model_available, PlatformRegistry, get_normal_platform_name, get_normal_model_name = _check_llm_availability()

    # Normalised disabled-library names plus their display string.
    disabled_norm, disabled_display = _normalize_disabled_libraries(disabled_libraries)

    checkpoint_key = _make_checkpoint_key(sjsonl, library_name, llm_group, candidates, disabled_libraries, max_funcs)

    def _new_model() -> Optional[Any]:
        return _create_llm_model(llm_group, disabled_display, _model_available, PlatformRegistry, get_normal_platform_name, get_normal_model_name)

    # Evaluation state.
    eval_counter = 0  # number of LLM evaluations performed
    pruned_dynamic: Set[int] = set()  # functions to prune (selected roots excluded)
    selected_roots: List[Tuple[int, Dict[str, Any]]] = []  # (fid, LLM result) of replaceable roots
    processed_roots: Set[int] = set()  # nodes that have already been evaluated
    last_ckpt_saved = 0  # value of eval_counter at the last checkpoint save
    # Nodes restored from a checkpoint as "evaluated, not replaced": their
    # callees may not have been visited before the previous run stopped.
    resume_pending: Set[int] = set()

    # Restore the state from a matching checkpoint, if any.
    _loaded_ckpt = _load_checkpoint_if_match(ckpt_path, resume, checkpoint_key)
    if resume and _loaded_ckpt:
        try:
            eval_counter = int(_loaded_ckpt.get("eval_counter") or 0)
        except (AttributeError, TypeError, ValueError):
            pass  # keep the initial counter
        try:
            processed_roots = set(int(x) for x in (_loaded_ckpt.get("processed_roots") or []))
        except (AttributeError, TypeError, ValueError):
            processed_roots = set()
        try:
            pruned_dynamic = set(int(x) for x in (_loaded_ckpt.get("pruned_dynamic") or []))
        except (AttributeError, TypeError, ValueError):
            pruned_dynamic = set()
        try:
            sr_list = []
            for it in (_loaded_ckpt.get("selected_roots") or []):
                if isinstance(it, dict) and "fid" in it and "res" in it:
                    sr_list.append((int(it["fid"]), it["res"]))
            selected_roots = sr_list
        except (AttributeError, TypeError, ValueError):
            selected_roots = []
        resume_pending = processed_roots - {rid for rid, _ in selected_roots} - pruned_dynamic
        typer.secho(
            f"[c2rust-library] 已从断点恢复: 已评估={eval_counter}, 已处理根={len(processed_roots)}, 已剪除={len(pruned_dynamic)}, 已选中替代根={len(selected_roots)}",
            fg=typer.colors.YELLOW,
            err=True,
        )

    def _current_checkpoint_state() -> Dict[str, Any]:
        """Snapshot of the evaluation state in its JSON-serialisable form."""
        return {
            "key": checkpoint_key,
            "eval_counter": eval_counter,
            "processed_roots": sorted(processed_roots),
            "pruned_dynamic": sorted(pruned_dynamic),
            "selected_roots": [{"fid": fid, "res": res} for fid, res in selected_roots],
            "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S", time.localtime()),
        }

    def _periodic_checkpoint_save(force: bool = False) -> None:
        """Save a checkpoint when forced or when the interval has elapsed."""
        nonlocal last_ckpt_saved
        if not resume:
            return
        try:
            interval = int(checkpoint_interval)
        except (TypeError, ValueError):
            interval = 1
        need_save = force or (interval <= 0) or ((eval_counter - last_ckpt_saved) >= interval)
        if not need_save:
            return
        try:
            _atomic_write(ckpt_path, json.dumps(_current_checkpoint_state(), ensure_ascii=False, indent=2))
            last_ckpt_saved = eval_counter
        except Exception as e:
            # Checkpointing is best-effort: report the failure and carry on.
            typer.secho(f"[c2rust-library] 保存断点失败(已忽略): {e}", fg=typer.colors.YELLOW, err=True)

    def _replaceable_summary(label: Any, res: Dict[str, Any]) -> str:
        """Build the one-line summary (libraries / APIs / confidence / notes)."""
        libs = res.get("libraries") or ([res.get("library")] if res.get("library") else [])
        libs = [str(x) for x in libs if str(x)]
        api = str(res.get("api") or "")
        apis = res.get("apis")
        notes = str(res.get("notes") or "")
        try:
            conf = float(res.get("confidence"))
        except Exception:
            conf = 0.0
        libs_str = ", ".join(libs) if libs else "(未指定库)"
        apis_str = ", ".join([str(a) for a in apis]) if isinstance(apis, list) else (api if api else "")
        msg = f"[c2rust-library] 可替换: {label} -> 库: {libs_str}"
        if apis_str:
            msg += f"; 参考API: {apis_str}"
        msg += f"; 置信度: {conf:.2f}"
        if notes:
            msg += f"; 备注: {notes[:200]}"
        return msg

    def _evaluate_node(fid: int) -> None:
        """Evaluate one node; descend into its callees when it is not replaced."""
        nonlocal eval_counter
        # Evaluation budget exhausted.
        if max_funcs is not None and eval_counter >= max_funcs:
            return
        # Already pruned, or not a known function.
        if fid in pruned_dynamic or fid not in func_ids:
            return
        if fid in processed_roots:
            # Evaluated (and not replaced) by an interrupted previous run: its
            # callees are reached only through it, so walk them once without
            # asking the LLM again. Discarding first guards against cycles.
            if fid in resume_pending:
                resume_pending.discard(fid)
                for ch in adj_func.get(fid, []):
                    _evaluate_node(ch)
            return

        # Collect the subtree and report progress.
        desc = _collect_descendants(fid, adj_func, desc_cache)
        rec_meta = by_id.get(fid, {})
        label = rec_meta.get("qualified_name") or rec_meta.get("name") or f"sym_{fid}"
        typer.secho(
            f"[c2rust-library] 正在评估: {label} (ID: {fid}), 子树函数数={len(desc)}",
            fg=typer.colors.CYAN,
            err=True,
        )

        res = _llm_evaluate_subtree(
            fid, desc, by_id, adj_func, disabled_norm, disabled_display,
            _model_available, _new_model
        )
        eval_counter += 1
        processed_roots.add(fid)
        res["mode"] = "llm"

        try:
            replaceable = res.get("replaceable") is True
            # Entry functions are kept for translation even when replaceable.
            is_entry = replaceable and _is_entry_function(rec_meta)
            selected = replaceable and not is_entry
            newly = 0
            if selected:
                # Prune the descendants immediately (the root itself stays).
                to_prune = set(desc)
                to_prune.discard(fid)
                newly = len(to_prune - pruned_dynamic)
                pruned_dynamic.update(to_prune)
                selected_roots.append((fid, res))

            # Save only once the node's state is complete, so that a checkpoint
            # never lists a root as processed without its selection.
            _periodic_checkpoint_save()

            if replaceable:
                typer.secho(_replaceable_summary(label, res), fg=typer.colors.GREEN, err=True)
            if selected:
                typer.secho(
                    f"[c2rust-library] 即时标记剪除子节点(本次新增): +{newly} 个 (累计={len(pruned_dynamic)})",
                    fg=typer.colors.MAGENTA,
                    err=True,
                )
                return
            if is_entry:
                typer.secho(
                    "[c2rust-library] 入口函数保护:跳过对 main 的库替代,继续评估其子节点。",
                    fg=typer.colors.YELLOW,
                    err=True,
                )
            # Not replaced: continue depth-first with the callees.
            for ch in adj_func.get(fid, []):
                _evaluate_node(ch)
        except Exception as e:
            # One failing node must not abort the run, but the failure is
            # reported instead of being dropped silently.
            typer.secho(
                f"[c2rust-library] 处理评估结果时出错(已忽略): {label}: {e}",
                fg=typer.colors.YELLOW,
                err=True,
            )

    # Evaluate every candidate root; non-replaceable roots recurse into callees.
    for fid in root_funcs:
        _evaluate_node(fid)

    # Final prune set: the dynamically pruned functions plus, when candidates
    # were given, the functions outside their scope.
    pruned_funcs: Set[int] = set(pruned_dynamic)
    try:
        pruned_funcs.update(scope_unreachable_funcs or ())
    except TypeError as e:
        typer.secho(f"[c2rust-library] 合并不可达函数集合失败(已忽略): {e}", fg=typer.colors.YELLOW, err=True)

    # Write the new symbol table.
    replacements = _write_output_symbols(
        all_records, pruned_funcs, selected_roots,
        out_symbols_path, out_symbols_prune_path
    )

    # Write the replacement mapping, one JSON object per line.
    with open(out_mapping_path, "w", encoding="utf-8") as fm:
        for m in replacements:
            fm.write(json.dumps(m, ensure_ascii=False) + "\n")

    # Generate the translation order from the pruned table and copy it to the
    # alias path.
    order_path = None
    try:
        compute_translation_order_jsonl(Path(out_symbols_path), out_path=order_prune_path)
        shutil.copy2(order_prune_path, alias_order_path)
        order_path = alias_order_path
    except Exception as e:
        typer.secho(f"[c2rust-library] 基于剪枝符号表生成翻译顺序失败: {e}", fg=typer.colors.YELLOW, err=True)

    # Optionally remove the checkpoint now that the run has completed.
    if resume and clear_checkpoint_on_done:
        try:
            if ckpt_path.exists():
                ckpt_path.unlink()
                typer.secho(f"[c2rust-library] 已清理断点文件: {ckpt_path}", fg=typer.colors.BLUE, err=True)
        except OSError as e:
            typer.secho(f"[c2rust-library] 清理断点文件失败(已忽略): {e}", fg=typer.colors.YELLOW, err=True)

    typer.secho(
        "[c2rust-library] 库替代剪枝完成(LLM 子树评估):\n"
        f"- 选中替代根: {len(selected_roots)} 个\n"
        f"- 剪除函数: {len(pruned_funcs)} 个\n"
        f"- 新符号表: {out_symbols_path}\n"
        f"- 替代映射: {out_mapping_path}\n"
        f"- 兼容符号表输出: {out_symbols_prune_path}\n"
        + (f"- 转译顺序: {order_path}\n" if order_path else "")
        + f"- 兼容顺序输出: {order_prune_path}",
        fg=typer.colors.GREEN,
    )

    result: Dict[str, Path] = {
        "symbols": Path(out_symbols_path),
        "mapping": Path(out_mapping_path),
        "symbols_prune": Path(out_symbols_prune_path),
    }
    if order_path:
        result["order"] = Path(order_path)
    if order_prune_path:
        result["order_prune"] = Path(order_prune_path)
    return result
|
|
1119
|
+
|
|
1120
|
+
|
|
1121
|
+
# Public API of this module.
__all__ = ["apply_library_replacement"]
|