jarvis-ai-assistant 0.3.30__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff shows the content changes between two publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.
- jarvis/__init__.py +1 -1
- jarvis/jarvis_agent/__init__.py +289 -87
- jarvis/jarvis_agent/agent_manager.py +17 -8
- jarvis/jarvis_agent/edit_file_handler.py +374 -86
- jarvis/jarvis_agent/event_bus.py +1 -1
- jarvis/jarvis_agent/file_context_handler.py +79 -0
- jarvis/jarvis_agent/jarvis.py +601 -43
- jarvis/jarvis_agent/main.py +32 -2
- jarvis/jarvis_agent/rewrite_file_handler.py +141 -0
- jarvis/jarvis_agent/run_loop.py +38 -5
- jarvis/jarvis_agent/share_manager.py +8 -1
- jarvis/jarvis_agent/stdio_redirect.py +295 -0
- jarvis/jarvis_agent/task_analyzer.py +5 -2
- jarvis/jarvis_agent/task_planner.py +496 -0
- jarvis/jarvis_agent/utils.py +5 -1
- jarvis/jarvis_agent/web_bridge.py +189 -0
- jarvis/jarvis_agent/web_output_sink.py +53 -0
- jarvis/jarvis_agent/web_server.py +751 -0
- jarvis/jarvis_c2rust/__init__.py +26 -0
- jarvis/jarvis_c2rust/cli.py +613 -0
- jarvis/jarvis_c2rust/collector.py +258 -0
- jarvis/jarvis_c2rust/library_replacer.py +1122 -0
- jarvis/jarvis_c2rust/llm_module_agent.py +1300 -0
- jarvis/jarvis_c2rust/optimizer.py +960 -0
- jarvis/jarvis_c2rust/scanner.py +1681 -0
- jarvis/jarvis_c2rust/transpiler.py +2325 -0
- jarvis/jarvis_code_agent/build_validation_config.py +133 -0
- jarvis/jarvis_code_agent/code_agent.py +1171 -94
- jarvis/jarvis_code_agent/code_analyzer/__init__.py +62 -0
- jarvis/jarvis_code_agent/code_analyzer/base_language.py +74 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/__init__.py +44 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/base.py +102 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/cmake.py +59 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/detector.py +125 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/fallback.py +69 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/go.py +38 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/java_gradle.py +44 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/java_maven.py +38 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/makefile.py +50 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/nodejs.py +93 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/python.py +129 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/rust.py +54 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/validator.py +154 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator.py +43 -0
- jarvis/jarvis_code_agent/code_analyzer/context_manager.py +363 -0
- jarvis/jarvis_code_agent/code_analyzer/context_recommender.py +18 -0
- jarvis/jarvis_code_agent/code_analyzer/dependency_analyzer.py +132 -0
- jarvis/jarvis_code_agent/code_analyzer/file_ignore.py +330 -0
- jarvis/jarvis_code_agent/code_analyzer/impact_analyzer.py +781 -0
- jarvis/jarvis_code_agent/code_analyzer/language_registry.py +185 -0
- jarvis/jarvis_code_agent/code_analyzer/language_support.py +89 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/__init__.py +31 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/c_cpp_language.py +231 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/go_language.py +183 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/python_language.py +219 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/rust_language.py +209 -0
- jarvis/jarvis_code_agent/code_analyzer/llm_context_recommender.py +451 -0
- jarvis/jarvis_code_agent/code_analyzer/symbol_extractor.py +77 -0
- jarvis/jarvis_code_agent/code_analyzer/tree_sitter_extractor.py +48 -0
- jarvis/jarvis_code_agent/lint.py +270 -8
- jarvis/jarvis_code_agent/utils.py +142 -0
- jarvis/jarvis_code_analysis/code_review.py +483 -569
- jarvis/jarvis_data/config_schema.json +97 -8
- jarvis/jarvis_git_utils/git_commiter.py +38 -26
- jarvis/jarvis_mcp/sse_mcp_client.py +2 -2
- jarvis/jarvis_mcp/stdio_mcp_client.py +1 -1
- jarvis/jarvis_memory_organizer/memory_organizer.py +1 -1
- jarvis/jarvis_multi_agent/__init__.py +239 -25
- jarvis/jarvis_multi_agent/main.py +37 -1
- jarvis/jarvis_platform/base.py +103 -51
- jarvis/jarvis_platform/openai.py +26 -1
- jarvis/jarvis_platform/yuanbao.py +1 -1
- jarvis/jarvis_platform_manager/service.py +2 -2
- jarvis/jarvis_rag/cli.py +4 -4
- jarvis/jarvis_sec/__init__.py +3605 -0
- jarvis/jarvis_sec/checkers/__init__.py +32 -0
- jarvis/jarvis_sec/checkers/c_checker.py +2680 -0
- jarvis/jarvis_sec/checkers/rust_checker.py +1108 -0
- jarvis/jarvis_sec/cli.py +116 -0
- jarvis/jarvis_sec/report.py +257 -0
- jarvis/jarvis_sec/status.py +264 -0
- jarvis/jarvis_sec/types.py +20 -0
- jarvis/jarvis_sec/workflow.py +219 -0
- jarvis/jarvis_stats/cli.py +1 -1
- jarvis/jarvis_stats/stats.py +1 -1
- jarvis/jarvis_stats/visualizer.py +1 -1
- jarvis/jarvis_tools/cli/main.py +1 -0
- jarvis/jarvis_tools/execute_script.py +46 -9
- jarvis/jarvis_tools/generate_new_tool.py +3 -1
- jarvis/jarvis_tools/read_code.py +275 -12
- jarvis/jarvis_tools/read_symbols.py +141 -0
- jarvis/jarvis_tools/read_webpage.py +5 -3
- jarvis/jarvis_tools/registry.py +73 -35
- jarvis/jarvis_tools/search_web.py +15 -11
- jarvis/jarvis_tools/sub_agent.py +24 -42
- jarvis/jarvis_tools/sub_code_agent.py +14 -13
- jarvis/jarvis_tools/virtual_tty.py +1 -1
- jarvis/jarvis_utils/config.py +187 -35
- jarvis/jarvis_utils/embedding.py +3 -0
- jarvis/jarvis_utils/git_utils.py +181 -6
- jarvis/jarvis_utils/globals.py +3 -3
- jarvis/jarvis_utils/http.py +1 -1
- jarvis/jarvis_utils/input.py +78 -2
- jarvis/jarvis_utils/methodology.py +25 -19
- jarvis/jarvis_utils/utils.py +644 -359
- {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/METADATA +85 -1
- jarvis_ai_assistant-0.7.0.dist-info/RECORD +192 -0
- {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/entry_points.txt +4 -0
- jarvis/jarvis_agent/config.py +0 -92
- jarvis/jarvis_tools/edit_file.py +0 -179
- jarvis/jarvis_tools/rewrite_file.py +0 -191
- jarvis_ai_assistant-0.3.30.dist-info/RECORD +0 -137
- {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/WHEEL +0 -0
- {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/licenses/LICENSE +0 -0
- {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/top_level.txt +0 -0
jarvis/jarvis_c2rust/optimizer.py (new file, +960 lines)
@@ -0,0 +1,960 @@
# -*- coding: utf-8 -*-
"""
Rust code optimizer: runs a set of conservative optimization passes over a transpiled or generated Rust project.

Goals and strategy (conservative, revertible):
1) unsafe cleanup:
   - Identify removable `unsafe { ... }` wrappers, try removing them, then run `cargo check`
   - If compilation fails, roll back that change and add a `/// SAFETY: ` note before the block or the adjacent function
2) Code structure optimization (duplicate-code hints / minimal elimination):
   - Simple text-based duplicate-function detection (signature + body text); duplicates get a TODO doc hint
   - In the CodeAgent phase, minimally extracting shared helper functions to remove duplication is allowed (when it can be done safely and easily)
3) Visibility optimization (smallest possible visibility):
   - Try downgrading `pub fn` to `pub(crate) fn`, verifying each change with `cargo check`
   - Roll back on failure
   - In the CodeAgent phase, further visibility reduction is allowed as long as the API is not broken (external interfaces stay pub)
4) Documentation augmentation:
   - Add basic placeholder docs for modules/functions that lack documentation

Implementation notes:
- Optimization is performed per file; every small change is verified with cargo check
- All edits keep the minimal necessary textual change and are rolled back immediately on failure
- A result summary and log are written to <crate_dir>/.jarvis/c2rust/optimize_report.json
- Progress tracking (resumable runs): <crate_dir>/.jarvis/c2rust/optimize_progress.json
  - Field processed: files already optimized (paths relative to the crate root, POSIX slashes)

Limitations:
- No dependency on rust-analyzer/LSP; relies mainly on static text plus `cargo check` verification
- Complex syntax, macros, conditional compilation, etc. may cause misses or false positives; handling stays as conservative as possible
- A CodeAgent-driven "whole-crate optimization" phase is provided, modeled on the transpiler's CodeAgent usage; that phase emits patches and runs one cargo check verification

Entry point:
- optimize_project(crate_dir: Optional[Path], ...) is the simple public entry point
"""

from __future__ import annotations

import json
import os
import re
import shutil
import subprocess
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Iterable, Set
import fnmatch

# Import CodeAgent (mirrors the transpiler's usage)
from jarvis.jarvis_code_agent.code_agent import CodeAgent


@dataclass
class OptimizeOptions:
    enable_unsafe_cleanup: bool = True
    enable_structure_opt: bool = True
    enable_visibility_opt: bool = True
    enable_doc_opt: bool = True
    max_checks: int = 0  # 0 means unlimited; caps the number of cargo check runs (to avoid excessive slowness)
    dry_run: bool = False
    # Batch controls for optimizing large projects
    include_patterns: Optional[str] = None  # comma-separated globs, relative to the crate root (src/**.rs supported)
    exclude_patterns: Optional[str] = None  # comma-separated globs
    max_files: int = 0  # maximum number of files to process in this run (0 = unlimited)
    resume: bool = True  # resume from checkpoint: skip files already processed
    reset_progress: bool = False  # reset progress (clear the processed list)
    build_fix_retries: int = 3  # number of fix retries when the build fails
    # Git guard: snapshot commit before optimizing; automatically reset to the snapshot on failure
    git_guard: bool = True
    llm_group: Optional[str] = None
    cargo_test_timeout: int = 300  # cargo test timeout (seconds)
    non_interactive: bool = True


@dataclass
class OptimizeStats:
    files_scanned: int = 0
    unsafe_removed: int = 0
    unsafe_annotated: int = 0
    duplicates_tagged: int = 0
    visibility_downgraded: int = 0
    docs_added: int = 0
    cargo_checks: int = 0
    errors: List[str] = None

    def __post_init__(self):
        if self.errors is None:
            self.errors = []
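Aside: the two dataclasses above also define the on-disk artifacts mentioned in the module docstring. A minimal sketch (not part of the package) of what the optimizer writes under `<crate_dir>/.jarvis/c2rust/`; values and paths are illustrative:

```python
from dataclasses import asdict

# optimize_report.json mirrors OptimizeStats via asdict(...)
report = asdict(OptimizeStats(files_scanned=12, unsafe_removed=3, cargo_checks=40))
# -> {"files_scanned": 12, "unsafe_removed": 3, ..., "errors": []}

# optimize_progress.json holds only the resumable-run checkpoint
progress = {"processed": ["src/lib.rs", "src/util/mod.rs"]}  # crate-relative POSIX paths
```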
def _run_cmd(cmd: List[str], cwd: Path, env: Optional[Dict[str, str]] = None, timeout: Optional[int] = None) -> Tuple[int, str, str]:
    p = subprocess.Popen(
        cmd,
        cwd=str(cwd),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        env=dict(os.environ, **(env or {})),
    )
    try:
        out, err = p.communicate(timeout=timeout if timeout and timeout > 0 else None)
        return p.returncode, out, err
    except subprocess.TimeoutExpired:
        p.kill()
        out, err = p.communicate()
        err_msg = f"Command '{' '.join(cmd)}' timed out after {timeout} seconds."
        if err:
            err_msg += f"\n{err}"
        return -1, out, err_msg


def _cargo_check(crate_dir: Path, stats: OptimizeStats, max_checks: int, timeout: Optional[int] = None) -> Tuple[bool, str]:
    # Uniformly use cargo test as the verification method
    if max_checks and stats.cargo_checks >= max_checks:
        return False, "cargo test budget exhausted"
    code, out, err = _run_cmd(["cargo", "test", "-q"], crate_dir, timeout=timeout)
    stats.cargo_checks += 1
    ok = code == 0
    diag = err.strip() or out.strip()
    # Use the first line as the summary
    first_line = next((ln for ln in diag.splitlines() if ln.strip()), "")
    return ok, first_line


def _cargo_check_full(crate_dir: Path, stats: OptimizeStats, max_checks: int, timeout: Optional[int] = None) -> Tuple[bool, str]:
    """
    Run cargo test and return whether it succeeded together with the full output (stdout+stderr).
    Counts against stats.cargo_checks and is subject to the max_checks budget.
    """
    if max_checks and stats.cargo_checks >= max_checks:
        return False, "cargo test budget exhausted"
    try:
        res = subprocess.run(
            ["cargo", "test", "-q"],
            capture_output=True,
            text=True,
            check=False,
            cwd=str(crate_dir),
            timeout=timeout if timeout and timeout > 0 else None,
        )
        stats.cargo_checks += 1
        ok = (res.returncode == 0)
        out = (res.stdout or "")
        err = (res.stderr or "")
        msg = (out + ("\n" + err if err else "")).strip()
        return ok, msg
    except subprocess.TimeoutExpired as e:
        stats.cargo_checks += 1
        out_s = e.stdout.decode("utf-8", errors="ignore") if e.stdout else ""
        err_s = e.stderr.decode("utf-8", errors="ignore") if e.stderr else ""
        msg = f"cargo test timed out after {timeout} seconds"
        full_output = (out_s + ("\n" + err_s if err_s else "")).strip()
        if full_output:
            msg += f"\nOutput:\n{full_output}"
        return False, msg
    except Exception as e:
        stats.cargo_checks += 1
        return False, f"cargo test exception: {e}"
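Aside: a short usage sketch (not from the package) contrasting the two verification helpers above. `_cargo_check` keeps only the first diagnostic line for terse per-edit checks, while `_cargo_check_full` returns the full output that the fix loop feeds back to CodeAgent; the crate path and values are illustrative:

```python
stats = OptimizeStats()

ok, first_line = _cargo_check(Path("./demo_rs"), stats, max_checks=0, timeout=300)
# first_line: only the first non-empty diagnostic line, e.g. "error[E0133]: ..."

ok_full, full_output = _cargo_check_full(Path("./demo_rs"), stats, max_checks=0, timeout=300)
# full_output: complete stdout+stderr of `cargo test -q`

assert stats.cargo_checks == 2  # both helpers count against the max_checks budget
```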
def _git_is_repo(root: Path) -> bool:
    try:
        code, out, err = _run_cmd(["git", "rev-parse", "--is-inside-work-tree"], root)
        return code == 0 and (out.strip() == "true" or (not out.strip() and "true" in (err or "")))
    except Exception:
        return False


def _git_toplevel(start: Path) -> Optional[Path]:
    """
    Return the root of the Git repository containing start (--show-toplevel), or None if not inside a repository.
    """
    try:
        code, out, err = _run_cmd(["git", "rev-parse", "--show-toplevel"], start)
        if code == 0:
            p = (out or "").strip()
            if p:
                return Path(p)
        return None
    except Exception:
        return None


def _git_head_commit(root: Path) -> Optional[str]:
    try:
        code, out, err = _run_cmd(["git", "rev-parse", "--verify", "HEAD"], root)
        if code == 0:
            return out.strip()
        return None
    except Exception:
        return None


def _git_reset_hard(root: Path, commit: str) -> bool:
    try:
        code, _, _ = _run_cmd(["git", "reset", "--hard", commit], root)
        if code != 0:
            return False
        return True
    except Exception:
        return False


def _iter_rust_files(crate_dir: Path) -> Iterable[Path]:
    src = crate_dir / "src"
    if not src.exists():
        # Still walk the whole crate directory, but prefer src
        yield from crate_dir.rglob("*.rs")
        return
    # Walk src first
    yield from src.rglob("*.rs")


def _read_file(path: Path) -> str:
    return path.read_text(encoding="utf-8")


def _write_file(path: Path, content: str) -> None:
    path.write_text(content, encoding="utf-8")


def _backup_file(path: Path) -> Path:
    bak = path.with_suffix(path.suffix + ".bak_opt")
    shutil.copy2(path, bak)
    return bak


def _restore_file_from_backup(path: Path, backup: Path) -> None:
    shutil.move(str(backup), str(path))


def _remove_backup(backup: Path) -> None:
    if backup.exists():
        backup.unlink(missing_ok=True)


def _ensure_report_dir(crate_dir: Path) -> Path:
    report_dir = crate_dir / ".jarvis" / "c2rust"
    report_dir.mkdir(parents=True, exist_ok=True)
    return report_dir


def detect_crate_dir(preferred: Optional[Path]) -> Path:
    """
    Crate directory selection strategy:
    - If preferred is given and contains Cargo.toml, use it
    - Otherwise: prefer <cwd>/<cwd.name>_rs if it contains Cargo.toml
    - Otherwise: look for the first directory under the current directory that contains Cargo.toml
    - If that fails: return the current directory if it has Cargo.toml, otherwise raise
    """
    if preferred:
        preferred = preferred.resolve()
        if (preferred / "Cargo.toml").exists():
            return preferred

    cwd = Path(".").resolve()
    candidate = cwd / f"{cwd.name}_rs"
    if (candidate / "Cargo.toml").exists():
        return candidate

    # Look for the first directory containing Cargo.toml (depth limited to 2 to avoid slowness)
    for p in [cwd] + [d for d in cwd.iterdir() if d.is_dir()]:
        if (p / "Cargo.toml").exists():
            return p

    if (cwd / "Cargo.toml").exists():
        return cwd
    raise FileNotFoundError("Cargo.toml not found; the crate directory could not be determined.")
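Aside: a sketch of the lookup order implemented by `detect_crate_dir`, assuming the working directory is a C project named `foo` (all paths are illustrative):

```python
# 1) An explicit directory that contains Cargo.toml wins:
crate = detect_crate_dir(Path("/work/foo/foo_rs"))

# 2) Otherwise ./foo_rs is tried, then the first immediate subdirectory
#    containing Cargo.toml, then the current directory itself:
crate = detect_crate_dir(None)

# 3) If nothing contains Cargo.toml, FileNotFoundError is raised.
```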
class Optimizer:
    def __init__(self, crate_dir: Path, options: OptimizeOptions):
        self.crate_dir = crate_dir
        self.options = options
        self.stats = OptimizeStats()
        # Progress file
        self.report_dir = _ensure_report_dir(self.crate_dir)
        self.progress_path = self.report_dir / "optimize_progress.json"
        self.processed: Set[str] = set()
        self._target_files: List[Path] = []
        self._load_or_reset_progress()
        self._last_snapshot_commit: Optional[str] = None
        self.log_prefix = "[c2rust-optimizer]"

    def _snapshot_commit(self) -> None:
        """
        When git_guard is enabled, record the current HEAD commit (record only; unstaged changes are not committed).
        Git commands are always run at the repository root to avoid surprises caused by subdirectories.
        """
        if not self.options.git_guard:
            return
        try:
            repo_root = _git_toplevel(self.crate_dir)
            if repo_root is None:
                return
            head = _git_head_commit(repo_root)
            if head:
                self._last_snapshot_commit = head
        except Exception:
            # Ignore snapshot failures; do not block the flow
            pass

    def _reset_to_snapshot(self) -> bool:
        """
        When git_guard is enabled and a snapshot exists, reset --hard the working tree back to the snapshot.
        Git commands are always run at the repository root to avoid surprises caused by subdirectories.
        Returns whether the reset was performed successfully.
        """
        if not self.options.git_guard:
            return False
        snap = getattr(self, "_last_snapshot_commit", None)
        if not snap:
            return False
        repo_root = _git_toplevel(self.crate_dir)
        if repo_root is None:
            return False
        ok = _git_reset_hard(repo_root, snap)
        return ok
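Aside: a minimal sketch (not from the package) of the git-guard pattern these two methods provide to `run()`; `risky_pass` is a hypothetical stand-in for any optimization step:

```python
opt = Optimizer(Path("./demo_rs"), OptimizeOptions(git_guard=True))

opt._snapshot_commit()          # remember HEAD at the repository root (no commit is made)
try:
    risky_pass()                # hypothetical step that edits files and may break the build
except Exception:
    opt._reset_to_snapshot()    # git reset --hard back to the recorded HEAD
```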
    # ---------- Progress management and file selection ----------

    def _load_or_reset_progress(self) -> None:
        if self.options.reset_progress:
            try:
                self.progress_path.write_text(json.dumps({"processed": []}, ensure_ascii=False, indent=2), encoding="utf-8")
            except Exception:
                pass
            self.processed = set()
            return
        try:
            if self.progress_path.exists():
                obj = json.loads(self.progress_path.read_text(encoding="utf-8"))
                if isinstance(obj, dict):
                    arr = obj.get("processed") or []
                    if isinstance(arr, list):
                        self.processed = {str(x) for x in arr if isinstance(x, str)}
            else:
                self.processed = set()
        except Exception:
            self.processed = set()

    def _save_progress_for_batch(self, files: List[Path]) -> None:
        try:
            rels = []
            for p in files:
                try:
                    rel = p.resolve().relative_to(self.crate_dir.resolve()).as_posix()
                except Exception:
                    rel = str(p)
                rels.append(rel)
            self.processed.update(rels)
            data = {"processed": sorted(self.processed)}
            self.progress_path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
        except Exception:
            pass

    def _parse_patterns(self, s: Optional[str]) -> List[str]:
        if not s or not isinstance(s, str):
            return []
        parts = [x.strip() for x in s.replace("\n", ",").split(",")]
        return [x for x in parts if x]

    def _match_any(self, rel: str, patterns: List[str]) -> bool:
        if not patterns:
            return False
        return any(fnmatch.fnmatch(rel, pat) for pat in patterns)

    def _compute_target_files(self) -> List[Path]:
        include = self._parse_patterns(self.options.include_patterns)
        exclude = self._parse_patterns(self.options.exclude_patterns)
        maxn = int(self.options.max_files or 0)
        take: List[Path] = []
        for p in sorted(_iter_rust_files(self.crate_dir), key=lambda x: x.as_posix()):
            try:
                rel = p.resolve().relative_to(self.crate_dir.resolve()).as_posix()
            except Exception:
                rel = p.as_posix()
            # include filter (if provided, at least one pattern must match)
            if include and not self._match_any(rel, include):
                continue
            # exclude filter
            if exclude and self._match_any(rel, exclude):
                continue
            # resume: skip files that were already processed
            if self.options.resume and rel in self.processed:
                continue
            take.append(p)
            if maxn > 0 and len(take) >= maxn:
                break
        self._target_files = take
        return take

    # ---------- Main run entry point ----------
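Aside: a sketch (not from the package) of how the batching knobs above combine for a large crate. Patterns are crate-relative globs, and anything recorded in optimize_progress.json is skipped while resume is enabled; paths and numbers are illustrative:

```python
opts = OptimizeOptions(
    include_patterns="src/**/*.rs",
    exclude_patterns="src/generated/*.rs,tests/**",
    max_files=25,    # at most 25 not-yet-processed files per batch
    resume=True,     # skip files already listed under "processed"
)
batch = Optimizer(Path("./demo_rs"), opts)._compute_target_files()
```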
    def run(self) -> OptimizeStats:
        report_path = self.report_dir / "optimize_report.json"
        print(f"{self.log_prefix} Starting optimization of crate: {self.crate_dir}")
        try:
            # Compute the target file list for this batch (by include/exclude/resume/max_files)
            targets = self._compute_target_files()
            if not targets:
                # Nothing to process: still write the report and return
                print(f"{self.log_prefix} With the current options there are no new files to process.")
                pass
            else:
                print(f"{self.log_prefix} This batch found {len(targets)} files to process.")
                # Record a snapshot before the batch starts
                self._snapshot_commit()

                if self.options.enable_unsafe_cleanup:
                    # Snapshot before the step
                    print(f"\n{self.log_prefix} Step 1: unsafe cleanup")
                    self._snapshot_commit()
                    self._opt_unsafe_cleanup(targets)
                    # Step build verification
                    if not self.options.dry_run:
                        print(f"{self.log_prefix} Verifying the build after unsafe cleanup...")
                        ok, diag_full = _cargo_check_full(self.crate_dir, self.stats, self.options.max_checks, timeout=self.options.cargo_test_timeout)
                        if not ok:
                            # Minimal fix loop
                            fixed = self._build_fix_loop(targets)
                            if not fixed:
                                first = (diag_full.splitlines()[0] if isinstance(diag_full, str) and diag_full else "failed")
                                self.stats.errors.append(f"test after unsafe_cleanup failed: {first}")
                                # Roll back to the snapshot and stop
                                try:
                                    self._reset_to_snapshot()
                                finally:
                                    return self.stats

                if self.options.enable_structure_opt:
                    # Snapshot before the step
                    print(f"\n{self.log_prefix} Step 2: structure optimization (duplicate-code detection)")
                    self._snapshot_commit()
                    self._opt_structure_duplicates(targets)
                    # Step build verification
                    if not self.options.dry_run:
                        print(f"{self.log_prefix} Verifying the build after structure optimization...")
                        ok, diag_full = _cargo_check_full(self.crate_dir, self.stats, self.options.max_checks, timeout=self.options.cargo_test_timeout)
                        if not ok:
                            fixed = self._build_fix_loop(targets)
                            if not fixed:
                                first = (diag_full.splitlines()[0] if isinstance(diag_full, str) and diag_full else "failed")
                                self.stats.errors.append(f"test after structure_opt failed: {first}")
                                try:
                                    self._reset_to_snapshot()
                                finally:
                                    return self.stats

                if self.options.enable_visibility_opt:
                    # Snapshot before the step
                    print(f"\n{self.log_prefix} Step 3: visibility optimization")
                    self._snapshot_commit()
                    self._opt_visibility(targets)
                    # Step build verification
                    if not self.options.dry_run:
                        print(f"{self.log_prefix} Verifying the build after visibility optimization...")
                        ok, diag_full = _cargo_check_full(self.crate_dir, self.stats, self.options.max_checks, timeout=self.options.cargo_test_timeout)
                        if not ok:
                            fixed = self._build_fix_loop(targets)
                            if not fixed:
                                first = (diag_full.splitlines()[0] if isinstance(diag_full, str) and diag_full else "failed")
                                self.stats.errors.append(f"test after visibility_opt failed: {first}")
                                try:
                                    self._reset_to_snapshot()
                                finally:
                                    return self.stats

                if self.options.enable_doc_opt:
                    # Snapshot before the step
                    print(f"\n{self.log_prefix} Step 4: documentation augmentation")
                    self._snapshot_commit()
                    self._opt_docs(targets)
                    # Step build verification
                    if not self.options.dry_run:
                        print(f"{self.log_prefix} Verifying the build after documentation augmentation...")
                        ok, diag_full = _cargo_check_full(self.crate_dir, self.stats, self.options.max_checks, timeout=self.options.cargo_test_timeout)
                        if not ok:
                            fixed = self._build_fix_loop(targets)
                            if not fixed:
                                first = (diag_full.splitlines()[0] if isinstance(diag_full, str) and diag_full else "failed")
                                self.stats.errors.append(f"test after doc_opt failed: {first}")
                                try:
                                    self._reset_to_snapshot()
                                finally:
                                    return self.stats

                # CodeAgent-driven whole-crate optimization (modeled on the transpiler's usage)
                # Run CodeAgent once after the static passes for minimal further improvement (skipped in dry_run)
                if not self.options.dry_run:
                    try:
                        print(f"\n{self.log_prefix} Step 5: CodeAgent whole-crate optimization")
                        self._codeagent_optimize_crate(targets)
                    except Exception as _e:
                        self.stats.errors.append(f"codeagent: {_e}")

                # Mark this batch of files as processed
                self._save_progress_for_batch(targets)

        except Exception as e:
            self.stats.errors.append(f"fatal: {e}")
        finally:
            # Write a brief report
            print(f"{self.log_prefix} Optimization finished. Report written to: {report_path.relative_to(Path.cwd())}")
            try:
                _write_file(report_path, json.dumps(asdict(self.stats), ensure_ascii=False, indent=2))
            except Exception:
                pass
        return self.stats
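Aside: every enabled pass in `run()` above follows the same guard pattern. A condensed sketch, written as a hypothetical helper rather than code from the package:

```python
def run_guarded_step(opt: Optimizer, transform, targets) -> bool:
    """Snapshot -> transform -> full `cargo test` -> CodeAgent fix loop -> reset on failure."""
    opt._snapshot_commit()
    transform(targets)                          # e.g. opt._opt_visibility
    ok, diag = _cargo_check_full(opt.crate_dir, opt.stats, opt.options.max_checks,
                                 timeout=opt.options.cargo_test_timeout)
    if ok or opt._build_fix_loop(targets):
        return True
    opt.stats.errors.append(f"step failed: {diag.splitlines()[0] if diag else 'failed'}")
    opt._reset_to_snapshot()
    return False
```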
    # ========== 1) unsafe cleanup ==========

    _re_unsafe_block = re.compile(r"\bunsafe\s*\{", re.MULTILINE)

    def _opt_unsafe_cleanup(self, files: List[Path]) -> None:
        for i, path in enumerate(files):
            try:
                rel_path = path.relative_to(self.crate_dir)
            except ValueError:
                rel_path = path
            print(f"{self.log_prefix} [unsafe cleanup] Processing file {i + 1}/{len(files)}: {rel_path}")
            try:
                content = _read_file(path)
            except Exception:
                continue
            self.stats.files_scanned += 1

            # Simple per-occurrence trial: remove only one unsafe at a time to keep rollback granular
            pos = 0
            while True:
                m = self._re_unsafe_block.search(content, pos)
                if not m:
                    break

                # Prepare the trial removal (only the "unsafe " keyword is removed; the block itself is kept)
                start, end = m.span()
                trial = content[:start] + "{" + content[end:]  # replace "unsafe {" with "{"

                if self.options.dry_run:
                    # Count only
                    self.stats.unsafe_removed += 1  # counted as potentially removable
                    pos = start + 1
                    continue

                # Back up and write the trial version
                bak = _backup_file(path)
                try:
                    _write_file(path, trial)
                    ok, diag = _cargo_check(self.crate_dir, self.stats, self.options.max_checks, timeout=self.options.cargo_test_timeout)
                    if ok:
                        # Keep the change
                        content = trial
                        self.stats.unsafe_removed += 1
                        # No need to advance pos far; keep searching from the next position
                        pos = start + 1
                    else:
                        # Roll back and add an explanation before the unsafe block
                        _restore_file_from_backup(path, bak)
                        content = _read_file(path)  # restored content
                        self._annotate_safety_comment(path, content, start, diag)
                        # Re-read the annotated text so the scan can continue
                        content = _read_file(path)
                        self.stats.unsafe_annotated += 1
                        pos = start + 1
                finally:
                    _remove_backup(bak)

            # If the final content were out of sync with disk (it never is outside dry_run), no write-back is needed here

    def _annotate_safety_comment(self, path: Path, content: str, unsafe_pos: int, diag: str) -> None:
        """
        Inject a single doc-comment line before the unsafe block, in the format:
        /// SAFETY: automatic cleanup failed, unsafe kept. Reason summary: <diag>
        """
        # Find the start of the line containing unsafe
        line_start = content.rfind("\n", 0, unsafe_pos)
        if line_start == -1:
            insert_at = 0
        else:
            insert_at = line_start + 1

        annotation = f'/// SAFETY: automatic cleanup failed, unsafe kept. Reason summary: {diag}\n'
        new_content = content[:insert_at] + annotation + content[insert_at:]

        if not self.options.dry_run:
            _write_file(path, new_content)
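Aside: a sketch (not from the package) of the textual rewrite `_opt_unsafe_cleanup` attempts per occurrence: `unsafe {` becomes `{`, and the result only survives if `cargo test` still passes. The Rust snippet is illustrative:

```python
import re

rust_src = "pub fn read(p: *const u8) -> u8 {\n    unsafe { *p }\n}\n"
m = re.search(r"\bunsafe\s*\{", rust_src)
trial = rust_src[:m.start()] + "{" + rust_src[m.end():]
# trial == "pub fn read(p: *const u8) -> u8 {\n    { *p }\n}\n"
# (this particular trial would not compile, so the optimizer would roll it back
#  and insert a `/// SAFETY: ...` line above the unsafe block instead)
```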
    # ========== 2) structure duplicates ==========

    _re_fn = re.compile(
        r"(?P<leading>\s*(?:pub(?:\([^\)]*\))?\s+)?(?:async\s+)?(?:unsafe\s+)?(?:extern\s+\"[^\"]*\"\s+)?fn\s+"
        r"(?P<name>[A-Za-z_][A-Za-z0-9_]*)\s*\([^)]*\)\s*(?:->\s*[^ \t\r\n\{]+)?\s*)\{",
        re.MULTILINE,
    )

    def _opt_structure_duplicates(self, files: List[Path]) -> None:
        # Build a simple hash of function signature + body; for duplicates, add a TODO comment to the later occurrence
        print(f"{self.log_prefix} [structure optimization] Scanning {len(files)} files for duplicate functions...")
        seen: Dict[str, Tuple[Path, int]] = {}
        for path in files:
            try:
                content = _read_file(path)
            except Exception:
                continue

            for m in self._re_fn.finditer(content):
                name = m.group("name")
                body_start = m.end() - 1  # at '{'
                body_end = self._find_matching_brace(content, body_start)
                if body_end is None:
                    continue
                sig = m.group(0)[: m.group(0).rfind("{")].strip()
                body = content[body_start: body_end + 1]
                key = f"{name}::{self._normalize_ws(sig)}::{self._normalize_ws(body)}"
                if key not in seen:
                    seen[key] = (path, m.start())
                else:
                    # Duplicate: add a TODO before this function
                    if self.options.dry_run:
                        self.stats.duplicates_tagged += 1
                        continue
                    bak = _backup_file(path)
                    try:
                        insert_pos = content.rfind("\n", 0, m.start())
                        insert_at = 0 if insert_pos == -1 else insert_pos + 1
                        origin_path, _ = seen[key]
                        try:
                            origin_rel = origin_path.resolve().relative_to(self.crate_dir.resolve()).as_posix()
                        except Exception:
                            origin_rel = origin_path.as_posix()
                        todo = f'/// TODO: duplicate of {origin_rel}::{name}\n'
                        new_content = content[:insert_at] + todo + content[insert_at:]
                        _write_file(path, new_content)
                        content = new_content
                        self.stats.duplicates_tagged += 1
                    finally:
                        _remove_backup(bak)

    def _find_matching_brace(self, s: str, open_pos: int) -> Optional[int]:
        """
        Given s[open_pos] == '{', return the position of the matching '}'; a simple counter that ignores the complexities of strings/comments (conservative).
        """
        if open_pos >= len(s) or s[open_pos] != "{":
            return None
        depth = 0
        for i in range(open_pos, len(s)):
            if s[i] == "{":
                depth += 1
            elif s[i] == "}":
                depth -= 1
                if depth == 0:
                    return i
        return None

    def _normalize_ws(self, s: str) -> str:
        return re.sub(r"\s+", " ", s).strip()
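Aside: a sketch (not from the package) of the duplicate key built above: function name plus whitespace-normalized signature and body text, so two textually identical functions in different files collide on the same key. The Rust fragments are illustrative:

```python
import re

sig = "pub fn add(a: i32, b: i32) -> i32"
body = "{\n    a + b\n}"
normalize = lambda s: re.sub(r"\s+", " ", s).strip()
key = f"add::{normalize(sig)}::{normalize(body)}"
# key == "add::pub fn add(a: i32, b: i32) -> i32::{ a + b }"
```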
    # ========== 3) visibility optimization ==========

    _re_pub_fn = re.compile(
        r"(?P<prefix>\s*)pub\s+fn\s+(?P<name>[A-Za-z_][A-Za-z0-9_]*)\s*\(",
        re.MULTILINE,
    )

    def _opt_visibility(self, files: List[Path]) -> None:
        for i, path in enumerate(files):
            try:
                rel_path = path.relative_to(self.crate_dir)
            except ValueError:
                rel_path = path
            print(f"{self.log_prefix} [visibility optimization] Processing file {i + 1}/{len(files)}: {rel_path}")
            try:
                content = _read_file(path)
            except Exception:
                continue

            for m in list(self._re_pub_fn.finditer(content)):
                start, end = m.span()
                name = m.group("name")
                candidate = content[:start] + f"{m.group('prefix')}pub(crate) fn {name}(" + content[end:]
                if self.options.dry_run:
                    self.stats.visibility_downgraded += 1
                    continue
                bak = _backup_file(path)
                try:
                    _write_file(path, candidate)
                    ok, _ = _cargo_check(self.crate_dir, self.stats, self.options.max_checks, timeout=self.options.cargo_test_timeout)
                    if ok:
                        content = candidate
                        self.stats.visibility_downgraded += 1
                    else:
                        _restore_file_from_backup(path, bak)
                        content = _read_file(path)
                finally:
                    _remove_backup(bak)
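Aside: a sketch (not from the package) of the visibility rewrite above: each `pub fn` is tentatively downgraded to `pub(crate) fn` and kept only if `cargo test` still passes. The Rust snippet is illustrative:

```python
import re

_re_pub_fn = re.compile(r"(?P<prefix>\s*)pub\s+fn\s+(?P<name>[A-Za-z_][A-Za-z0-9_]*)\s*\(")
src = "pub fn helper(x: u32) -> u32 {\n    x + 1\n}\n"
m = _re_pub_fn.search(src)
candidate = src[:m.start()] + f"{m.group('prefix')}pub(crate) fn {m.group('name')}(" + src[m.end():]
# candidate == "pub(crate) fn helper(x: u32) -> u32 {\n    x + 1\n}\n"
```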
    # ========== 4) doc augmentation ==========

    _re_mod_doc = re.compile(r"(?m)^\s*//!")  # module-level docs at the top of the file
    _re_any_doc = re.compile(r"(?m)^\s*///")

    def _opt_docs(self, files: List[Path]) -> None:
        for i, path in enumerate(files):
            try:
                rel_path = path.relative_to(self.crate_dir)
            except ValueError:
                rel_path = path
            print(f"{self.log_prefix} [doc augmentation] Processing file {i + 1}/{len(files)}: {rel_path}")
            try:
                content = _read_file(path)
            except Exception:
                continue

            changed = False
            # Module-level docs: if the file does not start with docs, add them
            if not self._re_mod_doc.search(content[:500]):  # check only the beginning of the file
                header = "//! TODO: Add module-level documentation\n"
                content = header + content
                changed = True
                self.stats.docs_added += 1

            # Function docs: add them before functions that lack a doc comment
            new_content = []
            last_end = 0
            for m in self._re_fn.finditer(content):
                fn_start = m.start()
                # Check whether the previous line already has a /// doc comment
                line_start = content.rfind("\n", 0, fn_start)
                prev_line_start = content.rfind("\n", 0, line_start - 1) if line_start > 0 else -1
                segment_start = last_end
                segment_end = line_start + 1 if line_start != -1 else 0
                new_content.append(content[segment_start:segment_end])

                doc_exists = False
                if line_start != -1:
                    prev_line = content[prev_line_start + 1: line_start] if prev_line_start != -1 else content[:line_start]
                    if self._re_any_doc.search(prev_line):
                        doc_exists = True

                if not doc_exists:
                    new_content.append("/// TODO: Add documentation\n")
                    changed = True
                    self.stats.docs_added += 1

                new_content.append(content[segment_end: m.end()])  # up to and including the start of the function body
                last_end = m.end()

            new_content.append(content[last_end:])
            new_s = "".join(new_content)

            if changed and not self.options.dry_run:
                _write_file(path, new_s)

    # ========== 5) CodeAgent whole-crate optimization (modeled on the transpiler's CodeAgent usage) ==========

    def _codeagent_optimize_crate(self, target_files: List[Path]) -> None:
        """
        Use CodeAgent to run one conservative whole-crate optimization, emitting a patch and verifying it with one cargo check run.
        Changes are limited to this batch's target files (target_files) to support batched optimization of large projects.
        Includes:
        - unsafe cleanup and SAFETY comment augmentation (scope minimized)
        - minimal duplicate elimination (extracting shared helper functions is allowed), or adding TODO tags
        - visibility minimization (prefer pub(crate); keep external interfaces pub)
        - documentation augmentation (placeholders where module/function docs are missing)
        Constraints:
        - keep changes minimal; avoid large-scale refactoring or reformatting
        - do not remove public APIs; cross-crate interfaces stay pub
        - only modify files under crate_dir (Cargo.toml, src/**/*.rs); do not touch other directories
        - output patches only (controlled by CodeAgent), no explanations
        """
        crate = self.crate_dir.resolve()
        file_list: List[str] = []
        for p in target_files:
            try:
                rel = p.resolve().relative_to(crate).as_posix()
            except Exception:
                rel = p.as_posix()
            file_list.append(rel)

        prompt_lines: List[str] = [
            "You are a senior Rust engineer. Perform one conservative whole-crate optimization of the current crate and output the changes as a patch:",
            f"- crate root directory: {crate}",
            "",
            "This optimization may only modify the following files (strict limit):",
            *[f"- {rel}" for rel in file_list],
            "",
            "Optimization goals (by priority):",
            "1) unsafe cleanup:",
            "   - Remove unnecessary unsafe wrappers; if unsafe is required, narrow its scope and add a `/// SAFETY: ...` doc comment right next to it explaining why.",
            "2) Code structure optimization (duplicate elimination/hints):",
            "   - Detect duplicated function implementations (approximate signature+body); if a shared helper can be extracted safely, do a minimal refactor, otherwise add a `/// TODO: duplicate of ...` comment at the duplicate.",
            "3) Visibility optimization:",
            "   - Prefer downgrading `pub fn` to `pub(crate) fn`; keep externally used (cross-crate) interfaces `pub`; leave top-level re-exports in lib.rs unchanged.",
            "4) Documentation augmentation:",
            "   - Add placeholder comments (//! or ///) where module/function docs are missing.",
            "",
            "Constraints and scope:",
            "- Only modify the files listed above; do not modify other files unless strictly necessary (e.g. fixing reference paths).",
            "- Keep changes minimal; do not refactor or reformat beyond the optimizations above.",
            "- The changes must keep `cargo test` passing; if a few accompanying changes are needed, include them in the patch so it passes.",
            "- Output a patch only, with no explanations or extra text.",
            "",
            "Self-check requirement: after each patch, use the execute_script tool to run `cargo test -q` in the crate root to verify;",
            "if it does not pass, keep emitting new minimal-fix patches and re-checking until `cargo test` passes."
        ]
        prompt = "\n".join(prompt_lines)
        prev_cwd = os.getcwd()
        print(f"{self.log_prefix} [CodeAgent] Invoking CodeAgent for whole-crate optimization...")
        try:
            os.chdir(str(crate))
            agent = CodeAgent(need_summary=False, non_interactive=self.options.non_interactive, plan=False, model_group=self.options.llm_group)
            agent.run(prompt, prefix="[c2rust-optimizer][codeagent]", suffix="")
        finally:
            os.chdir(prev_cwd)
        # Run one cargo check verification; on failure, enter the local minimal fix loop
        ok, diag = _cargo_check_full(self.crate_dir, self.stats, self.options.max_checks, timeout=self.options.cargo_test_timeout)
        if not ok:
            fixed = self._build_fix_loop(target_files)
            if not fixed:
                first = (diag.splitlines()[0] if isinstance(diag, str) and diag else "failed")
                self.stats.errors.append(f"codeagent test failed: {first}")
                try:
                    self._reset_to_snapshot()
                finally:
                    return

    def _build_fix_loop(self, scope_files: List[Path]) -> bool:
        """
        Repeatedly run cargo check and apply minimal CodeAgent fixes until it passes, the retry limit is reached, or the check budget is exhausted.
        Only scope_files may (preferentially) be modified, unless strictly necessary, to support batched optimization.
        Returns True if the fixes succeeded and the build passes; False if it could not be repaired within the limits.
        """
        maxr = int(self.options.build_fix_retries or 0)
        if maxr <= 0:
            return False
        crate = self.crate_dir.resolve()
        allowed: List[str] = []
        for p in scope_files:
            try:
                rel = p.resolve().relative_to(crate).as_posix()
            except Exception:
                rel = p.as_posix()
            allowed.append(rel)

        attempt = 0
        while True:
            # Check the budget
            if self.options.max_checks and self.stats.cargo_checks >= self.options.max_checks:
                return False
            # Run the build
            output = ""
            try:
                res = subprocess.run(
                    ["cargo", "test", "-q"],
                    capture_output=True,
                    text=True,
                    check=False,
                    cwd=str(crate),
                    timeout=self.options.cargo_test_timeout if self.options.cargo_test_timeout > 0 else None,
                )
                self.stats.cargo_checks += 1
                if res.returncode == 0:
                    print(f"{self.log_prefix} Build fix succeeded.")
                    return True
                output = ((res.stdout or "") + ("\n" + (res.stderr or ""))).strip()
            except subprocess.TimeoutExpired as e:
                self.stats.cargo_checks += 1
                out_s = e.stdout.decode("utf-8", errors="ignore") if e.stdout else ""
                err_s = e.stderr.decode("utf-8", errors="ignore") if e.stderr else ""
                output = f"cargo test timed out after {self.options.cargo_test_timeout} seconds"
                full_output = (out_s + ("\n" + err_s if err_s else "")).strip()
                if full_output:
                    output += f"\nOutput:\n{full_output}"
            except Exception as e:
                self.stats.cargo_checks += 1
                output = f"cargo test exception: {e}"

            # Fail once the retry limit is reached
            attempt += 1
            if attempt > maxr:
                print(f"{self.log_prefix} Build-fix retries exhausted.")
                return False

            print(f"{self.log_prefix} Build failed. Attempting a fix with CodeAgent (attempt {attempt}/{maxr})...")
            # Build a minimal-fix prompt
            prompt_lines = [
                "Based on the test/build errors below, make the minimal necessary fixes to the crate so that `cargo test` passes:",
                f"- crate root directory: {crate}",
                "",
                "This fix should preferentially, and only, modify the following files (do not touch files outside this scope unless strictly necessary):",
                *[f"- {rel}" for rel in allowed],
                "",
                "Constraints and scope:",
                "- Keep changes minimal; do not refactor or reformat anything unrelated to the errors;",
                "- Output a patch only, with no explanations or extra text.",
                "",
                "Self-check requirement: after each patch, use the execute_script tool to run `cargo test -q` in the crate root to verify;",
                "if it does not pass, keep emitting new minimal-fix patches and re-checking until `cargo test` passes.",
                "",
                "The build errors are:",
                "<BUILD_ERROR>",
                output,
                "</BUILD_ERROR>",
            ]
            prompt = "\n".join(prompt_lines)
            prev_cwd = os.getcwd()
            try:
                os.chdir(str(crate))
                agent = CodeAgent(need_summary=False, non_interactive=self.options.non_interactive, plan=False, model_group=self.options.llm_group)
                agent.run(prompt, prefix=f"[c2rust-optimizer][build-fix iter={attempt}]", suffix="")
            finally:
                os.chdir(prev_cwd)

        return False


def optimize_project(
    crate_dir: Optional[Path] = None,
    enable_unsafe_cleanup: bool = True,
    enable_structure_opt: bool = True,
    enable_visibility_opt: bool = True,
    enable_doc_opt: bool = True,
    max_checks: int = 0,
    dry_run: bool = False,
    include_patterns: Optional[str] = None,
    exclude_patterns: Optional[str] = None,
    max_files: int = 0,
    resume: bool = True,
    reset_progress: bool = False,
    build_fix_retries: int = 3,
    git_guard: bool = True,
    llm_group: Optional[str] = None,
    cargo_test_timeout: int = 300,
    non_interactive: bool = True,
) -> Dict:
    """
    Run the optimization on the given crate. Returns a summary dict.
    - crate_dir: crate root directory (containing Cargo.toml); auto-detected when None
    - enable_*: toggles for the individual optimization steps
    - max_checks: cap on the number of cargo check invocations (0 = unlimited)
    - dry_run: do not write back; only count potential changes
    - include_patterns/exclude_patterns: comma-separated globs, relative to the crate root (e.g. src/**/*.rs)
    - max_files: maximum number of files to process in this run (0 = unlimited)
    - resume: enable resumable runs (skip files already processed)
    - reset_progress: clear the progress (the processed list)
    """
    crate = detect_crate_dir(crate_dir)
    opts = OptimizeOptions(
        enable_unsafe_cleanup=enable_unsafe_cleanup,
        enable_structure_opt=enable_structure_opt,
        enable_visibility_opt=enable_visibility_opt,
        enable_doc_opt=enable_doc_opt,
        max_checks=max_checks,
        dry_run=dry_run,
        include_patterns=include_patterns,
        exclude_patterns=exclude_patterns,
        max_files=max_files,
        resume=resume,
        reset_progress=reset_progress,
        build_fix_retries=build_fix_retries,
        git_guard=git_guard,
        llm_group=llm_group,
        cargo_test_timeout=cargo_test_timeout,
        non_interactive=non_interactive,
    )
    optimizer = Optimizer(crate, opts)
    stats = optimizer.run()
    return asdict(stats)
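Aside: a minimal usage sketch (not from the package) of the public entry point. A dry run only counts candidate edits; a real run edits files, verifies each step with `cargo test`, and returns the OptimizeStats summary as a dict. The crate path and numbers are illustrative:

```python
from pathlib import Path
from jarvis.jarvis_c2rust.optimizer import optimize_project

summary = optimize_project(
    crate_dir=Path("./my_project_rs"),   # illustrative crate path
    dry_run=True,                        # report potential changes only
    max_files=50,
    cargo_test_timeout=600,
)
print(summary["unsafe_removed"], summary["visibility_downgraded"], summary["errors"])
```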