jarvis-ai-assistant 0.1.222__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. jarvis/__init__.py +1 -1
  2. jarvis/jarvis_agent/__init__.py +1143 -245
  3. jarvis/jarvis_agent/agent_manager.py +97 -0
  4. jarvis/jarvis_agent/builtin_input_handler.py +12 -10
  5. jarvis/jarvis_agent/config_editor.py +57 -0
  6. jarvis/jarvis_agent/edit_file_handler.py +392 -99
  7. jarvis/jarvis_agent/event_bus.py +48 -0
  8. jarvis/jarvis_agent/events.py +157 -0
  9. jarvis/jarvis_agent/file_context_handler.py +79 -0
  10. jarvis/jarvis_agent/file_methodology_manager.py +117 -0
  11. jarvis/jarvis_agent/jarvis.py +1117 -147
  12. jarvis/jarvis_agent/main.py +78 -34
  13. jarvis/jarvis_agent/memory_manager.py +195 -0
  14. jarvis/jarvis_agent/methodology_share_manager.py +174 -0
  15. jarvis/jarvis_agent/prompt_manager.py +82 -0
  16. jarvis/jarvis_agent/prompts.py +46 -9
  17. jarvis/jarvis_agent/protocols.py +4 -1
  18. jarvis/jarvis_agent/rewrite_file_handler.py +141 -0
  19. jarvis/jarvis_agent/run_loop.py +146 -0
  20. jarvis/jarvis_agent/session_manager.py +9 -9
  21. jarvis/jarvis_agent/share_manager.py +228 -0
  22. jarvis/jarvis_agent/shell_input_handler.py +23 -3
  23. jarvis/jarvis_agent/stdio_redirect.py +295 -0
  24. jarvis/jarvis_agent/task_analyzer.py +212 -0
  25. jarvis/jarvis_agent/task_manager.py +154 -0
  26. jarvis/jarvis_agent/task_planner.py +496 -0
  27. jarvis/jarvis_agent/tool_executor.py +8 -4
  28. jarvis/jarvis_agent/tool_share_manager.py +139 -0
  29. jarvis/jarvis_agent/user_interaction.py +42 -0
  30. jarvis/jarvis_agent/utils.py +54 -0
  31. jarvis/jarvis_agent/web_bridge.py +189 -0
  32. jarvis/jarvis_agent/web_output_sink.py +53 -0
  33. jarvis/jarvis_agent/web_server.py +751 -0
  34. jarvis/jarvis_c2rust/__init__.py +26 -0
  35. jarvis/jarvis_c2rust/cli.py +613 -0
  36. jarvis/jarvis_c2rust/collector.py +258 -0
  37. jarvis/jarvis_c2rust/library_replacer.py +1122 -0
  38. jarvis/jarvis_c2rust/llm_module_agent.py +1300 -0
  39. jarvis/jarvis_c2rust/optimizer.py +960 -0
  40. jarvis/jarvis_c2rust/scanner.py +1681 -0
  41. jarvis/jarvis_c2rust/transpiler.py +2325 -0
  42. jarvis/jarvis_code_agent/build_validation_config.py +133 -0
  43. jarvis/jarvis_code_agent/code_agent.py +1605 -178
  44. jarvis/jarvis_code_agent/code_analyzer/__init__.py +62 -0
  45. jarvis/jarvis_code_agent/code_analyzer/base_language.py +74 -0
  46. jarvis/jarvis_code_agent/code_analyzer/build_validator/__init__.py +44 -0
  47. jarvis/jarvis_code_agent/code_analyzer/build_validator/base.py +102 -0
  48. jarvis/jarvis_code_agent/code_analyzer/build_validator/cmake.py +59 -0
  49. jarvis/jarvis_code_agent/code_analyzer/build_validator/detector.py +125 -0
  50. jarvis/jarvis_code_agent/code_analyzer/build_validator/fallback.py +69 -0
  51. jarvis/jarvis_code_agent/code_analyzer/build_validator/go.py +38 -0
  52. jarvis/jarvis_code_agent/code_analyzer/build_validator/java_gradle.py +44 -0
  53. jarvis/jarvis_code_agent/code_analyzer/build_validator/java_maven.py +38 -0
  54. jarvis/jarvis_code_agent/code_analyzer/build_validator/makefile.py +50 -0
  55. jarvis/jarvis_code_agent/code_analyzer/build_validator/nodejs.py +93 -0
  56. jarvis/jarvis_code_agent/code_analyzer/build_validator/python.py +129 -0
  57. jarvis/jarvis_code_agent/code_analyzer/build_validator/rust.py +54 -0
  58. jarvis/jarvis_code_agent/code_analyzer/build_validator/validator.py +154 -0
  59. jarvis/jarvis_code_agent/code_analyzer/build_validator.py +43 -0
  60. jarvis/jarvis_code_agent/code_analyzer/context_manager.py +363 -0
  61. jarvis/jarvis_code_agent/code_analyzer/context_recommender.py +18 -0
  62. jarvis/jarvis_code_agent/code_analyzer/dependency_analyzer.py +132 -0
  63. jarvis/jarvis_code_agent/code_analyzer/file_ignore.py +330 -0
  64. jarvis/jarvis_code_agent/code_analyzer/impact_analyzer.py +781 -0
  65. jarvis/jarvis_code_agent/code_analyzer/language_registry.py +185 -0
  66. jarvis/jarvis_code_agent/code_analyzer/language_support.py +89 -0
  67. jarvis/jarvis_code_agent/code_analyzer/languages/__init__.py +31 -0
  68. jarvis/jarvis_code_agent/code_analyzer/languages/c_cpp_language.py +231 -0
  69. jarvis/jarvis_code_agent/code_analyzer/languages/go_language.py +183 -0
  70. jarvis/jarvis_code_agent/code_analyzer/languages/python_language.py +219 -0
  71. jarvis/jarvis_code_agent/code_analyzer/languages/rust_language.py +209 -0
  72. jarvis/jarvis_code_agent/code_analyzer/llm_context_recommender.py +451 -0
  73. jarvis/jarvis_code_agent/code_analyzer/symbol_extractor.py +77 -0
  74. jarvis/jarvis_code_agent/code_analyzer/tree_sitter_extractor.py +48 -0
  75. jarvis/jarvis_code_agent/lint.py +275 -13
  76. jarvis/jarvis_code_agent/utils.py +142 -0
  77. jarvis/jarvis_code_analysis/checklists/loader.py +20 -6
  78. jarvis/jarvis_code_analysis/code_review.py +583 -548
  79. jarvis/jarvis_data/config_schema.json +339 -28
  80. jarvis/jarvis_git_squash/main.py +22 -13
  81. jarvis/jarvis_git_utils/git_commiter.py +171 -55
  82. jarvis/jarvis_mcp/sse_mcp_client.py +22 -15
  83. jarvis/jarvis_mcp/stdio_mcp_client.py +4 -4
  84. jarvis/jarvis_mcp/streamable_mcp_client.py +36 -16
  85. jarvis/jarvis_memory_organizer/memory_organizer.py +753 -0
  86. jarvis/jarvis_methodology/main.py +48 -63
  87. jarvis/jarvis_multi_agent/__init__.py +302 -43
  88. jarvis/jarvis_multi_agent/main.py +70 -24
  89. jarvis/jarvis_platform/ai8.py +40 -23
  90. jarvis/jarvis_platform/base.py +210 -49
  91. jarvis/jarvis_platform/human.py +11 -1
  92. jarvis/jarvis_platform/kimi.py +82 -76
  93. jarvis/jarvis_platform/openai.py +73 -1
  94. jarvis/jarvis_platform/registry.py +8 -15
  95. jarvis/jarvis_platform/tongyi.py +115 -101
  96. jarvis/jarvis_platform/yuanbao.py +89 -63
  97. jarvis/jarvis_platform_manager/main.py +194 -132
  98. jarvis/jarvis_platform_manager/service.py +122 -86
  99. jarvis/jarvis_rag/cli.py +156 -53
  100. jarvis/jarvis_rag/embedding_manager.py +155 -12
  101. jarvis/jarvis_rag/llm_interface.py +10 -13
  102. jarvis/jarvis_rag/query_rewriter.py +63 -12
  103. jarvis/jarvis_rag/rag_pipeline.py +222 -40
  104. jarvis/jarvis_rag/reranker.py +26 -3
  105. jarvis/jarvis_rag/retriever.py +270 -14
  106. jarvis/jarvis_sec/__init__.py +3605 -0
  107. jarvis/jarvis_sec/checkers/__init__.py +32 -0
  108. jarvis/jarvis_sec/checkers/c_checker.py +2680 -0
  109. jarvis/jarvis_sec/checkers/rust_checker.py +1108 -0
  110. jarvis/jarvis_sec/cli.py +116 -0
  111. jarvis/jarvis_sec/report.py +257 -0
  112. jarvis/jarvis_sec/status.py +264 -0
  113. jarvis/jarvis_sec/types.py +20 -0
  114. jarvis/jarvis_sec/workflow.py +219 -0
  115. jarvis/jarvis_smart_shell/main.py +405 -137
  116. jarvis/jarvis_stats/__init__.py +13 -0
  117. jarvis/jarvis_stats/cli.py +387 -0
  118. jarvis/jarvis_stats/stats.py +711 -0
  119. jarvis/jarvis_stats/storage.py +612 -0
  120. jarvis/jarvis_stats/visualizer.py +282 -0
  121. jarvis/jarvis_tools/ask_user.py +1 -0
  122. jarvis/jarvis_tools/base.py +18 -2
  123. jarvis/jarvis_tools/clear_memory.py +239 -0
  124. jarvis/jarvis_tools/cli/main.py +220 -144
  125. jarvis/jarvis_tools/execute_script.py +52 -12
  126. jarvis/jarvis_tools/file_analyzer.py +17 -12
  127. jarvis/jarvis_tools/generate_new_tool.py +46 -24
  128. jarvis/jarvis_tools/read_code.py +277 -18
  129. jarvis/jarvis_tools/read_symbols.py +141 -0
  130. jarvis/jarvis_tools/read_webpage.py +86 -13
  131. jarvis/jarvis_tools/registry.py +294 -90
  132. jarvis/jarvis_tools/retrieve_memory.py +227 -0
  133. jarvis/jarvis_tools/save_memory.py +194 -0
  134. jarvis/jarvis_tools/search_web.py +62 -28
  135. jarvis/jarvis_tools/sub_agent.py +205 -0
  136. jarvis/jarvis_tools/sub_code_agent.py +217 -0
  137. jarvis/jarvis_tools/virtual_tty.py +330 -62
  138. jarvis/jarvis_utils/builtin_replace_map.py +4 -5
  139. jarvis/jarvis_utils/clipboard.py +90 -0
  140. jarvis/jarvis_utils/config.py +607 -50
  141. jarvis/jarvis_utils/embedding.py +3 -0
  142. jarvis/jarvis_utils/fzf.py +57 -0
  143. jarvis/jarvis_utils/git_utils.py +251 -29
  144. jarvis/jarvis_utils/globals.py +174 -17
  145. jarvis/jarvis_utils/http.py +58 -79
  146. jarvis/jarvis_utils/input.py +899 -153
  147. jarvis/jarvis_utils/methodology.py +210 -83
  148. jarvis/jarvis_utils/output.py +220 -137
  149. jarvis/jarvis_utils/utils.py +1906 -135
  150. jarvis_ai_assistant-0.7.0.dist-info/METADATA +465 -0
  151. jarvis_ai_assistant-0.7.0.dist-info/RECORD +192 -0
  152. {jarvis_ai_assistant-0.1.222.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/entry_points.txt +8 -2
  153. jarvis/jarvis_git_details/main.py +0 -265
  154. jarvis/jarvis_platform/oyi.py +0 -357
  155. jarvis/jarvis_tools/edit_file.py +0 -255
  156. jarvis/jarvis_tools/rewrite_file.py +0 -195
  157. jarvis_ai_assistant-0.1.222.dist-info/METADATA +0 -767
  158. jarvis_ai_assistant-0.1.222.dist-info/RECORD +0 -110
  159. /jarvis/{jarvis_git_details → jarvis_memory_organizer}/__init__.py +0 -0
  160. {jarvis_ai_assistant-0.1.222.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/WHEEL +0 -0
  161. {jarvis_ai_assistant-0.1.222.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/licenses/LICENSE +0 -0
  162. {jarvis_ai_assistant-0.1.222.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1681 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ 使用 libclang 的 C/C++ 函数扫描器和调用图提取器。
5
+
6
+ 设计决策:
7
+ - 解析器: clang.cindex (libclang),用于生成包含精确类型和位置的健壮 C/C++ AST。
8
+
9
+ JSONL 文件
10
+ - symbols_raw.jsonl
11
+ 原始扫描产物:每个符号(函数或类型)一个 JSON 对象,统一模式:
12
+ 字段:
13
+ - id (int)
14
+ - category (str): "function" | "type"
15
+ - name (str)
16
+ - qualified_name (str)
17
+ - signature (str) # 函数签名;类型则可选或为空
18
+ - return_type (str) # 函数返回类型;类型则可选或为空
19
+ - params (list[{name, type}]) # 函数参数;类型则可选或为空
20
+ - kind (str) # 类型种类: struct/class/union/enum/typedef/type_alias
21
+ - underlying_type (str) # 针对 typedef/type_alias;其他为空
22
+ - ref (list[str]) # 统一的引用:被调用的函数或引用的类型
23
+ - file (str)
24
+ - start_line (int), start_col (int), end_line (int), end_col (int)
25
+ - language (str)
26
+ - created_at (str, ISO-like), updated_at (str, ISO-like)
27
+ - symbols.jsonl
28
+ 经过裁剪/评估后的符号表(由 prune 子命令或人工整理生成),用于后续转译与规划
29
+ - meta.json
30
+ {
31
+ "functions": N,
32
+ "types": M,
33
+ "symbols": N+M,
34
+ "generated_at": "...",
35
+ "schema_version": 1,
36
+ "source_root": "<abs path>"
37
+ }
38
+ 用法:
39
+ python -m jarvis.jarvis_c2rust.scanner --root /path/to/scan
40
+
41
+ 注意:
42
+ - 如果存在 compile_commands.json 文件,将会用它来提高解析准确性。
43
+ - 如果找不到 libclang,将引发一个信息丰富的错误,并提示设置环境变量:
44
+ - LIBCLANG_PATH (目录) 或 CLANG_LIBRARY_FILE (完整路径)
45
+ - LLVM_HOME (包含 lib/libclang.so 的前缀)
46
+ """
47
+
48
+ from __future__ import annotations
49
+
50
+
51
+ import json
52
+ import os
53
+
54
+ import sys
55
+ import time
56
+ from dataclasses import dataclass
57
+ from pathlib import Path
58
+ from typing import Any, Dict, Iterable, List, Optional, Set
59
+ import typer
60
+ import shutil
61
+
62
+ # ---------------------------
63
+ # libclang loader
64
+ # ---------------------------
65
def _try_import_libclang() -> Any:
    """
    Import ``clang.cindex`` and bind it to a libclang shared library (major 16-21).

    Resolution order:
      1) CLANG_LIBRARY_FILE: used if the file exists; it must be a 16-21 library,
         otherwise a RuntimeError is raised.
      2) LIBCLANG_PATH: a directory; the first supported libclang inside it is used,
         otherwise a RuntimeError is raised.
      3) LLVM_HOME/lib/libclang.*: used if a supported library is found (no error
         when nothing matches).
      4) Well-known install locations for versions 16-21, plus globbed
         ``libclang.so.*`` files in common library directories.
      5) Final fallback: let ``clang.cindex`` resolve libclang on its own.

    Returns:
        The imported ``clang.cindex`` module.

    Raises:
        RuntimeError: if ``clang.cindex`` cannot be imported, if the Python bindings
            report a major version outside 16-21, if an explicitly configured
            location holds no supported libclang, or if no libclang can be located.
    """
    SUPPORTED_MAJORS = {16, 17, 18, 19, 20, 21}
    # Probe newest first so the most recent supported release wins.
    majors_desc = (21, 20, 19, 18, 17, 16)

    try:
        from clang import cindex
    except Exception as e:
        raise RuntimeError(
            "导入 clang.cindex 失败。本工具支持 clang 16-21。\n"
            "修复方法:\n"
            "- pip install 'clang>=16,<22'\n"
            "- 确保已安装 libclang (16-21) (例如,apt install llvm-21 clang-21 libclang-21-dev)\n"
            "- 设置环境变量 CLANG_LIBRARY_FILE 指向匹配的共享库,或 LIBCLANG_PATH 指向其目录。"
        ) from e

    # Verify the major version of the Python bindings, when they expose one.
    py_major: Optional[int] = None
    try:
        import clang as _clang
        import re as _re

        v = getattr(_clang, "__version__", None)
        if v:
            # Raw string with a single backslash: match the leading digits.
            m = _re.match(r"(\d+)", str(v))
            if m:
                py_major = int(m.group(1))
    except Exception:
        py_major = None

    # A known but unsupported version is fatal; an unknown one (None) is tolerated
    # and the decision is left to the libclang probing below.
    if py_major is not None and py_major not in SUPPORTED_MAJORS:
        raise RuntimeError(
            f"Python 'clang' 绑定的主版本必须是 {sorted(SUPPORTED_MAJORS)} 中的一个。\n"
            "修复方法:\n"
            "- pip install --upgrade 'clang>=16,<22'"
        )

    def _probe_major_from_lib(path: str) -> Optional[int]:
        """Load the library with ctypes and parse the major from clang_getClangVersion()."""
        try:
            import ctypes
            import re as _re

            class CXString(ctypes.Structure):
                _fields_ = [("data", ctypes.c_void_p), ("private_flags", ctypes.c_uint)]

            lib = ctypes.CDLL(path)
            # Declare the ctypes signatures explicitly so the struct is passed by value.
            lib.clang_getClangVersion.restype = CXString
            lib.clang_getCString.argtypes = [CXString]
            lib.clang_getCString.restype = ctypes.c_char_p
            lib.clang_disposeString.argtypes = [CXString]
            s = lib.clang_getClangVersion()
            cstr = lib.clang_getCString(s)  # const char*
            ver = ""
            try:
                if cstr is not None:
                    ver = cstr.decode("utf-8", "ignore")
                else:
                    ver = ""
            except Exception:
                # Fallback when the declared restype was not honoured.
                try:
                    ptr = ctypes.cast(cstr, ctypes.c_char_p)
                    raw = getattr(ptr, "value", None)
                    ver = raw.decode("utf-8", "ignore") if raw is not None else ""
                except Exception:
                    ver = ""
            lib.clang_disposeString(s)
            if ver:
                m = _re.search(r"clang version (\d+)", ver)
                if m:
                    return int(m.group(1))
        except Exception:
            return None
        return None

    def _ensure_supported_and_set(lib_path: str) -> bool:
        """Select ``lib_path`` for cindex if it is a supported libclang; report success."""
        major = _probe_major_from_lib(lib_path)
        if major in SUPPORTED_MAJORS:
            try:
                cindex.Config.set_library_file(lib_path)
                return True
            except Exception:
                return False
        return False

    def _library_names(directory: Path) -> List[Path]:
        """Versioned sonames (newest first), then the generic per-platform names."""
        names: List[Path] = [directory / f"libclang.so.{maj}" for maj in majors_desc]
        names.extend([
            directory / "libclang.so",     # Linux
            directory / "libclang.dylib",  # macOS
            directory / "libclang.dll",    # Windows
        ])
        return names

    # 1) CLANG_LIBRARY_FILE
    lib_file = os.environ.get("CLANG_LIBRARY_FILE")
    if lib_file and Path(lib_file).exists():
        if _ensure_supported_and_set(lib_file):
            return cindex
        raise RuntimeError(
            f"环境变量 CLANG_LIBRARY_FILE 指向 '{lib_file}', 但它不是 libclang 16-21 版本。\n"
            "请将其设置为受支持的 libclang (例如 /usr/lib/llvm-21/lib/libclang.so 或匹配的版本)。"
        )

    # 2) LIBCLANG_PATH
    lib_dir = os.environ.get("LIBCLANG_PATH")
    if lib_dir and Path(lib_dir).exists():
        for cand in _library_names(Path(lib_dir)):
            if cand.exists() and _ensure_supported_and_set(str(cand)):
                return cindex
        # An explicit directory without a supported library is an error.
        raise RuntimeError(
            f"环境变量 LIBCLANG_PATH={lib_dir} 不包含 libclang 16-21。\n"
            "期望找到 libclang.so.[16-21] (Linux) 或来自 llvm@16..@21 的 libclang.dylib (macOS)。"
        )

    # 3) LLVM_HOME (best effort: fall through when nothing matches)
    llvm_home = os.environ.get("LLVM_HOME")
    if llvm_home:
        for cand in _library_names(Path(llvm_home) / "lib"):
            if cand.exists() and _ensure_supported_and_set(str(cand)):
                return cindex

    # 4) Common locations for versions 16-21
    import platform as _platform

    sys_name = _platform.system()
    path_candidates: List[Path] = []
    if sys_name == "Linux":
        for maj in majors_desc:
            path_candidates.extend([
                Path(f"/usr/lib/llvm-{maj}/lib/libclang.so.{maj}"),
                Path(f"/usr/lib/llvm-{maj}/lib/libclang.so"),
            ])
        # Generic fallbacks: versioned sonames first, then the unversioned name.
        for prefix in ("/usr/local/lib", "/usr/lib"):
            path_candidates.extend(Path(f"{prefix}/libclang.so.{maj}") for maj in majors_desc)
            path_candidates.append(Path(f"{prefix}/libclang.so"))
    elif sys_name == "Darwin":
        # Homebrew llvm@N formulas
        for maj in majors_desc:
            path_candidates.append(Path(f"/opt/homebrew/opt/llvm@{maj}/lib/libclang.dylib"))
            path_candidates.append(Path(f"/usr/local/opt/llvm@{maj}/lib/libclang.dylib"))
        # Generic llvm formula path (may be symlinked to a specific version)
        path_candidates.extend([
            Path("/opt/homebrew/opt/llvm/lib/libclang.dylib"),
            Path("/usr/local/opt/llvm/lib/libclang.dylib"),
        ])
    else:
        # Best effort on other systems (Windows)
        path_candidates = [
            Path("C:/Program Files/LLVM/bin/libclang.dll"),
        ]

    # Also pick up fully versioned sonames such as libclang.so.21.1.4.
    extra_globs: List[Path] = []
    for d in (
        Path("/usr/lib"),
        Path("/usr/local/lib"),
        Path("/lib"),
        Path("/usr/lib64"),
        Path("/lib64"),
        Path("/usr/lib/x86_64-linux-gnu"),
    ):
        try:
            extra_globs.extend(d.glob("libclang.so.*"))
        except Exception:
            # Globbing is a best-effort extra; a failing directory is skipped.
            pass
    # Deduplicate while preserving order (Path is hashable).
    merged_candidates: List[Path] = list(dict.fromkeys([*path_candidates, *extra_globs]))

    for cand in merged_candidates:
        if cand.exists() and _ensure_supported_and_set(str(cand)):
            return cindex

    # Final fallback: let clang.cindex resolve the library itself. Some distributions
    # (e.g. Arch) install libclang on the default linker path.
    try:
        _ = cindex.Index.create()
        return cindex
    except Exception:
        pass

    # No supported libclang 16-21 could be located.
    raise RuntimeError(
        "未能定位到 libclang 16-21。本工具支持 clang 16-21 版本。\n"
        "修复选项:\n"
        "- 在 Ubuntu/Debian 上: sudo apt-get install -y llvm-21 clang-21 libclang-21-dev (或 20/19/18/17/16)。\n"
        "- 在 macOS (Homebrew) 上: brew install llvm@21 (或 @20/@19/@18/@17/@16)。\n"
        "- 在 Arch Linux 上: 确保 clang 提供了 /usr/lib/libclang.so (通常是这样) 或显式设置 CLANG_LIBRARY_FILE。\n"
        "- 然后设置环境变量 (如果未自动检测到):\n"
        " export CLANG_LIBRARY_FILE=/usr/lib/llvm-21/lib/libclang.so # Linux (请调整版本)\n"
        " export CLANG_LIBRARY_FILE=/opt/homebrew/opt/llvm@21/lib/libclang.dylib # macOS (请调整版本)\n"
    )
301
+ # ---------------------------
302
+ # Data structures
303
+ # ---------------------------
304
@dataclass
class FunctionInfo:
    """Record describing one function-like definition collected by the scanner."""

    # --- identity -----------------------------------------------------------
    name: str                      # plain spelling of the function
    qualified_name: str            # "::"-joined enclosing scopes plus the name
    signature: str                 # display form of the declaration
    return_type: str               # spelled return type; may be an empty string
    params: List[Dict[str, str]]   # one {"name": ..., "type": ...} dict per parameter
    calls: List[str]               # names of the functions called from the body

    # --- source location ----------------------------------------------------
    file: str                      # path of the defining file
    start_line: int
    start_col: int
    end_line: int
    end_col: int

    # --- misc ---------------------------------------------------------------
    language: str                  # language label reported for the definition
318
+
319
+
320
+
321
+
322
+
323
+
324
+ # ---------------------------
325
+ # Compile commands loader
326
+ # ---------------------------
327
def find_compile_commands(start: Path) -> Optional[Path]:
    """
    Locate the nearest compile_commands.json at or above ``start``.

    The resolved ``start`` directory is checked first, then each ancestor up to
    the filesystem root. Returns the first match, or None when there is none.
    """
    origin = start.resolve()
    for directory in (origin, *origin.parents):
        hit = directory / "compile_commands.json"
        if hit.exists():
            return hit
    return None
341
+
342
+
343
def load_compile_commands(cc_path: Path) -> Dict[str, List[str]]:
    """
    Load compile_commands.json and map each source file to its compile arguments.

    Args:
        cc_path: path to a compile_commands.json file.

    Returns:
        A dict ``absolute file path (str) -> list of arguments``. The compiler
        executable (argv[0]) is removed, as are ``-c``, ``-o <file>`` / ``-o<file>``
        and ``-MF <file>``. When a file appears several times the last entry wins.
        An unreadable file, invalid JSON or a non-list top level yields ``{}``.
        Entries that are not objects, have no ``file``, or carry a ``command``
        string that cannot be tokenised are skipped.
    """
    import shlex

    try:
        data = json.loads(cc_path.read_text(encoding="utf-8"))
    except Exception:
        return {}
    # A compilation database is a JSON array of command objects.
    if not isinstance(data, list):
        return {}

    mapping: Dict[str, List[str]] = {}
    for entry in data:
        if not isinstance(entry, dict):
            continue

        raw_file = entry.get("file")
        # A Path object is always truthy, so test the raw string instead.
        if not raw_file or not isinstance(raw_file, str):
            continue
        file_path = Path(raw_file)
        if not file_path.is_absolute():
            # Relative "file" values are relative to the entry's "directory".
            directory = entry.get("directory")
            if directory and isinstance(directory, str):
                file_path = Path(directory) / file_path
        file_path = file_path.resolve()

        arguments = entry.get("arguments")
        if isinstance(arguments, list):
            # "arguments" carries the compiler as argv[0].
            args = [str(a) for a in arguments[1:]]
        else:
            # Fall back to tokenising the "command" string.
            cmd = entry.get("command", "")
            try:
                parts = shlex.split(cmd) if cmd else []
            except ValueError:
                # Unbalanced quoting: skip this entry rather than abort the load.
                continue
            args = parts[1:]

        # Drop compile-only/output flags that confuse libclang.
        cleaned: List[str] = []
        skip_next = False
        for a in args:
            if skip_next:
                skip_next = False
                continue
            if a == "-c":
                continue
            if a in ("-o", "-MF"):
                # The following token is the flag's file argument.
                skip_next = True
                continue
            if a.startswith("-o"):
                # Joined form, e.g. -ofoo.o
                continue
            cleaned.append(a)
        mapping[str(file_path)] = cleaned
    return mapping
385
+
386
+ # ---------------------------
387
+ # File discovery
388
+ # ---------------------------
389
# File suffixes treated as C/C++ sources or headers (matched case-sensitively).
SOURCE_EXTS: Set[str] = {
    # translation units
    ".c",
    ".cc",
    ".cpp",
    ".cxx",
    ".C",
    # headers
    ".h",
    ".hh",
    ".hpp",
    ".hxx",
}


def iter_source_files(root: Path) -> Iterable[Path]:
    """Yield the resolved path of every regular file under ``root`` whose suffix is in SOURCE_EXTS."""
    for entry in root.rglob("*"):
        if entry.is_file() and entry.suffix in SOURCE_EXTS:
            yield entry.resolve()
400
+
401
+
402
+ # ---------------------------
403
+ # AST utilities
404
+ # ---------------------------
405
def get_qualified_name(cursor) -> str:
    """
    Build the ``::``-qualified name of ``cursor``.

    Walks the semantic parents up to (but excluding) the translation unit and
    prefixes the cursor's spelling with every named scope, outermost first.
    Unnamed scopes are skipped. With no named scope the bare spelling is
    returned, or "" if the spelling is empty.
    """
    scopes = []
    ancestor = cursor.semantic_parent
    while ancestor is not None and ancestor.kind.name != "TRANSLATION_UNIT":
        label = ancestor.spelling
        if label:
            # Prepend so the outermost scope ends up first.
            scopes.insert(0, label)
        ancestor = ancestor.semantic_parent

    if not scopes:
        return cursor.spelling or ""
    prefix = "::".join(scopes)
    return f"{prefix}::{cursor.spelling}"
417
+
418
+
419
def collect_params(cursor) -> List[Dict[str, str]]:
    """
    Return one ``{"name": ..., "type": ...}`` dict per PARM_DECL child of ``cursor``.

    Missing names and types that are empty or cannot be read become "".
    """

    def _type_text(decl) -> str:
        # Reading the type may fail; treat that as "no type".
        try:
            return decl.type.spelling or ""
        except Exception:
            return ""

    return [
        {"name": child.spelling or "", "type": _type_text(child)}
        for child in cursor.get_children()
        if child.kind.name == "PARM_DECL"
    ]
431
+
432
+
433
def collect_calls(cursor) -> List[str]:
    """
    List the callee names of every CALL_EXPR below ``cursor``, in pre-order.

    For each call the qualified name of the referenced declaration is preferred,
    then its plain spelling, then the call's display name. Calls with no usable
    name are omitted. Duplicates are kept.
    """
    found: List[str] = []
    # Explicit stack, children pushed in reverse so they pop in source order.
    pending = list(cursor.get_children())
    pending.reverse()
    while pending:
        node = pending.pop()
        if node.kind.name == "CALL_EXPR":
            try:
                target = node.referenced
                if target is not None and target.spelling:
                    label = get_qualified_name(target) or target.spelling
                else:
                    # No resolvable declaration: use the display name.
                    label = node.displayname or ""
            except Exception:
                label = node.displayname or ""
            if label:
                found.append(label)
        # Descend into every node; calls can nest inside other calls.
        pending.extend(reversed(list(node.get_children())))
    return found
462
+
463
+
464
def is_function_like(cursor) -> bool:
    """Tell whether ``cursor`` is a function, method, constructor, destructor or function template."""
    kind_name = cursor.kind.name
    return kind_name in (
        "FUNCTION_DECL",
        "CXX_METHOD",
        "CONSTRUCTOR",
        "DESTRUCTOR",
        "FUNCTION_TEMPLATE",
    )
472
+
473
+
474
def lang_from_cursor(cursor) -> str:
    """
    Return a language label for ``cursor``.

    Uses ``cursor.language.name`` when available. Otherwise the label is guessed
    from the extension of the cursor's file: ".c" gives "C", anything else gives
    "CXX". The comparison is case-sensitive, so ".C" counts as C++. Returns
    "UNKNOWN" when no file is available.
    """
    try:
        return str(cursor.language.name)
    except Exception:
        pass

    # Guess by extension. The location lookup can fail as well; treat that like
    # a missing file instead of raising.
    try:
        f = cursor.location.file
    except Exception:
        f = None
    if f is None:
        return "UNKNOWN"

    # No lower-casing: ".c" is C, while ".C" is a C++ extension.
    ext = os.path.splitext(str(f))[1]
    return "C" if ext == ".c" else "CXX"
486
+
487
+
488
+ # ---------------------------
489
+ # Scanner core
490
+ # ---------------------------
491
def scan_file(cindex, file_path: Path, args: List[str]) -> List[FunctionInfo]:
    """
    Parse one source file with libclang and collect its function definitions.

    Args:
        cindex: the ``clang.cindex`` module to parse with.
        file_path: the file to parse.
        args: compiler arguments passed to the parser.

    Returns:
        One FunctionInfo per function-like definition whose location is in
        ``file_path`` itself; definitions from included files are ignored.
        A definition whose details cannot be extracted is skipped.
    """
    index = cindex.Index.create()
    tu = index.parse(
        str(file_path),
        args=args,
        options=0,  # need bodies to collect calls
    )
    functions: List[FunctionInfo] = []

    # Resolve the target once, and memoise the comparison per reported file name:
    # the same handful of files repeats across all nodes of the translation unit.
    target = file_path.resolve()
    same_file: Dict[str, bool] = {}

    def _defined_in_target(node) -> bool:
        loc_file = node.location.file
        if loc_file is None:
            return False
        fname = loc_file.name
        hit = same_file.get(fname)
        if hit is None:
            hit = Path(fname).resolve() == target
            same_file[fname] = hit
        return hit

    def visit(node):
        # Only consider functions with definitions in this file.
        if is_function_like(node) and node.is_definition() and _defined_in_target(node):
            try:
                name = node.spelling or ""
                try:
                    return_type = node.result_type.spelling  # not available for constructors/destructors
                except Exception:
                    return_type = ""
                extent = node.extent
                functions.append(
                    FunctionInfo(
                        name=name,
                        qualified_name=get_qualified_name(node),
                        signature=node.displayname or name,
                        return_type=return_type,
                        params=collect_params(node),
                        calls=collect_calls(node),
                        file=str(file_path),
                        start_line=extent.start.line,
                        start_col=extent.start.column,
                        end_line=extent.end.line,
                        end_col=extent.end.column,
                        language=lang_from_cursor(node),
                    )
                )
            except Exception:
                # Deliberate best effort: one bad node must not abort the scan.
                pass

        for ch in node.get_children():
            visit(ch)

    visit(tu.cursor)
    return functions
545
+
546
+
547
+ def scan_directory(scan_root: Path, db_path: Optional[Path] = None) -> Path:
548
+ """
549
+ Scan a directory for C/C++ symbols and store results into JSONL/JSON.
550
+
551
+ Returns the path to symbols_raw.jsonl.
552
+ - symbols_raw.jsonl: one JSON object per symbol (category: function/type),原始扫描产物
553
+ - symbols.jsonl: 与原始产物等价的初始基线(便于未执行 prune 时直接进入后续流程)
554
+ - meta.json: summary counts and timestamp
555
+ """
556
+ scan_root = scan_root.resolve()
557
+ out_dir = scan_root / ".jarvis" / "c2rust"
558
+ out_dir.mkdir(parents=True, exist_ok=True)
559
+
560
+ # JSONL/JSON outputs (symbols only)
561
+ symbols_raw_jsonl = out_dir / "symbols_raw.jsonl"
562
+ symbols_curated_jsonl = out_dir / "symbols.jsonl"
563
+ meta_json = out_dir / "meta.json"
564
+
565
+ # Prepare libclang
566
+ cindex = _try_import_libclang()
567
+ # Fallback safeguard: if loader returned None, try importing directly
568
+ if cindex is None:
569
+ try:
570
+ from clang import cindex as _ci
571
+ cindex = _ci
572
+ except Exception as e:
573
+ raise RuntimeError(f"Failed to load libclang bindings: {e}")
574
+
575
+ # Preflight check: verify libclang/python bindings compatibility before scanning
576
+ try:
577
+ _ = cindex.Index.create()
578
+ except Exception as e:
579
+ msg = str(e)
580
+ if "undefined symbol" in msg:
581
+ # Try to suggest a better libclang candidate that contains the missing symbol
582
+ def _has_symbol(lib_path: str, symbol: str) -> bool:
583
+ try:
584
+ import ctypes
585
+ lib = ctypes.CDLL(lib_path)
586
+ getattr(lib, symbol)
587
+ return True
588
+ except Exception:
589
+ return False
590
+
591
+ # Build candidate search dirs (Linux/macOS)
592
+ import platform as _platform
593
+ sys_name = _platform.system()
594
+ lib_candidates = []
595
+ if sys_name == "Linux":
596
+ lib_candidates = [
597
+ "/usr/lib/llvm-21/lib/libclang.so",
598
+ "/usr/lib/llvm-20/lib/libclang.so",
599
+ "/usr/lib/llvm-19/lib/libclang.so",
600
+ "/usr/lib/llvm-18/lib/libclang.so",
601
+ "/usr/lib/llvm-17/lib/libclang.so",
602
+ "/usr/lib/llvm-16/lib/libclang.so",
603
+ "/usr/lib/libclang.so",
604
+ "/usr/local/lib/libclang.so",
605
+ ]
606
+ elif sys_name == "Darwin":
607
+ # Homebrew locations
608
+ lib_candidates = [
609
+ "/opt/homebrew/opt/llvm/lib/libclang.dylib",
610
+ "/usr/local/opt/llvm/lib/libclang.dylib",
611
+ ]
612
+
613
+ good = [p for p in lib_candidates if Path(p).exists() and _has_symbol(p, "clang_getOffsetOfBase")]
614
+ hint = ""
615
+ if good:
616
+ hint = f"\n建议的包含所需符号的库:\n export CLANG_LIBRARY_FILE={good[0]}\n然后重新运行: jarvis-c2rust scan -r {scan_root}"
617
+
618
+ typer.secho(
619
+ "[c2rust-scanner] 检测到 libclang/python 绑定不匹配 (未定义符号)。"
620
+ f"\n详情: {msg}"
621
+ "\n这通常意味着您的 Python 'clang' 绑定版本高于已安装的 libclang。"
622
+ "\n修复选项:\n"
623
+ "- 安装/更新 libclang 以匹配您 Python 'clang' 的主版本 (例如 16-21)。\n"
624
+ "- 或将 Python 'clang' 版本固定为与系统 libclang 匹配 (例如 pip install 'clang>=16,<22')。\n"
625
+ "- 或设置 CLANG_LIBRARY_FILE 指向匹配的 libclang 共享库。\n"
626
+ f"{hint}",
627
+ fg=typer.colors.RED,
628
+ err=True,
629
+ )
630
+ raise typer.Exit(code=2)
631
+ else:
632
+ # Other initialization errors: surface and exit
633
+ typer.secho(f"[c2rust-scanner] libclang 初始化失败: {e}", fg=typer.colors.RED, err=True)
634
+ raise typer.Exit(code=2)
635
+
636
+ # compile_commands
637
+ cc_file = find_compile_commands(scan_root)
638
+ cc_args_map: Dict[str, List[str]] = {}
639
+ if cc_file:
640
+ cc_args_map = load_compile_commands(cc_file)
641
+
642
+ # default args: at least include root dir to help header resolution
643
+ default_args = ["-I", str(scan_root)]
644
+
645
+ files = list(iter_source_files(scan_root))
646
+ total_files = len(files)
647
+ print(f"[c2rust-scanner] 正在扫描 {scan_root} 目录下的 {total_files} 个文件")
648
+
649
+ scanned = 0
650
+ total_functions = 0
651
+ total_types = 0
652
+
653
+ # JSONL writers
654
+ now_ts = time.strftime("%Y-%m-%dT%H:%M:%S", time.localtime())
655
+ sym_id_seq = 1
656
+
657
+ def _fn_record(fn: FunctionInfo, id_val: int) -> Dict[str, Any]:
658
+ return {
659
+ "id": id_val,
660
+ "name": fn.name,
661
+ "qualified_name": fn.qualified_name,
662
+ "signature": fn.signature,
663
+ "return_type": fn.return_type,
664
+ "params": fn.params,
665
+ "ref": fn.calls, # unified field: referenced functions/types
666
+ "file": fn.file,
667
+ "start_line": fn.start_line,
668
+ "start_col": fn.start_col,
669
+ "end_line": fn.end_line,
670
+ "end_col": fn.end_col,
671
+ "language": fn.language,
672
+ "created_at": now_ts,
673
+ "updated_at": now_ts,
674
+ }
675
+
676
+ def _tp_record(tp: TypeInfo, id_val: int) -> Dict[str, Any]:
677
+ # For types, 'ref' 表示引用到的类型集合;当前最小实现:若为typedef/alias则包含 underlying_type
678
+ refs: List[str] = []
679
+ if tp.underlying_type:
680
+ try:
681
+ s = str(tp.underlying_type).strip()
682
+ if s:
683
+ refs.append(s)
684
+ except Exception:
685
+ pass
686
+ return {
687
+ "id": id_val,
688
+ "name": tp.name,
689
+ "qualified_name": tp.qualified_name,
690
+ "kind": tp.kind,
691
+ "underlying_type": tp.underlying_type,
692
+ "ref": refs,
693
+ "file": tp.file,
694
+ "start_line": tp.start_line,
695
+ "start_col": tp.start_col,
696
+ "end_line": tp.end_line,
697
+ "end_col": tp.end_col,
698
+ "language": tp.language,
699
+ "created_at": now_ts,
700
+ "updated_at": now_ts,
701
+ }
702
+
703
+ # Unified symbol records (functions and types)
704
+ def _sym_record_from_function(fn: FunctionInfo, id_val: int) -> Dict[str, Any]:
705
+ return {
706
+ "id": id_val,
707
+ "category": "function",
708
+ "name": fn.name,
709
+ "qualified_name": fn.qualified_name,
710
+ "signature": fn.signature,
711
+ "return_type": fn.return_type,
712
+ "params": fn.params,
713
+ "ref": fn.calls,
714
+ "file": fn.file,
715
+ "start_line": fn.start_line,
716
+ "start_col": fn.start_col,
717
+ "end_line": fn.end_line,
718
+ "end_col": fn.end_col,
719
+ "language": fn.language,
720
+ "created_at": now_ts,
721
+ "updated_at": now_ts,
722
+ }
723
+
724
+ def _sym_record_from_type(tp: TypeInfo, id_val: int) -> Dict[str, Any]:
725
+ refs_t: List[str] = []
726
+ if tp.underlying_type:
727
+ try:
728
+ s = str(tp.underlying_type).strip()
729
+ if s:
730
+ refs_t.append(s)
731
+ except Exception:
732
+ pass
733
+ return {
734
+ "id": id_val,
735
+ "category": "type",
736
+ "name": tp.name,
737
+ "qualified_name": tp.qualified_name,
738
+ "kind": tp.kind,
739
+ "underlying_type": tp.underlying_type,
740
+ "ref": refs_t,
741
+ "file": tp.file,
742
+ "start_line": tp.start_line,
743
+ "start_col": tp.start_col,
744
+ "end_line": tp.end_line,
745
+ "end_col": tp.end_col,
746
+ "language": tp.language,
747
+ "created_at": now_ts,
748
+ "updated_at": now_ts,
749
+ }
750
+
751
+ # Open JSONL file (symbols only)
752
+ f_sym = symbols_raw_jsonl.open("w", encoding="utf-8")
753
+ try:
754
+ for p in files:
755
+ # prefer compile_commands args if available
756
+ args = cc_args_map.get(str(p), default_args)
757
+ try:
758
+ funcs = scan_file(cindex, p, args)
759
+ except Exception as e:
760
+ # If we hit undefined symbol, it's a libclang/python bindings mismatch; abort with guidance
761
+ msg = str(e)
762
+ if "undefined symbol" in msg:
763
+ def _has_symbol(lib_path: str, symbol: str) -> bool:
764
+ try:
765
+ import ctypes
766
+ lib = ctypes.CDLL(lib_path)
767
+ getattr(lib, symbol)
768
+ return True
769
+ except Exception:
770
+ return False
771
+
772
+ import platform as _platform
773
+ sys_name = _platform.system()
774
+ lib_candidates2: List[str] = []
775
+ if sys_name == "Linux":
776
+ lib_candidates2 = [
777
+ "/usr/lib/llvm-20/lib/libclang.so",
778
+ "/usr/lib/llvm-19/lib/libclang.so",
779
+ "/usr/lib/llvm-18/lib/libclang.so",
780
+ "/usr/lib/libclang.so",
781
+ "/usr/local/lib/libclang.so",
782
+ ]
783
+ elif sys_name == "Darwin":
784
+ lib_candidates2 = [
785
+ "/opt/homebrew/opt/llvm/lib/libclang.dylib",
786
+ "/usr/local/opt/llvm/lib/libclang.dylib",
787
+ ]
788
+
789
+ good = [lp for lp in lib_candidates2 if Path(lp).exists() and _has_symbol(lp, "clang_getOffsetOfBase")]
790
+ hint = ""
791
+ if good:
792
+ hint = f"\n建议的包含所需符号的库:\n export CLANG_LIBRARY_FILE={good[0]}\n然后重新运行: jarvis-c2rust scan -r {scan_root}"
793
+
794
+ typer.secho(
795
+ "[c2rust-scanner] 解析期间检测到 libclang/python 绑定不匹配 (未定义符号)。"
796
+ f"\n详情: {msg}"
797
+ "\n这通常意味着您的 Python 'clang' 绑定版本高于已安装的 libclang。"
798
+ "\n修复选项:\n"
799
+ "- 安装/更新 libclang 以匹配您 Python 'clang' 的主版本 (例如 19/20)。\n"
800
+ "- 或将 Python 'clang' 版本固定为与系统 libclang 匹配 (例如 pip install 'clang==18.*')。\n"
801
+ "- 或设置 CLANG_LIBRARY_FILE 指向匹配的 libclang 共享库。\n"
802
+ f"{hint}",
803
+ fg=typer.colors.RED,
804
+ err=True,
805
+ )
806
+ raise typer.Exit(code=2)
807
+
808
+ # Try without args as fallback for regular parse errors
809
+ try:
810
+ funcs = scan_file(cindex, p, [])
811
+ except Exception:
812
+ print(f"[c2rust-scanner] 解析 {p} 失败: {e}", file=sys.stderr)
813
+ continue
814
+
815
+ # Write JSONL
816
+ for fn in funcs:
817
+ # write unified symbol record
818
+ srec = _sym_record_from_function(fn, sym_id_seq)
819
+ f_sym.write(json.dumps(srec, ensure_ascii=False) + "\n")
820
+ # increase sequences
821
+ sym_id_seq += 1
822
+ total_functions += len(funcs)
823
+
824
+ # Scan types in this file
825
+ try:
826
+ types = scan_types_file(cindex, p, args)
827
+ except Exception:
828
+ try:
829
+ types = scan_types_file(cindex, p, [])
830
+ except Exception:
831
+ types = []
832
+
833
+ for t in types:
834
+ # write unified symbol record
835
+ srec_t = _sym_record_from_type(t, sym_id_seq)
836
+ f_sym.write(json.dumps(srec_t, ensure_ascii=False) + "\n")
837
+ # increase sequences
838
+ sym_id_seq += 1
839
+ total_types += len(types)
840
+
841
+ scanned += 1
842
+ if scanned % 20 == 0 or scanned == total_files:
843
+ print(f"[c2rust-scanner] 进度: {scanned}/{total_files} 个文件, {total_functions} 个函数, {total_types} 个类型")
844
+ finally:
845
+ try:
846
+ f_sym.close()
847
+ except Exception:
848
+ pass
849
+
850
+ # Write meta.json
851
+ meta = {
852
+ "functions": total_functions,
853
+ "types": total_types,
854
+ "symbols": total_functions + total_types,
855
+ "generated_at": now_ts,
856
+ "schema_version": 1,
857
+ "source_root": str(scan_root),
858
+ }
859
+ try:
860
+ meta_json.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")
861
+ except Exception:
862
+ pass
863
+
864
+ print(f"[c2rust-scanner] 完成。收集到的函数: {total_functions}, 类型: {total_types}, 符号: {total_functions + total_types}")
865
+ print(f"[c2rust-scanner] JSONL 已写入: {symbols_raw_jsonl} (原始符号)")
866
+ # 同步生成基线 symbols.jsonl(与 raw 等价),便于后续流程仅基于 symbols.jsonl 运行
867
+ try:
868
+ shutil.copy2(symbols_raw_jsonl, symbols_curated_jsonl)
869
+ print(f"[c2rust-scanner] JSONL 基线已写入: {symbols_curated_jsonl} (用于后续流程)")
870
+ except Exception as _e:
871
+ typer.secho(f"[c2rust-scanner] 生成 symbols.jsonl 失败: {_e}", fg=typer.colors.RED, err=True)
872
+ raise
873
+ print(f"[c2rust-scanner] 元数据已写入: {meta_json}")
874
+ return symbols_raw_jsonl
875
+
876
+ # ---------------------------
877
+ # Type scanning
878
+ # ---------------------------
879
+ @dataclass
880
+ class TypeInfo:
881
+ name: str
882
+ qualified_name: str
883
+ kind: str
884
+ underlying_type: str
885
+ file: str
886
+ start_line: int
887
+ start_col: int
888
+ end_line: int
889
+ end_col: int
890
+ language: str
891
+
892
+
893
+
894
+
895
+ TYPE_KINDS: Set[str] = {
896
+ "STRUCT_DECL",
897
+ "UNION_DECL",
898
+ "ENUM_DECL",
899
+ "CXX_RECORD_DECL", # C++ class/struct/union
900
+ "TYPEDEF_DECL",
901
+ "TYPE_ALIAS_DECL",
902
+ }
903
+
904
+
905
+ def scan_types_file(cindex, file_path: Path, args: List[str]) -> List[TypeInfo]:
906
+ index = cindex.Index.create()
907
+ tu = index.parse(
908
+ str(file_path),
909
+ args=args,
910
+ options=0,
911
+ )
912
+ types: List[TypeInfo] = []
913
+
914
+ def visit(node):
915
+ kind = node.kind.name
916
+ # Filter by file
917
+ loc_file = node.location.file
918
+ if loc_file is None or Path(loc_file.name).resolve() != file_path.resolve():
919
+ for ch in node.get_children():
920
+ visit(ch)
921
+ return
922
+
923
+ if kind in TYPE_KINDS:
924
+ # Accept full definitions for record/enum; typedef/alias are inherently definitions
925
+ need_def = kind in {"STRUCT_DECL", "UNION_DECL", "ENUM_DECL", "CXX_RECORD_DECL"}
926
+ if (not need_def) or node.is_definition():
927
+ try:
928
+ name = node.spelling or ""
929
+ qualified_name = get_qualified_name(node)
930
+ underlying = ""
931
+ if kind in {"TYPEDEF_DECL", "TYPE_ALIAS_DECL"}:
932
+ try:
933
+ underlying = node.underlying_typedef_type.spelling or ""
934
+ except Exception:
935
+ underlying = ""
936
+ extent = node.extent
937
+ start_line = extent.start.line
938
+ start_col = extent.start.column
939
+ end_line = extent.end.line
940
+ end_col = extent.end.column
941
+ language = lang_from_cursor(node)
942
+ ti = TypeInfo(
943
+ name=name,
944
+ qualified_name=qualified_name,
945
+ kind=kind.lower(),
946
+ underlying_type=underlying,
947
+ file=str(file_path),
948
+ start_line=start_line,
949
+ start_col=start_col,
950
+ end_line=end_line,
951
+ end_col=end_col,
952
+ language=language,
953
+ )
954
+ types.append(ti)
955
+ except Exception:
956
+ pass
957
+
958
+ for ch in node.get_children():
959
+ visit(ch)
960
+
961
+ visit(tu.cursor)
962
+ return types
963
+
964
+
965
+ # ---------------------------
966
+ # CLI and DOT export
967
+ # ---------------------------
968
+
969
+
970
+ def generate_dot_from_db(db_path: Path, out_path: Path) -> Path:
971
+ # Generate a global reference dependency graph (DOT) from symbols.jsonl.
972
+ def _resolve_symbols_jsonl_path(hint: Path) -> Path:
973
+ p = Path(hint)
974
+ # 允许直接传入 .jsonl 文件
975
+ if p.is_file() and p.suffix.lower() == ".jsonl":
976
+ return p
977
+ # 仅支持目录下的标准路径:<dir>/.jarvis/c2rust/symbols.jsonl
978
+ if p.is_dir():
979
+ prefer = p / ".jarvis" / "c2rust" / "symbols.jsonl"
980
+ return prefer
981
+ # 默认:项目 <cwd>/.jarvis/c2rust/symbols.jsonl
982
+ return Path(".") / ".jarvis" / "c2rust" / "symbols.jsonl"
983
+
984
+ sjsonl = _resolve_symbols_jsonl_path(db_path)
985
+ if not sjsonl.exists():
986
+ raise FileNotFoundError(f"未找到 symbols.jsonl: {sjsonl}")
987
+
988
+ # Load symbols (functions and types), unified handling (no category filtering)
989
+ by_id: Dict[int, Dict[str, Any]] = {}
990
+ name_to_id: Dict[str, int] = {}
991
+ adj_names: Dict[int, List[str]] = {}
992
+ with open(sjsonl, "r", encoding="utf-8") as f:
993
+ idx = 0
994
+ for line in f:
995
+ line = line.strip()
996
+ if not line:
997
+ continue
998
+ try:
999
+ obj = json.loads(line)
1000
+ except Exception:
1001
+ continue
1002
+ idx += 1
1003
+ fid = int(obj.get("id") or idx)
1004
+ nm = obj.get("name") or ""
1005
+ qn = obj.get("qualified_name") or ""
1006
+ sig = obj.get("signature") or ""
1007
+ refs = obj.get("ref")
1008
+ if not isinstance(refs, list):
1009
+ refs = []
1010
+ refs = [c for c in refs if isinstance(c, str) and c]
1011
+
1012
+ by_id[fid] = {"name": nm, "qname": qn, "sig": sig}
1013
+ if nm:
1014
+ name_to_id.setdefault(nm, fid)
1015
+ if qn:
1016
+ name_to_id.setdefault(qn, fid)
1017
+ adj_names[fid] = refs
1018
+
1019
+ # Convert name-based adjacency to id-based adjacency (internal edges only)
1020
+ adj_ids: Dict[int, List[int]] = {}
1021
+ all_ids: List[int] = sorted(by_id.keys())
1022
+ for src in all_ids:
1023
+ internal: List[int] = []
1024
+ for target in adj_names.get(src, []):
1025
+ tid = name_to_id.get(target)
1026
+ if tid is not None and tid != src:
1027
+ internal.append(tid)
1028
+ try:
1029
+ internal = list(dict.fromkeys(internal))
1030
+ except Exception:
1031
+ internal = sorted(list(set(internal)))
1032
+ adj_ids[src] = internal
1033
+
1034
+ def base_label(fid: int) -> str:
1035
+ meta = by_id.get(fid, {})
1036
+ base = meta.get("qname") or meta.get("name") or f"sym_{fid}"
1037
+ sig = meta.get("sig") or ""
1038
+ if sig and sig != base:
1039
+ return f"{base}\\n{sig}"
1040
+ return base
1041
+
1042
+ # Prepare output path
1043
+ if out_path is None:
1044
+ out_path = sjsonl.parent / "global_refgraph.dot"
1045
+ out_path = Path(out_path)
1046
+ out_path.parent.mkdir(parents=True, exist_ok=True)
1047
+
1048
+ # Write global DOT
1049
+ with open(out_path, "w", encoding="utf-8") as f:
1050
+ f.write("digraph refgraph {\n")
1051
+ f.write(" rankdir=LR;\n")
1052
+ f.write(" graph [fontsize=10];\n")
1053
+ f.write(" node [fontsize=10];\n")
1054
+ f.write(" edge [fontsize=9];\n")
1055
+
1056
+ # Nodes
1057
+ for fid in all_ids:
1058
+ lbl = base_label(fid)
1059
+ safe_label = lbl.replace("\\", "\\\\").replace('"', '\\"')
1060
+ f.write(f' n{fid} [label="{safe_label}", shape=box];\n')
1061
+
1062
+ # Edges
1063
+ for src in all_ids:
1064
+ for dst in adj_ids.get(src, []):
1065
+ f.write(f" n{src} -> n{dst};\n")
1066
+
1067
+ f.write("}\n")
1068
+
1069
+ return out_path
1070
+
1071
+
1072
+ def find_root_function_ids(db_path: Path) -> List[int]:
1073
+ """
1074
+ Return IDs of root symbols (no incoming references) by reading symbols.jsonl (or given .jsonl path).
1075
+ - 严格使用 ref 字段
1076
+ - 函数与类型统一处理(不区分)
1077
+ """
1078
+ def _resolve_symbols_jsonl_path(hint: Path) -> Path:
1079
+ p = Path(hint)
1080
+ if p.is_file() and p.suffix.lower() == ".jsonl":
1081
+ return p
1082
+ if p.is_dir():
1083
+ prefer = p / ".jarvis" / "c2rust" / "symbols.jsonl"
1084
+ return prefer
1085
+ # 默认:项目 .jarvis/c2rust/symbols.jsonl
1086
+ return Path(".") / ".jarvis" / "c2rust" / "symbols.jsonl"
1087
+
1088
+ fjsonl = _resolve_symbols_jsonl_path(db_path)
1089
+ if not fjsonl.exists():
1090
+ raise FileNotFoundError(f"未找到 symbols.jsonl: {fjsonl}")
1091
+
1092
+ records: List[Any] = []
1093
+ with open(fjsonl, "r", encoding="utf-8") as f:
1094
+ idx = 0
1095
+ for line in f:
1096
+ line = line.strip()
1097
+ if not line:
1098
+ continue
1099
+ try:
1100
+ obj = json.loads(line)
1101
+ except Exception:
1102
+ continue
1103
+ idx += 1
1104
+ fid = int(obj.get("id") or idx)
1105
+ name = obj.get("name") or ""
1106
+ qname = obj.get("qualified_name") or ""
1107
+ refs = obj.get("ref")
1108
+ if not isinstance(refs, list):
1109
+ refs = []
1110
+ refs = [r for r in refs if isinstance(r, str) and r]
1111
+ records.append((fid, name, qname, refs))
1112
+
1113
+ name_to_id: Dict[str, int] = {}
1114
+ all_ids: Set[int] = set()
1115
+ for fid, name, qname, _ in records:
1116
+ fid = int(fid)
1117
+ all_ids.add(fid)
1118
+ if isinstance(name, str) and name:
1119
+ name_to_id.setdefault(name, fid)
1120
+ if isinstance(qname, str) and qname:
1121
+ name_to_id.setdefault(qname, fid)
1122
+
1123
+ non_roots: Set[int] = set()
1124
+ for fid, _name, _qname, refs in records:
1125
+ for target in refs:
1126
+ tid = name_to_id.get(target)
1127
+ if tid is not None and tid != fid:
1128
+ non_roots.add(tid)
1129
+
1130
+ root_ids = sorted(list(all_ids - non_roots))
1131
+ return root_ids
1132
+
1133
+
1134
+ def compute_translation_order_jsonl(db_path: Path, out_path: Optional[Path] = None) -> Path:
1135
+ """
1136
+ Compute translation order on reference graph and write order to JSONL.
1137
+ Data source: symbols.jsonl (or provided .jsonl path), strictly using ref field and including all symbols.
1138
+ Output:
1139
+ Each line is a JSON object:
1140
+ {
1141
+ "step": int,
1142
+ "ids": [symbol_id, ...],
1143
+ "group": bool,
1144
+ "roots": [root_id], # root this step is attributed to (empty if residual)
1145
+ "created_at": "YYYY-MM-DDTHH:MM:SS"
1146
+ }
1147
+ """
1148
+ def _resolve_symbols_jsonl_path(hint: Path) -> Path:
1149
+ p = Path(hint)
1150
+ if p.is_file() and p.suffix.lower() == ".jsonl":
1151
+ return p
1152
+ if p.is_dir():
1153
+ prefer = p / ".jarvis" / "c2rust" / "symbols.jsonl"
1154
+ return prefer
1155
+ return Path(".") / ".jarvis" / "c2rust" / "symbols.jsonl"
1156
+
1157
+ fjsonl = _resolve_symbols_jsonl_path(db_path)
1158
+ if not fjsonl.exists():
1159
+ raise FileNotFoundError(f"未找到 symbols.jsonl: {fjsonl}")
1160
+
1161
+ # Load symbols and build name-based adjacency from ref
1162
+ by_id: Dict[int, Dict[str, Any]] = {}
1163
+ name_to_id: Dict[str, int] = {}
1164
+ adj_names: Dict[int, List[str]] = {}
1165
+ with open(fjsonl, "r", encoding="utf-8") as f:
1166
+ idx = 0
1167
+ for line in f:
1168
+ line = line.strip()
1169
+ if not line:
1170
+ continue
1171
+ try:
1172
+ obj = json.loads(line)
1173
+ except Exception:
1174
+ continue
1175
+ idx += 1
1176
+ fid = int(obj.get("id") or idx)
1177
+ nm = obj.get("name") or ""
1178
+ qn = obj.get("qualified_name") or ""
1179
+ refs = obj.get("ref")
1180
+ if not isinstance(refs, list):
1181
+ refs = []
1182
+ refs = [r for r in refs if isinstance(r, str) and r]
1183
+ by_id[fid] = {
1184
+ "name": nm,
1185
+ "qname": qn,
1186
+ "cat": (obj.get("category") or ""),
1187
+ "file": obj.get("file") or "",
1188
+ "start_line": obj.get("start_line"),
1189
+ "end_line": obj.get("end_line"),
1190
+ "start_col": obj.get("start_col"),
1191
+ "end_col": obj.get("end_col"),
1192
+ "language": obj.get("language") or "",
1193
+ "record": obj, # embed full symbol record for order file self-containment
1194
+ }
1195
+ if nm:
1196
+ name_to_id.setdefault(nm, fid)
1197
+ if qn:
1198
+ name_to_id.setdefault(qn, fid)
1199
+ adj_names[fid] = refs
1200
+
1201
+ # Convert to id-based adjacency (internal edges only)
1202
+ adj_ids: Dict[int, List[int]] = {}
1203
+ all_ids: List[int] = sorted(by_id.keys())
1204
+ for src in all_ids:
1205
+ internal: List[int] = []
1206
+ for target in adj_names.get(src, []):
1207
+ tid = name_to_id.get(target)
1208
+ if tid is not None and tid != src:
1209
+ internal.append(tid)
1210
+ try:
1211
+ internal = list(dict.fromkeys(internal))
1212
+ except Exception:
1213
+ internal = sorted(list(set(internal)))
1214
+ adj_ids[src] = internal
1215
+
1216
+ # Roots by incoming degree (no incoming)
1217
+ try:
1218
+ roots = find_root_function_ids(fjsonl)
1219
+ except Exception:
1220
+ roots = []
1221
+
1222
+ # Tarjan SCC
1223
+ index_counter = 0
1224
+ stack: List[int] = []
1225
+ onstack: Set[int] = set()
1226
+ indices: Dict[int, int] = {}
1227
+ lowlinks: Dict[int, int] = {}
1228
+ sccs: List[List[int]] = []
1229
+
1230
+ def strongconnect(v: int) -> None:
1231
+ nonlocal index_counter, stack
1232
+ indices[v] = index_counter
1233
+ lowlinks[v] = index_counter
1234
+ index_counter += 1
1235
+ stack.append(v)
1236
+ onstack.add(v)
1237
+
1238
+ for w in adj_ids.get(v, []):
1239
+ if w not in indices:
1240
+ strongconnect(w)
1241
+ lowlinks[v] = min(lowlinks[v], lowlinks[w])
1242
+ elif w in onstack:
1243
+ lowlinks[v] = min(lowlinks[v], indices[w])
1244
+
1245
+ if lowlinks[v] == indices[v]:
1246
+ comp: List[int] = []
1247
+ while True:
1248
+ w = stack.pop()
1249
+ onstack.discard(w)
1250
+ comp.append(w)
1251
+ if w == v:
1252
+ break
1253
+ sccs.append(sorted(comp))
1254
+
1255
+ for node in all_ids:
1256
+ if node not in indices:
1257
+ strongconnect(node)
1258
+
1259
+ # Component DAG (reversed: dependency -> dependent) for leaves-first order
1260
+ id2comp: Dict[int, int] = {}
1261
+ for i, comp in enumerate(sccs):
1262
+ for nid in comp:
1263
+ id2comp[nid] = i
1264
+
1265
+ comp_count = len(sccs)
1266
+ comp_rev_adj: Dict[int, Set[int]] = {i: set() for i in range(comp_count)}
1267
+ indeg: Dict[int, int] = {i: 0 for i in range(comp_count)}
1268
+ for u in all_ids:
1269
+ cu = id2comp[u]
1270
+ for v in adj_ids.get(u, []):
1271
+ cv = id2comp[v]
1272
+ if cu != cv:
1273
+ if cu not in comp_rev_adj[cv]:
1274
+ comp_rev_adj[cv].add(cu)
1275
+ for cv, succs in comp_rev_adj.items():
1276
+ for cu in succs:
1277
+ indeg[cu] += 1
1278
+
1279
+ # Kahn on reversed DAG
1280
+ from collections import deque
1281
+ q = deque(sorted([i for i in range(comp_count) if indeg[i] == 0]))
1282
+ comp_order: List[int] = []
1283
+ while q:
1284
+ c = q.popleft()
1285
+ comp_order.append(c)
1286
+ for nxt in sorted(comp_rev_adj.get(c, set())):
1287
+ indeg[nxt] -= 1
1288
+ if indeg[nxt] == 0:
1289
+ q.append(nxt)
1290
+
1291
+ if len(comp_order) < comp_count:
1292
+ remaining = [i for i in range(comp_count) if i not in comp_order]
1293
+ comp_order.extend(sorted(remaining))
1294
+
1295
+ # Emit steps by root priority
1296
+ emitted: Set[int] = set()
1297
+ steps: List[Dict[str, Any]] = []
1298
+ now_ts = time.strftime("%Y-%m-%dT%H:%M:%S", time.localtime())
1299
+
1300
+ # precompute reachability per root
1301
+ def _reachable(start_id: int) -> Set[int]:
1302
+ visited: Set[int] = set()
1303
+ stack2: List[int] = [start_id]
1304
+ visited.add(start_id)
1305
+ while stack2:
1306
+ s = stack2.pop()
1307
+ for v in adj_ids.get(s, []):
1308
+ if v not in visited:
1309
+ visited.add(v)
1310
+ stack2.append(v)
1311
+ return visited
1312
+
1313
+ root_reach: Dict[int, Set[int]] = {rid: _reachable(rid) for rid in roots}
1314
+
1315
+ def _emit_for_root(root_id: Optional[int]) -> None:
1316
+ # Emit order per root follows leaves-first (on reversed component DAG),
1317
+ # but delay entry functions (e.g., main) to the end if they are singleton components.
1318
+ reach = root_reach.get(root_id, set()) if root_id is not None else None
1319
+
1320
+ def _is_entry(nid: int) -> bool:
1321
+ meta = by_id.get(nid, {})
1322
+ nm = str(meta.get("name") or "").lower()
1323
+ qn = str(meta.get("qname") or "").lower()
1324
+ # Configurable delayed entry symbols via env:
1325
+ # - JARVIS_C2RUST_DELAY_ENTRY_SYMBOLS
1326
+ # - JARVIS_C2RUST_DELAY_ENTRIES
1327
+ # - C2RUST_DELAY_ENTRIES
1328
+ entries_env = os.environ.get("JARVIS_C2RUST_DELAY_ENTRY_SYMBOLS") or \
1329
+ os.environ.get("JARVIS_C2RUST_DELAY_ENTRIES") or \
1330
+ os.environ.get("C2RUST_DELAY_ENTRIES") or ""
1331
+ entries_set = set()
1332
+ if entries_env:
1333
+ try:
1334
+ import re as _re
1335
+ parts = _re.split(r"[,\s;]+", entries_env.strip())
1336
+ except Exception:
1337
+ parts = [p.strip() for p in entries_env.replace(";", ",").split(",")]
1338
+ entries_set = {p.strip().lower() for p in parts if p and p.strip()}
1339
+ # If configured, use the provided entries; otherwise fallback to default 'main'
1340
+ if entries_set:
1341
+ return (nm in entries_set) or (qn in entries_set)
1342
+ return nm == "main" or qn == "main" or qn.endswith("::main")
1343
+
1344
+ delayed_entries: List[int] = []
1345
+
1346
+ for comp_idx in comp_order:
1347
+ comp_nodes = sccs[comp_idx]
1348
+ selected: List[int] = []
1349
+ # Select nodes for this component, deferring entry (main) if safe to do so
1350
+ for nid in comp_nodes:
1351
+ if nid in emitted:
1352
+ continue
1353
+ if reach is not None and nid not in reach:
1354
+ continue
1355
+ # Skip type symbols in order emission (types don't require translation steps)
1356
+ meta_n = by_id.get(nid, {})
1357
+ if str(meta_n.get("cat") or "") == "type":
1358
+ continue
1359
+ # Only delay entry when the SCC is a singleton to avoid breaking intra-SCC semantics
1360
+ if _is_entry(nid) and len(comp_nodes) == 1:
1361
+ delayed_entries.append(nid)
1362
+ else:
1363
+ selected.append(nid)
1364
+
1365
+ if selected:
1366
+ for nid in selected:
1367
+ emitted.add(nid)
1368
+ syms = []
1369
+ for nid in sorted(selected):
1370
+ meta = by_id.get(nid, {})
1371
+ label = meta.get("qname") or meta.get("name") or f"sym_{nid}"
1372
+ syms.append(label)
1373
+ roots_labels = []
1374
+ if root_id is not None:
1375
+ meta_r = by_id.get(root_id, {})
1376
+ rlabel = meta_r.get("qname") or meta_r.get("name") or f"sym_{root_id}"
1377
+ roots_labels = [rlabel]
1378
+ steps.append({
1379
+ "step": len(steps) + 1,
1380
+ "ids": sorted(selected),
1381
+ "items": [by_id.get(nid, {}).get("record") for nid in sorted(selected) if isinstance(by_id.get(nid, {}).get("record"), dict)],
1382
+ "symbols": syms,
1383
+ "group": len(syms) > 1,
1384
+ "roots": roots_labels,
1385
+ "created_at": now_ts,
1386
+ })
1387
+
1388
+ # Emit delayed entry functions as the final step for this root
1389
+ if delayed_entries:
1390
+ for nid in delayed_entries:
1391
+ emitted.add(nid)
1392
+ syms = []
1393
+ for nid in sorted(delayed_entries):
1394
+ meta = by_id.get(nid, {})
1395
+ label = meta.get("qname") or meta.get("name") or f"sym_{nid}"
1396
+ syms.append(label)
1397
+ roots_labels = []
1398
+ if root_id is not None:
1399
+ meta_r = by_id.get(root_id, {})
1400
+ rlabel = meta_r.get("qname") or meta_r.get("name") or f"sym_{root_id}"
1401
+ roots_labels = [rlabel]
1402
+ steps.append({
1403
+ "step": len(steps) + 1,
1404
+ "ids": sorted(delayed_entries),
1405
+ "items": [by_id.get(nid, {}).get("record") for nid in sorted(delayed_entries) if isinstance(by_id.get(nid, {}).get("record"), dict)],
1406
+ "symbols": syms,
1407
+ "group": len(syms) > 1,
1408
+ "roots": roots_labels,
1409
+ "created_at": now_ts,
1410
+ })
1411
+
1412
+ for rid in sorted(roots, key=lambda r: len(root_reach.get(r, set())), reverse=True):
1413
+ _emit_for_root(rid)
1414
+ _emit_for_root(None)
1415
+
1416
+ if out_path is None:
1417
+ # 根据输入符号表选择输出文件名:
1418
+ # - symbols_raw.jsonl -> translation_order_raw.jsonl(扫描阶段原始顺序)
1419
+ # - 其他(如 symbols.jsonl/curated) -> translation_order.jsonl(默认)
1420
+ base = "translation_order.jsonl"
1421
+ try:
1422
+ name = Path(fjsonl).name.lower()
1423
+ if "symbols_raw.jsonl" in name:
1424
+ base = "translation_order_raw.jsonl"
1425
+ except Exception:
1426
+ pass
1427
+ out_path = fjsonl.parent / base
1428
+ out_path = Path(out_path)
1429
+ out_path.parent.mkdir(parents=True, exist_ok=True)
1430
+ # Purge redundant fields before writing (keep ids and records; drop symbols/items)
1431
+ try:
1432
+ # 保留 items(包含完整符号记录及替换信息),仅移除冗余的 symbols 文本标签
1433
+ steps = [dict((k, v) for k, v in st.items() if k not in ("symbols",)) for st in steps]
1434
+ except Exception:
1435
+ pass
1436
+ with open(out_path, "w", encoding="utf-8") as fo:
1437
+ for st in steps:
1438
+ fo.write(json.dumps(st, ensure_ascii=False) + "\n")
1439
+ return out_path
1440
+
1441
+
1442
+ def export_root_subgraphs_to_dir(db_path: Path, out_dir: Path) -> List[Path]:
1443
+ # Generate per-root reference subgraph DOT files from symbols.jsonl into out_dir (unified: functions and types).
1444
+ def _resolve_symbols_jsonl_path(hint: Path) -> Path:
1445
+ p = Path(hint)
1446
+ if p.is_file() and p.suffix.lower() == ".jsonl":
1447
+ return p
1448
+ if p.is_dir():
1449
+ prefer = p / ".jarvis" / "c2rust" / "symbols.jsonl"
1450
+ return prefer
1451
+ return Path(".") / ".jarvis" / "c2rust" / "symbols.jsonl"
1452
+
1453
+ sjsonl = _resolve_symbols_jsonl_path(db_path)
1454
+ if not sjsonl.exists():
1455
+ raise FileNotFoundError(f"未找到 symbols.jsonl: {sjsonl}")
1456
+
1457
+ out_dir = Path(out_dir)
1458
+ out_dir.mkdir(parents=True, exist_ok=True)
1459
+
1460
+ # Load symbols (unified)
1461
+ by_id: Dict[int, Dict[str, str]] = {}
1462
+ name_to_id: Dict[str, int] = {}
1463
+ adj: Dict[int, List[str]] = {}
1464
+
1465
+ with open(sjsonl, "r", encoding="utf-8") as f:
1466
+ idx = 0
1467
+ for line in f:
1468
+ line = line.strip()
1469
+ if not line:
1470
+ continue
1471
+ try:
1472
+ obj = json.loads(line)
1473
+ except Exception:
1474
+ continue
1475
+ idx += 1
1476
+ # unified handling: include all symbols
1477
+ fid = int(obj.get("id") or idx)
1478
+ nm = obj.get("name") or ""
1479
+ qn = obj.get("qualified_name") or ""
1480
+ sig = obj.get("signature") or ""
1481
+ refs = obj.get("ref")
1482
+ if not isinstance(refs, list):
1483
+ refs = []
1484
+ refs = [c for c in refs if isinstance(c, str) and c]
1485
+
1486
+ by_id[fid] = {"name": nm, "qname": qn, "sig": sig}
1487
+ if nm:
1488
+ name_to_id.setdefault(nm, fid)
1489
+ if qn:
1490
+ name_to_id.setdefault(qn, fid)
1491
+ adj[fid] = refs
1492
+
1493
+ def base_label(fid: int) -> str:
1494
+ meta = by_id.get(fid, {})
1495
+ base = meta.get("qname") or meta.get("name") or f"sym_{fid}"
1496
+ sig = meta.get("sig") or ""
1497
+ if sig and sig != base:
1498
+ return f"{base}\\n{sig}"
1499
+ return base
1500
+
1501
+ def sanitize_filename(s: str) -> str:
1502
+ if not s:
1503
+ return "root"
1504
+ s = s.replace("::", "__")
1505
+ return "".join(ch if ch.isalnum() or ch in ("_", "-") else "_" for ch in s)[:120]
1506
+
1507
+ generated: List[Path] = []
1508
+ root_ids = find_root_function_ids(db_path)
1509
+
1510
+ for rid in root_ids:
1511
+ # DFS over internal refs from the root
1512
+ visited: Set[int] = set()
1513
+ stack: List[int] = [rid]
1514
+ visited.add(rid)
1515
+ while stack:
1516
+ src = stack.pop()
1517
+ for callee in adj.get(src, []):
1518
+ cid = name_to_id.get(callee)
1519
+ if cid is not None and cid not in visited:
1520
+ visited.add(cid)
1521
+ stack.append(cid)
1522
+
1523
+ # Build nodes and edges
1524
+ node_labels: Dict[str, str] = {}
1525
+ external_nodes: Dict[str, str] = {}
1526
+ ext_count = 0
1527
+ edges = set()
1528
+
1529
+ id_to_node = {fid: f"n{fid}" for fid in visited}
1530
+
1531
+ # Internal nodes
1532
+ for fid in visited:
1533
+ node_labels[id_to_node[fid]] = base_label(fid)
1534
+
1535
+ # Edges (internal -> internal/external)
1536
+ for src in visited:
1537
+ src_node = id_to_node[src]
1538
+ for callee in adj.get(src, []):
1539
+ cid = name_to_id.get(callee)
1540
+ if cid is not None and cid in visited:
1541
+ edges.add((src_node, id_to_node[cid]))
1542
+ else:
1543
+ dst = external_nodes.get(callee)
1544
+ if dst is None:
1545
+ dst = f"ext{ext_count}"
1546
+ ext_count += 1
1547
+ external_nodes[callee] = dst
1548
+ node_labels[dst] = callee
1549
+ edges.add((src_node, dst))
1550
+
1551
+ # Write DOT
1552
+ root_base = by_id.get(rid, {}).get("qname") or by_id.get(rid, {}).get("name") or f"sym_{rid}"
1553
+ fname = f"subgraph_root_{rid}_{sanitize_filename(root_base)}.dot"
1554
+ out_path = out_dir / fname
1555
+ with open(out_path, "w", encoding="utf-8") as f:
1556
+ f.write("digraph refgraph_sub {\n")
1557
+ f.write(" rankdir=LR;\n")
1558
+ f.write(" graph [fontsize=10];\n")
1559
+ f.write(" node [fontsize=10];\n")
1560
+ f.write(" edge [fontsize=9];\n")
1561
+
1562
+ # Emit nodes
1563
+ for nid, lbl in node_labels.items():
1564
+ safe_label = lbl.replace("\\", "\\\\").replace('"', '\\"')
1565
+ if nid.startswith("ext"):
1566
+ f.write(f' {nid} [label="{safe_label}", shape=ellipse, style=dashed, color=gray50, fontcolor=gray30];\n')
1567
+ else:
1568
+ f.write(f' {nid} [label="{safe_label}", shape=box];\n')
1569
+
1570
+ # Emit edges
1571
+ for s, d in sorted(edges):
1572
+ f.write(f" {s} -> {d};\n")
1573
+
1574
+ f.write("}\n")
1575
+
1576
+ generated.append(out_path)
1577
+
1578
+ return generated
1579
+
1580
+
1581
+ # ---------------------------
1582
+ # Third-party replacement evaluation
1583
+ # ---------------------------
1584
+
1585
+ def run_scan(
1586
+ dot: Optional[Path] = None,
1587
+ only_dot: bool = False,
1588
+ subgraphs_dir: Optional[Path] = None,
1589
+ only_subgraphs: bool = False,
1590
+ png: bool = False,
1591
+ non_interactive: bool = True,
1592
+ ) -> None:
1593
+ # Scan for C/C++ functions and persist results to JSONL; optionally generate DOT.
1594
+ # Determine data path
1595
+ root = Path('.')
1596
+ Path('.') / ".jarvis" / "c2rust" / "symbols_raw.jsonl"
1597
+ data_path_curated = Path('.') / ".jarvis" / "c2rust" / "symbols.jsonl"
1598
+
1599
+ # Helper: render a DOT file to PNG using Graphviz 'dot'
1600
+ def _render_dot_to_png(dot_file: Path, png_out: Optional[Path] = None) -> Path:
1601
+ try:
1602
+ from shutil import which
1603
+ import subprocess
1604
+ except Exception as _e:
1605
+ raise RuntimeError(f"准备 PNG 渲染时出现环境问题: {_e}")
1606
+ exe = which("dot")
1607
+ if not exe:
1608
+ raise RuntimeError("在 PATH 中未找到 Graphviz 'dot'。请安装 graphviz 并确保 'dot' 可用。")
1609
+ dot_file = Path(dot_file)
1610
+ if png_out is None:
1611
+ png_out = dot_file.with_suffix(".png")
1612
+ else:
1613
+ png_out = Path(png_out)
1614
+ png_out.parent.mkdir(parents=True, exist_ok=True)
1615
+ try:
1616
+ subprocess.run([exe, "-Tpng", str(dot_file), "-o", str(png_out)], check=True)
1617
+ except FileNotFoundError:
1618
+ raise RuntimeError("未找到 Graphviz 'dot' 可执行文件。")
1619
+ except subprocess.CalledProcessError as e:
1620
+ raise RuntimeError(f"'dot' 渲染 {dot_file} 为 PNG 失败: {e}")
1621
+ return png_out
1622
+
1623
+ if not (only_dot or only_subgraphs):
1624
+ try:
1625
+ scan_directory(root)
1626
+ except Exception as e:
1627
+ typer.secho(f"[c2rust-scanner] 错误: {e}", fg=typer.colors.RED, err=True)
1628
+ raise typer.Exit(code=1)
1629
+ else:
1630
+ # Only-generate mode (no rescan). 验证输入,仅基于既有 symbols.jsonl 进行可选的 DOT/子图输出;此处不计算翻译顺序。
1631
+ if not data_path_curated.exists():
1632
+ typer.secho(f"[c2rust-scanner] 未找到数据: {data_path_curated}", fg=typer.colors.RED, err=True)
1633
+ raise typer.Exit(code=2)
1634
+ if only_dot and dot is None:
1635
+ typer.secho("[c2rust-scanner] --only-dot 需要 --dot 来指定输出文件", fg=typer.colors.RED, err=True)
1636
+ raise typer.Exit(code=2)
1637
+ if only_subgraphs and subgraphs_dir is None:
1638
+ typer.secho("[c2rust-scanner] --only-subgraphs 需要 --subgraphs-dir 来指定输出目录", fg=typer.colors.RED, err=True)
1639
+ raise typer.Exit(code=2)
1640
+
1641
+ # Generate DOT (global) if requested
1642
+ if dot is not None:
1643
+ try:
1644
+ # 使用正式符号表生成可视化
1645
+ generate_dot_from_db(data_path_curated, dot)
1646
+ typer.secho(f"[c2rust-scanner] DOT 文件已写入: {dot}", fg=typer.colors.GREEN)
1647
+ if png:
1648
+ png_path = _render_dot_to_png(dot)
1649
+ typer.secho(f"[c2rust-scanner] PNG 文件已写入: {png_path}", fg=typer.colors.GREEN)
1650
+ except Exception as e:
1651
+ typer.secho(f"[c2rust-scanner] 写入 DOT/PNG 失败: {e}", fg=typer.colors.RED, err=True)
1652
+ raise typer.Exit(code=1)
1653
+
1654
+ # Generate per-root subgraphs if requested
1655
+ if subgraphs_dir is not None:
1656
+ try:
1657
+ # 使用正式符号表生成根节点子图
1658
+ files = export_root_subgraphs_to_dir(data_path_curated, subgraphs_dir)
1659
+ if png:
1660
+ png_count = 0
1661
+ for dp in files:
1662
+ try:
1663
+ _render_dot_to_png(dp)
1664
+ png_count += 1
1665
+ except Exception as _e:
1666
+ # Fail fast on PNG generation error for subgraphs to make issues visible
1667
+ raise
1668
+ typer.secho(
1669
+ f"[c2rust-scanner] 根节点子图已写入: {len(files)} 个 DOT 文件和 {png_count} 个 PNG 文件 -> {subgraphs_dir}",
1670
+ fg=typer.colors.GREEN,
1671
+ )
1672
+ else:
1673
+ typer.secho(
1674
+ f"[c2rust-scanner] 根节点子图已写入: {len(files)} 个文件 -> {subgraphs_dir}",
1675
+ fg=typer.colors.GREEN,
1676
+ )
1677
+ except Exception as e:
1678
+ typer.secho(f"[c2rust-scanner] 写入子图 DOT/PNG 失败: {e}", fg=typer.colors.RED, err=True)
1679
+ raise typer.Exit(code=1)
1680
+
1681
+