jarvis-ai-assistant 0.7.0 (py3-none-any.whl) → 0.7.8 (py3-none-any.whl)

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (159)
  1. jarvis/__init__.py +1 -1
  2. jarvis/jarvis_agent/__init__.py +243 -139
  3. jarvis/jarvis_agent/agent_manager.py +5 -10
  4. jarvis/jarvis_agent/builtin_input_handler.py +2 -6
  5. jarvis/jarvis_agent/config_editor.py +2 -7
  6. jarvis/jarvis_agent/event_bus.py +82 -12
  7. jarvis/jarvis_agent/file_context_handler.py +265 -15
  8. jarvis/jarvis_agent/file_methodology_manager.py +3 -4
  9. jarvis/jarvis_agent/jarvis.py +113 -98
  10. jarvis/jarvis_agent/language_extractors/__init__.py +57 -0
  11. jarvis/jarvis_agent/language_extractors/c_extractor.py +21 -0
  12. jarvis/jarvis_agent/language_extractors/cpp_extractor.py +21 -0
  13. jarvis/jarvis_agent/language_extractors/go_extractor.py +21 -0
  14. jarvis/jarvis_agent/language_extractors/java_extractor.py +84 -0
  15. jarvis/jarvis_agent/language_extractors/javascript_extractor.py +79 -0
  16. jarvis/jarvis_agent/language_extractors/python_extractor.py +21 -0
  17. jarvis/jarvis_agent/language_extractors/rust_extractor.py +21 -0
  18. jarvis/jarvis_agent/language_extractors/typescript_extractor.py +84 -0
  19. jarvis/jarvis_agent/language_support_info.py +486 -0
  20. jarvis/jarvis_agent/main.py +6 -12
  21. jarvis/jarvis_agent/memory_manager.py +7 -16
  22. jarvis/jarvis_agent/methodology_share_manager.py +10 -16
  23. jarvis/jarvis_agent/prompt_manager.py +1 -1
  24. jarvis/jarvis_agent/prompts.py +193 -171
  25. jarvis/jarvis_agent/protocols.py +8 -12
  26. jarvis/jarvis_agent/run_loop.py +77 -14
  27. jarvis/jarvis_agent/session_manager.py +2 -3
  28. jarvis/jarvis_agent/share_manager.py +12 -21
  29. jarvis/jarvis_agent/shell_input_handler.py +1 -2
  30. jarvis/jarvis_agent/task_analyzer.py +26 -4
  31. jarvis/jarvis_agent/task_manager.py +11 -27
  32. jarvis/jarvis_agent/tool_executor.py +2 -3
  33. jarvis/jarvis_agent/tool_share_manager.py +12 -24
  34. jarvis/jarvis_agent/web_server.py +55 -20
  35. jarvis/jarvis_c2rust/__init__.py +5 -5
  36. jarvis/jarvis_c2rust/cli.py +461 -499
  37. jarvis/jarvis_c2rust/collector.py +45 -53
  38. jarvis/jarvis_c2rust/constants.py +26 -0
  39. jarvis/jarvis_c2rust/library_replacer.py +264 -132
  40. jarvis/jarvis_c2rust/llm_module_agent.py +162 -190
  41. jarvis/jarvis_c2rust/loaders.py +207 -0
  42. jarvis/jarvis_c2rust/models.py +28 -0
  43. jarvis/jarvis_c2rust/optimizer.py +1592 -395
  44. jarvis/jarvis_c2rust/transpiler.py +1722 -1064
  45. jarvis/jarvis_c2rust/utils.py +385 -0
  46. jarvis/jarvis_code_agent/build_validation_config.py +2 -3
  47. jarvis/jarvis_code_agent/code_agent.py +394 -320
  48. jarvis/jarvis_code_agent/code_analyzer/__init__.py +3 -0
  49. jarvis/jarvis_code_agent/code_analyzer/build_validator/base.py +4 -0
  50. jarvis/jarvis_code_agent/code_analyzer/build_validator/cmake.py +17 -2
  51. jarvis/jarvis_code_agent/code_analyzer/build_validator/fallback.py +3 -0
  52. jarvis/jarvis_code_agent/code_analyzer/build_validator/go.py +36 -4
  53. jarvis/jarvis_code_agent/code_analyzer/build_validator/java_gradle.py +9 -0
  54. jarvis/jarvis_code_agent/code_analyzer/build_validator/java_maven.py +9 -0
  55. jarvis/jarvis_code_agent/code_analyzer/build_validator/makefile.py +12 -1
  56. jarvis/jarvis_code_agent/code_analyzer/build_validator/nodejs.py +22 -5
  57. jarvis/jarvis_code_agent/code_analyzer/build_validator/python.py +57 -32
  58. jarvis/jarvis_code_agent/code_analyzer/build_validator/rust.py +62 -6
  59. jarvis/jarvis_code_agent/code_analyzer/build_validator/validator.py +8 -9
  60. jarvis/jarvis_code_agent/code_analyzer/context_manager.py +290 -5
  61. jarvis/jarvis_code_agent/code_analyzer/language_support.py +21 -0
  62. jarvis/jarvis_code_agent/code_analyzer/languages/__init__.py +21 -3
  63. jarvis/jarvis_code_agent/code_analyzer/languages/c_cpp_language.py +72 -4
  64. jarvis/jarvis_code_agent/code_analyzer/languages/go_language.py +35 -3
  65. jarvis/jarvis_code_agent/code_analyzer/languages/java_language.py +212 -0
  66. jarvis/jarvis_code_agent/code_analyzer/languages/javascript_language.py +254 -0
  67. jarvis/jarvis_code_agent/code_analyzer/languages/python_language.py +52 -2
  68. jarvis/jarvis_code_agent/code_analyzer/languages/rust_language.py +73 -1
  69. jarvis/jarvis_code_agent/code_analyzer/languages/typescript_language.py +280 -0
  70. jarvis/jarvis_code_agent/code_analyzer/llm_context_recommender.py +306 -152
  71. jarvis/jarvis_code_agent/code_analyzer/structured_code.py +556 -0
  72. jarvis/jarvis_code_agent/code_analyzer/symbol_extractor.py +193 -18
  73. jarvis/jarvis_code_agent/code_analyzer/tree_sitter_extractor.py +18 -8
  74. jarvis/jarvis_code_agent/lint.py +258 -27
  75. jarvis/jarvis_code_agent/utils.py +0 -1
  76. jarvis/jarvis_code_analysis/code_review.py +19 -24
  77. jarvis/jarvis_data/config_schema.json +53 -26
  78. jarvis/jarvis_git_squash/main.py +4 -5
  79. jarvis/jarvis_git_utils/git_commiter.py +44 -49
  80. jarvis/jarvis_mcp/sse_mcp_client.py +20 -27
  81. jarvis/jarvis_mcp/stdio_mcp_client.py +11 -12
  82. jarvis/jarvis_mcp/streamable_mcp_client.py +15 -14
  83. jarvis/jarvis_memory_organizer/memory_organizer.py +55 -74
  84. jarvis/jarvis_methodology/main.py +32 -48
  85. jarvis/jarvis_multi_agent/__init__.py +79 -61
  86. jarvis/jarvis_multi_agent/main.py +3 -7
  87. jarvis/jarvis_platform/base.py +469 -199
  88. jarvis/jarvis_platform/human.py +7 -8
  89. jarvis/jarvis_platform/kimi.py +30 -36
  90. jarvis/jarvis_platform/openai.py +65 -27
  91. jarvis/jarvis_platform/registry.py +26 -10
  92. jarvis/jarvis_platform/tongyi.py +24 -25
  93. jarvis/jarvis_platform/yuanbao.py +31 -42
  94. jarvis/jarvis_platform_manager/main.py +66 -77
  95. jarvis/jarvis_platform_manager/service.py +8 -13
  96. jarvis/jarvis_rag/cli.py +49 -51
  97. jarvis/jarvis_rag/embedding_manager.py +13 -18
  98. jarvis/jarvis_rag/llm_interface.py +8 -9
  99. jarvis/jarvis_rag/query_rewriter.py +10 -21
  100. jarvis/jarvis_rag/rag_pipeline.py +24 -27
  101. jarvis/jarvis_rag/reranker.py +4 -5
  102. jarvis/jarvis_rag/retriever.py +28 -30
  103. jarvis/jarvis_sec/__init__.py +220 -3520
  104. jarvis/jarvis_sec/agents.py +143 -0
  105. jarvis/jarvis_sec/analysis.py +276 -0
  106. jarvis/jarvis_sec/cli.py +29 -6
  107. jarvis/jarvis_sec/clustering.py +1439 -0
  108. jarvis/jarvis_sec/file_manager.py +427 -0
  109. jarvis/jarvis_sec/parsers.py +73 -0
  110. jarvis/jarvis_sec/prompts.py +268 -0
  111. jarvis/jarvis_sec/report.py +83 -4
  112. jarvis/jarvis_sec/review.py +453 -0
  113. jarvis/jarvis_sec/utils.py +499 -0
  114. jarvis/jarvis_sec/verification.py +848 -0
  115. jarvis/jarvis_sec/workflow.py +7 -0
  116. jarvis/jarvis_smart_shell/main.py +38 -87
  117. jarvis/jarvis_stats/cli.py +1 -1
  118. jarvis/jarvis_stats/stats.py +7 -7
  119. jarvis/jarvis_stats/storage.py +15 -21
  120. jarvis/jarvis_tools/clear_memory.py +3 -20
  121. jarvis/jarvis_tools/cli/main.py +20 -23
  122. jarvis/jarvis_tools/edit_file.py +1066 -0
  123. jarvis/jarvis_tools/execute_script.py +42 -21
  124. jarvis/jarvis_tools/file_analyzer.py +6 -9
  125. jarvis/jarvis_tools/generate_new_tool.py +11 -20
  126. jarvis/jarvis_tools/lsp_client.py +1552 -0
  127. jarvis/jarvis_tools/methodology.py +2 -3
  128. jarvis/jarvis_tools/read_code.py +1525 -87
  129. jarvis/jarvis_tools/read_symbols.py +2 -3
  130. jarvis/jarvis_tools/read_webpage.py +7 -10
  131. jarvis/jarvis_tools/registry.py +370 -181
  132. jarvis/jarvis_tools/retrieve_memory.py +20 -19
  133. jarvis/jarvis_tools/rewrite_file.py +105 -0
  134. jarvis/jarvis_tools/save_memory.py +3 -15
  135. jarvis/jarvis_tools/search_web.py +3 -7
  136. jarvis/jarvis_tools/sub_agent.py +17 -6
  137. jarvis/jarvis_tools/sub_code_agent.py +14 -16
  138. jarvis/jarvis_tools/virtual_tty.py +54 -32
  139. jarvis/jarvis_utils/clipboard.py +7 -10
  140. jarvis/jarvis_utils/config.py +98 -63
  141. jarvis/jarvis_utils/embedding.py +5 -5
  142. jarvis/jarvis_utils/fzf.py +8 -8
  143. jarvis/jarvis_utils/git_utils.py +81 -67
  144. jarvis/jarvis_utils/input.py +24 -49
  145. jarvis/jarvis_utils/jsonnet_compat.py +465 -0
  146. jarvis/jarvis_utils/methodology.py +33 -35
  147. jarvis/jarvis_utils/utils.py +245 -202
  148. {jarvis_ai_assistant-0.7.0.dist-info → jarvis_ai_assistant-0.7.8.dist-info}/METADATA +205 -70
  149. jarvis_ai_assistant-0.7.8.dist-info/RECORD +218 -0
  150. jarvis/jarvis_agent/edit_file_handler.py +0 -584
  151. jarvis/jarvis_agent/rewrite_file_handler.py +0 -141
  152. jarvis/jarvis_agent/task_planner.py +0 -496
  153. jarvis/jarvis_platform/ai8.py +0 -332
  154. jarvis/jarvis_tools/ask_user.py +0 -54
  155. jarvis_ai_assistant-0.7.0.dist-info/RECORD +0 -192
  156. {jarvis_ai_assistant-0.7.0.dist-info → jarvis_ai_assistant-0.7.8.dist-info}/WHEEL +0 -0
  157. {jarvis_ai_assistant-0.7.0.dist-info → jarvis_ai_assistant-0.7.8.dist-info}/entry_points.txt +0 -0
  158. {jarvis_ai_assistant-0.7.0.dist-info → jarvis_ai_assistant-0.7.8.dist-info}/licenses/LICENSE +0 -0
  159. {jarvis_ai_assistant-0.7.0.dist-info → jarvis_ai_assistant-0.7.8.dist-info}/top_level.txt +0 -0
@@ -42,6 +42,41 @@ from jarvis.jarvis_c2rust.scanner import (
42
42
  find_root_function_ids,
43
43
  )
44
44
 
45
+ # ============================================================================
46
+ # 常量定义
47
+ # ============================================================================
48
+
49
+ # LLM评估重试配置
50
+ MAX_LLM_RETRIES = 3 # LLM评估最大重试次数
51
+
52
+ # 源码片段读取配置
53
+ DEFAULT_SOURCE_SNIPPET_MAX_LINES = 200 # 默认源码片段最大行数
54
+ SUBTREE_SOURCE_SNIPPET_MAX_LINES = 120 # 子树提示词中源码片段最大行数
55
+
56
+ # 子树提示词构建配置
57
+ MAX_SUBTREE_NODES_META = 200 # 子树节点元数据列表最大长度
58
+ MAX_SUBTREE_EDGES = 400 # 子树边列表最大长度
59
+ MAX_DOT_EDGES = 200 # DOT图边数阈值(超过此值不生成DOT)
60
+ MAX_CHILD_SAMPLES = 2 # 子节点采样数量
61
+ MAX_SOURCE_SAMPLES = 3 # 代表性源码样本最大数量(注释说明)
62
+
63
+ # 显示配置
64
+ MAX_NOTES_DISPLAY_LENGTH = 200 # 备注显示最大长度
65
+
66
+ # 输出文件路径配置
67
+ DEFAULT_SYMBOLS_OUTPUT = "symbols_library_pruned.jsonl" # 默认符号表输出文件名
68
+ DEFAULT_MAPPING_OUTPUT = "library_replacements.jsonl" # 默认替代映射输出文件名
69
+ SYMBOLS_PRUNE_OUTPUT = "symbols_prune.jsonl" # 兼容符号表输出文件名
70
+ ORDER_PRUNE_OUTPUT = "translation_order_prune.jsonl" # 剪枝阶段转译顺序输出文件名
71
+ ORDER_ALIAS_OUTPUT = "translation_order.jsonl" # 通用转译顺序输出文件名
72
+ DEFAULT_CHECKPOINT_FILE = "library_replacer_checkpoint.json" # 默认检查点文件名
73
+
74
+ # Checkpoint配置
75
+ DEFAULT_CHECKPOINT_INTERVAL = 1 # 默认检查点保存间隔(每评估N个节点保存一次)
76
+
77
+ # JSON格式化配置
78
+ JSON_INDENT = 2 # JSON格式化缩进空格数
79
+
45
80
 
46
81
  def _resolve_symbols_jsonl_path(hint: Path) -> Path:
47
82
  """解析symbols.jsonl路径"""
@@ -60,18 +95,18 @@ def _setup_output_paths(
60
95
  ) -> tuple[Path, Path, Path, Path, Path]:
61
96
  """设置输出路径,返回(符号表路径, 映射路径, 兼容符号表路径, 顺序路径, 别名顺序路径)"""
62
97
  if out_symbols_path is None:
63
- out_symbols_path = data_dir / "symbols_library_pruned.jsonl"
98
+ out_symbols_path = data_dir / DEFAULT_SYMBOLS_OUTPUT
64
99
  else:
65
100
  out_symbols_path = Path(out_symbols_path)
66
101
  if out_mapping_path is None:
67
- out_mapping_path = data_dir / "library_replacements.jsonl"
102
+ out_mapping_path = data_dir / DEFAULT_MAPPING_OUTPUT
68
103
  else:
69
104
  out_mapping_path = Path(out_mapping_path)
70
105
 
71
106
  # 兼容输出
72
- out_symbols_prune_path = data_dir / "symbols_prune.jsonl"
73
- order_prune_path = data_dir / "translation_order_prune.jsonl"
74
- alias_order_path = data_dir / "translation_order.jsonl"
107
+ out_symbols_prune_path = data_dir / SYMBOLS_PRUNE_OUTPUT
108
+ order_prune_path = data_dir / ORDER_PRUNE_OUTPUT
109
+ alias_order_path = data_dir / ORDER_ALIAS_OUTPUT
75
110
 
76
111
  return out_symbols_path, out_mapping_path, out_symbols_prune_path, order_prune_path, alias_order_path
77
112
 
@@ -245,7 +280,7 @@ def _process_candidate_scope(
245
280
  return filtered_roots, scope_unreachable_funcs
246
281
 
247
282
 
248
- def _read_source_snippet(rec: Dict[str, Any], max_lines: int = 200) -> str:
283
+ def _read_source_snippet(rec: Dict[str, Any], max_lines: int = DEFAULT_SOURCE_SNIPPET_MAX_LINES) -> str:
249
284
  """读取源码片段"""
250
285
  path = rec.get("file") or ""
251
286
  try:
@@ -270,11 +305,13 @@ def _read_source_snippet(rec: Dict[str, Any], max_lines: int = 200) -> str:
270
305
 
271
306
 
272
307
  def _check_llm_availability() -> tuple[bool, Any, Any, Any]:
273
- """检查LLM可用性,返回(是否可用, PlatformRegistry, get_normal_platform_name, get_normal_model_name)"""
308
+ """检查LLM可用性,返回(是否可用, PlatformRegistry, get_smart_platform_name, get_smart_model_name)
309
+ 使用smart平台,适用于代码生成等复杂场景
310
+ """
274
311
  try:
275
312
  from jarvis.jarvis_platform.registry import PlatformRegistry # type: ignore
276
- from jarvis.jarvis_utils.config import get_normal_platform_name, get_normal_model_name # type: ignore
277
- return True, PlatformRegistry, get_normal_platform_name, get_normal_model_name
313
+ from jarvis.jarvis_utils.config import get_smart_platform_name, get_smart_model_name # type: ignore
314
+ return True, PlatformRegistry, get_smart_platform_name, get_smart_model_name
278
315
  except Exception:
279
316
  return False, None, None, None
280
317
 
@@ -289,6 +326,21 @@ def _normalize_disabled_libraries(disabled_libraries: Optional[List[str]]) -> tu
289
326
  return disabled_norm, disabled_display
290
327
 
291
328
 
329
+ def _load_additional_notes(data_dir: Path) -> str:
330
+ """从配置文件加载附加说明"""
331
+ try:
332
+ from jarvis.jarvis_c2rust.constants import CONFIG_JSON
333
+ config_path = data_dir / CONFIG_JSON
334
+ if config_path.exists():
335
+ with config_path.open("r", encoding="utf-8") as f:
336
+ config = json.load(f)
337
+ if isinstance(config, dict):
338
+ return str(config.get("additional_notes", "") or "").strip()
339
+ except Exception:
340
+ pass
341
+ return ""
342
+
343
+
292
344
  def _normalize_list(items: Optional[List[str]]) -> List[str]:
293
345
  """规范化列表,去重并排序"""
294
346
  if not isinstance(items, list):
@@ -376,10 +428,10 @@ def _create_llm_model(
376
428
  disabled_display: str,
377
429
  _model_available: bool,
378
430
  PlatformRegistry: Any,
379
- get_normal_platform_name: Any,
380
- get_normal_model_name: Any,
431
+ get_smart_platform_name: Any,
432
+ get_smart_model_name: Any,
381
433
  ) -> Optional[Any]:
382
- """创建LLM模型"""
434
+ """创建LLM模型,使用smart平台,适用于代码生成等复杂场景"""
383
435
  if not _model_available:
384
436
  return None
385
437
  try:
@@ -387,20 +439,20 @@ def _create_llm_model(
387
439
  model = None
388
440
  if llm_group:
389
441
  try:
390
- platform_name = get_normal_platform_name(llm_group) # type: ignore
442
+ platform_name = get_smart_platform_name(llm_group) # type: ignore
391
443
  if platform_name:
392
444
  model = registry.create_platform(platform_name) # type: ignore
393
445
  except Exception:
394
446
  model = None
395
447
  if model is None:
396
- model = registry.get_normal_platform() # type: ignore
448
+ model = registry.get_smart_platform() # type: ignore
397
449
  try:
398
450
  model.set_model_group(llm_group) # type: ignore
399
451
  except Exception:
400
452
  pass
401
453
  if llm_group:
402
454
  try:
403
- mn = get_normal_model_name(llm_group) # type: ignore
455
+ mn = get_smart_model_name(llm_group) # type: ignore
404
456
  if mn:
405
457
  model.set_model_name(mn) # type: ignore
406
458
  except Exception:
@@ -409,7 +461,7 @@ def _create_llm_model(
409
461
  "你是资深 C→Rust 迁移专家。任务:给定一个函数及其调用子树(依赖图摘要、函数签名、源码片段),"
410
462
  "判断是否可以使用一个或多个成熟的 Rust 库整体替代该子树的功能(允许库内多个 API 协同,允许多个库组合;不允许使用不成熟/不常见库)。"
411
463
  "如可替代,请给出 libraries 列表(库名),可选给出代表性 API/模块与实现备注 notes(如何用这些库协作实现)。"
412
- "输出格式:仅输出一个 <yaml> 块,字段: replaceable(bool), libraries(list[str]), confidence(float 0..1),可选 library(str,首选主库), api(str) 或 apis(list),notes(str)。"
464
+ "输出格式:仅输出一个 <SUMMARY> 块,块内直接包含 JSON 对象(不需要额外的标签),字段: replaceable(bool), libraries(list[str]), confidence(float 0..1),可选 library(str,首选主库), api(str) 或 apis(list),notes(str)。"
413
465
  )
414
466
  return model
415
467
  except Exception as e:
@@ -421,9 +473,9 @@ def _create_llm_model(
421
473
  return None
422
474
 
423
475
 
424
- def _parse_agent_yaml_summary(text: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
476
+ def _parse_agent_json_summary(text: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
425
477
  """
426
- 解析Agent返回的YAML/JSON摘要
478
+ 解析Agent返回的JSON摘要
427
479
  返回(解析结果, 错误信息)
428
480
  如果解析成功,返回(data, None)
429
481
  如果解析失败,返回(None, 错误信息)
@@ -431,76 +483,24 @@ def _parse_agent_yaml_summary(text: str) -> Tuple[Optional[Dict[str, Any]], Opti
431
483
  if not isinstance(text, str) or not text.strip():
432
484
  return None, "摘要文本为空"
433
485
  import re as _re
434
- import json as _json
435
- try:
436
- import yaml # type: ignore
437
- except Exception:
438
- yaml = None # type: ignore
486
+ from jarvis.jarvis_utils.jsonnet_compat import loads as _json_loads
439
487
 
488
+ # 提取 <SUMMARY> 块
440
489
  m_sum = _re.search(r"<SUMMARY>([\s\S]*?)</SUMMARY>", text, flags=_re.IGNORECASE)
441
490
  block = (m_sum.group(1) if m_sum else text).strip()
442
491
 
443
- m_yaml = _re.search(r"<yaml>([\s\S]*?)</yaml>", block, flags=_re.IGNORECASE)
444
- if m_yaml:
445
- raw = m_yaml.group(1).strip()
446
- if raw and yaml:
447
- try:
448
- data = yaml.safe_load(raw)
449
- if isinstance(data, dict):
450
- return data, None
451
- except Exception as yaml_err:
452
- return None, f"YAML 解析失败: {str(yaml_err)}"
453
- elif raw and not yaml:
454
- return None, "PyYAML 未安装,无法解析 YAML"
455
-
456
- m_code = _re.search(r"```(?:yaml|yml)\s*([\s\S]*?)```", block, flags=_re.IGNORECASE)
457
- if m_code:
458
- raw = m_code.group(1).strip()
459
- if raw and yaml:
460
- try:
461
- data = yaml.safe_load(raw)
462
- if isinstance(data, dict):
463
- return data, None
464
- except Exception as yaml_err:
465
- return None, f"YAML 解析失败: {str(yaml_err)}"
466
- elif raw and not yaml:
467
- return None, "PyYAML 未安装,无法解析 YAML"
468
-
469
- m_json = _re.search(r"\{[\s\S]*\}", block)
470
- if m_json:
471
- raw = m_json.group(0).strip()
472
- try:
473
- data = _json.loads(raw)
474
- if isinstance(data, dict):
475
- return data, None
476
- except Exception as json_err:
477
- return None, f"JSON 解析失败: {str(json_err)}"
478
-
479
- # 宽松键值
480
- def _kv(pattern: str) -> Optional[str]:
481
- m = _re.search(pattern, block, flags=_re.IGNORECASE)
482
- return m.group(1).strip() if m else None
483
-
484
- rep_raw = _kv(r"replaceable\s*:\s*(.+)")
485
- lib_raw = _kv(r"library\s*:\s*(.+)")
486
- api_raw = _kv(r"(?:api|function)\s*:\s*(.+)")
487
- conf_raw = _kv(r"confidence\s*:\s*([0-9\.\-eE]+)")
488
- if any([rep_raw, lib_raw, api_raw, conf_raw]):
489
- result: Dict[str, Any] = {}
490
- if rep_raw is not None:
491
- rep_s = rep_raw.strip().strip("\"'")
492
- result["replaceable"] = rep_s.lower() in ("true", "yes", "y", "1")
493
- if lib_raw is not None:
494
- result["library"] = lib_raw.strip().strip("\"'")
495
- if api_raw is not None:
496
- result["api"] = api_raw.strip().strip("\"'")
497
- if conf_raw is not None:
498
- try:
499
- result["confidence"] = float(conf_raw)
500
- except Exception:
501
- pass
502
- return (result if result else None, None)
503
- return None, "未找到有效的YAML/JSON格式或键值对"
492
+ if not block:
493
+ return None, "未找到 <SUMMARY> 或 </SUMMARY> 标签,或标签内容为空"
494
+
495
+ # 直接解析 <SUMMARY> 块内的内容为 JSON
496
+ # jsonnet_compat.loads 会自动处理 markdown 代码块标记(如 ```json5、```json、``` 等)
497
+ try:
498
+ data = _json_loads(block)
499
+ if isinstance(data, dict):
500
+ return data, None
501
+ return None, f"JSON 解析结果不是字典,而是 {type(data).__name__}"
502
+ except Exception as json_err:
503
+ return None, f"JSON 解析失败: {str(json_err)}"
504
504
 
505
505
 
506
506
  def _build_subtree_prompt(
@@ -509,6 +509,7 @@ def _build_subtree_prompt(
509
509
  by_id: Dict[int, Dict[str, Any]],
510
510
  adj_func: Dict[int, List[int]],
511
511
  disabled_display: str,
512
+ additional_notes: str = "",
512
513
  ) -> str:
513
514
  """构建子树评估提示词"""
514
515
  root_rec = by_id.get(fid, {})
@@ -527,19 +528,19 @@ def _build_subtree_prompt(
527
528
  nodes_meta.append(f"- {nm} | {sg}")
528
529
  else:
529
530
  nodes_meta.append(f"- {nm}")
530
- if len(nodes_meta) > 200:
531
- nodes_meta = nodes_meta[:200] + [f"...({len(desc)-200} more)"]
531
+ if len(nodes_meta) > MAX_SUBTREE_NODES_META:
532
+ nodes_meta = nodes_meta[:MAX_SUBTREE_NODES_META] + [f"...({len(desc)-MAX_SUBTREE_NODES_META} more)"]
532
533
 
533
- # 选取部分代表性叶子/内部节点源码(最多 3 个)
534
+ # 选取部分代表性叶子/内部节点源码(最多 MAX_SOURCE_SAMPLES 个)
534
535
  samples: List[str] = []
535
536
  sample_ids: List[int] = [fid]
536
- for ch in adj_func.get(fid, [])[:2]:
537
+ for ch in adj_func.get(fid, [])[:MAX_CHILD_SAMPLES]:
537
538
  sample_ids.append(ch)
538
539
  for sid in sample_ids:
539
540
  rec = by_id.get(sid, {})
540
541
  nm = rec.get("qualified_name") or rec.get("name") or f"sym_{sid}"
541
542
  sg = rec.get("signature") or ""
542
- src = _read_source_snippet(rec, max_lines=120)
543
+ src = _read_source_snippet(rec, max_lines=SUBTREE_SOURCE_SNIPPET_MAX_LINES)
543
544
  samples.append(f"--- BEGIN {nm} ---\n{sg}\n{src}\n--- END {nm} ---")
544
545
 
545
546
  # 构建依赖图(子树内的调用有向边)
@@ -553,14 +554,14 @@ def _build_subtree_prompt(
553
554
  if v in desc:
554
555
  edges_list.append(f"{_label(u)} -> {_label(v)}")
555
556
  edges_text: str
556
- if len(edges_list) > 400:
557
- edges_text = "\n".join(edges_list[:400] + [f"...({len(edges_list) - 400} more edges)"])
557
+ if len(edges_list) > MAX_SUBTREE_EDGES:
558
+ edges_text = "\n".join(edges_list[:MAX_SUBTREE_EDGES] + [f"...({len(edges_list) - MAX_SUBTREE_EDGES} more edges)"])
558
559
  else:
559
560
  edges_text = "\n".join(edges_list)
560
561
 
561
562
  # 适度提供 DOT(边数不大时),便于大模型直观看图
562
563
  dot_text = ""
563
- if len(edges_list) <= 200:
564
+ if len(edges_list) <= MAX_DOT_EDGES:
564
565
  dot_lines: List[str] = ["digraph subtree {", " rankdir=LR;"]
565
566
  for u in sorted(desc):
566
567
  for v in adj_func.get(u, []):
@@ -578,7 +579,7 @@ def _build_subtree_prompt(
578
579
  "请评估以下 C/C++ 函数子树是否可以由一个或多个成熟的 Rust 库整体替代(语义等价或更强)。"
579
580
  "允许库内多个 API 协同,允许多个库组合;如果必须依赖尚不成熟/冷门库或非 Rust 库,则判定为不可替代。\n"
580
581
  f"{disabled_hint}"
581
- "输出格式:仅输出一个 <yaml> 块,字段: replaceable(bool), libraries(list[str]), confidence(float 0..1),"
582
+ "输出格式:仅输出一个 <SUMMARY> 块,块内直接包含 JSON 对象(不需要额外的标签),字段: replaceable(bool), libraries(list[str]), confidence(float 0..1),"
582
583
  "可选字段: library(str,首选主库), api(str) 或 apis(list), notes(str: 简述如何由这些库协作实现的思路)。\n\n"
583
584
  f"根函数(被评估子树的根): {root_name}\n"
584
585
  f"签名: {root_sig}\n"
@@ -595,6 +596,7 @@ def _build_subtree_prompt(
595
596
  + "代表性源码样本(部分节点,可能截断,仅供辅助判断):\n"
596
597
  + "\n".join(samples)
597
598
  + "\n"
599
+ + (f"\n【附加说明(用户自定义)】\n{additional_notes}\n" if additional_notes else "")
598
600
  )
599
601
 
600
602
 
@@ -607,35 +609,102 @@ def _llm_evaluate_subtree(
607
609
  disabled_display: str,
608
610
  _model_available: bool,
609
611
  _new_model_func: Callable,
612
+ additional_notes: str = "",
610
613
  ) -> Dict[str, Any]:
611
- """使用LLM评估子树是否可替代"""
614
+ """使用LLM评估子树是否可替代,支持最多3次重试"""
612
615
  if not _model_available:
613
616
  return {"replaceable": False}
614
617
  model = _new_model_func()
615
618
  if not model:
616
619
  return {"replaceable": False}
617
620
 
618
- prompt = _build_subtree_prompt(fid, desc, by_id, adj_func, disabled_display)
621
+ base_prompt = _build_subtree_prompt(fid, desc, by_id, adj_func, disabled_display, additional_notes)
622
+ last_parse_error = None
619
623
 
620
- try:
621
- result = model.chat_until_success(prompt) # type: ignore
622
- parsed, parse_error = _parse_agent_yaml_summary(result or "")
623
- if parse_error:
624
- # YAML解析失败,将错误信息反馈给模型
625
- print(f"[c2rust-lib-replace] YAML解析失败: {parse_error}")
626
- # 更新提示词,包含解析错误信息
627
- prompt_with_error = (
628
- prompt
629
- + f"\n\n**格式错误详情(请根据以下错误修复输出格式):**\n- {parse_error}\n\n"
630
- + "请确保输出的YAML格式正确,包括正确的缩进、引号、冒号等。"
631
- )
632
- result = model.chat_until_success(prompt_with_error) # type: ignore
633
- parsed, parse_error = _parse_agent_yaml_summary(result or "")
624
+ for attempt in range(1, MAX_LLM_RETRIES + 1):
625
+ try:
626
+ # 构建当前尝试的提示词
627
+ if attempt == 1:
628
+ prompt = base_prompt
629
+ else:
630
+ # 重试时包含之前的错误信息
631
+ error_hint = ""
632
+ if last_parse_error:
633
+ error_hint = (
634
+ f"\n\n**格式错误详情(请根据以下错误修复输出格式):**\n- {last_parse_error}\n\n"
635
+ + "请确保输出的JSON格式正确,包括正确的引号、逗号、大括号等。仅输出一个 <SUMMARY> 块,块内直接包含 JSON 对象(不需要额外的标签)。"
636
+ )
637
+ prompt = base_prompt + error_hint
638
+
639
+ # 调用LLM
640
+ result = model.chat_until_success(prompt) # type: ignore
641
+ parsed, parse_error = _parse_agent_json_summary(result or "")
642
+
634
643
  if parse_error:
635
- # 仍然失败,使用默认值
636
- print(f"[c2rust-lib-replace] 重试后YAML解析仍然失败: {parse_error},使用默认值")
637
- parsed = None
638
- if isinstance(parsed, dict):
644
+ # JSON解析失败,记录错误并准备重试
645
+ last_parse_error = parse_error
646
+ typer.secho(
647
+ f"[c2rust-library] 第 {attempt}/{MAX_LLM_RETRIES} 次尝试:JSON解析失败: {parse_error}",
648
+ fg=typer.colors.YELLOW,
649
+ err=True,
650
+ )
651
+ # 打印原始内容以便调试
652
+ result_text = str(result or "").strip()
653
+ if result_text:
654
+ typer.secho(
655
+ f"[c2rust-library] 原始LLM响应内容(前1000字符):\n{result_text[:1000]}",
656
+ fg=typer.colors.RED,
657
+ err=True,
658
+ )
659
+ if len(result_text) > 1000:
660
+ typer.secho(
661
+ f"[c2rust-library] ... (还有 {len(result_text) - 1000} 个字符未显示)",
662
+ fg=typer.colors.RED,
663
+ err=True,
664
+ )
665
+ if attempt < MAX_LLM_RETRIES:
666
+ continue # 继续重试
667
+ else:
668
+ # 最后一次尝试也失败,使用默认值
669
+ typer.secho(
670
+ f"[c2rust-library] 重试 {MAX_LLM_RETRIES} 次后JSON解析仍然失败: {parse_error},使用默认值",
671
+ fg=typer.colors.YELLOW,
672
+ err=True,
673
+ )
674
+ return {"replaceable": False}
675
+
676
+ # 解析成功,检查是否为字典
677
+ if not isinstance(parsed, dict):
678
+ last_parse_error = f"解析结果不是字典,而是 {type(parsed).__name__}"
679
+ typer.secho(
680
+ f"[c2rust-library] 第 {attempt}/{MAX_LLM_RETRIES} 次尝试:{last_parse_error}",
681
+ fg=typer.colors.YELLOW,
682
+ err=True,
683
+ )
684
+ # 打印解析结果和原始内容以便调试
685
+ typer.secho(
686
+ f"[c2rust-library] 解析结果类型: {type(parsed).__name__}, 值: {repr(parsed)[:500]}",
687
+ fg=typer.colors.RED,
688
+ err=True,
689
+ )
690
+ result_text = str(result or "").strip()
691
+ if result_text:
692
+ typer.secho(
693
+ f"[c2rust-library] 原始LLM响应内容(前1000字符):\n{result_text[:1000]}",
694
+ fg=typer.colors.RED,
695
+ err=True,
696
+ )
697
+ if attempt < MAX_LLM_RETRIES:
698
+ continue # 继续重试
699
+ else:
700
+ typer.secho(
701
+ f"[c2rust-library] 重试 {MAX_LLM_RETRIES} 次后结果格式仍然不正确,视为不可替代。",
702
+ fg=typer.colors.YELLOW,
703
+ err=True,
704
+ )
705
+ return {"replaceable": False}
706
+
707
+ # 成功解析为字典,处理结果
639
708
  rep = bool(parsed.get("replaceable") is True)
640
709
  lib = str(parsed.get("library") or "").strip()
641
710
  api = str(parsed.get("api") or parsed.get("function") or "").strip()
@@ -688,12 +757,37 @@ def _llm_evaluate_subtree(
688
757
  result_obj["apis"] = apis
689
758
  if notes:
690
759
  result_obj["notes"] = notes
760
+
761
+ # 成功获取结果,返回
762
+ if attempt > 1:
763
+ typer.secho(
764
+ f"[c2rust-library] 第 {attempt} 次尝试成功获取评估结果",
765
+ fg=typer.colors.GREEN,
766
+ err=True,
767
+ )
691
768
  return result_obj
692
- typer.secho("[c2rust-library] LLM 结果解析失败,视为不可替代。", fg=typer.colors.YELLOW, err=True)
693
- return {"replaceable": False}
694
- except Exception as e:
695
- typer.secho(f"[c2rust-library] LLM 评估失败,视为不可替代: {e}", fg=typer.colors.YELLOW, err=True)
696
- return {"replaceable": False}
769
+
770
+ except Exception as e:
771
+ # LLM调用异常,记录并准备重试
772
+ last_parse_error = f"LLM调用异常: {str(e)}"
773
+ typer.secho(
774
+ f"[c2rust-library] 第 {attempt}/{MAX_LLM_RETRIES} 次尝试:LLM评估失败: {e}",
775
+ fg=typer.colors.YELLOW,
776
+ err=True,
777
+ )
778
+ if attempt < MAX_LLM_RETRIES:
779
+ continue # 继续重试
780
+ else:
781
+ # 最后一次尝试也失败,返回默认值
782
+ typer.secho(
783
+ f"[c2rust-library] 重试 {MAX_LLM_RETRIES} 次后LLM评估仍然失败: {e},视为不可替代",
784
+ fg=typer.colors.YELLOW,
785
+ err=True,
786
+ )
787
+ return {"replaceable": False}
788
+
789
+ # 理论上不会到达这里,但作为保险
790
+ return {"replaceable": False}
697
791
 
698
792
 
699
793
  def _is_entry_function(
@@ -843,7 +937,7 @@ def apply_library_replacement(
843
937
  disabled_libraries: Optional[List[str]] = None,
844
938
  resume: bool = True,
845
939
  checkpoint_path: Optional[Path] = None,
846
- checkpoint_interval: int = 1,
940
+ checkpoint_interval: int = DEFAULT_CHECKPOINT_INTERVAL,
847
941
  clear_checkpoint_on_done: bool = True,
848
942
  non_interactive: bool = True,
849
943
  ) -> Dict[str, Path]:
@@ -872,7 +966,7 @@ def apply_library_replacement(
872
966
 
873
967
  # Checkpoint 默认路径
874
968
  if checkpoint_path is None:
875
- checkpoint_path = data_dir / "library_replacer_checkpoint.json"
969
+ checkpoint_path = data_dir / DEFAULT_CHECKPOINT_FILE
876
970
 
877
971
  # 读取符号
878
972
  all_records, by_id, name_to_id, func_ids, id_refs_names = _load_symbols(sjsonl)
@@ -898,8 +992,11 @@ def apply_library_replacement(
898
992
  # 预处理禁用库
899
993
  disabled_norm, disabled_display = _normalize_disabled_libraries(disabled_libraries)
900
994
 
995
+ # 读取附加说明
996
+ additional_notes = _load_additional_notes(data_dir)
997
+
901
998
  # 断点恢复支持:工具函数与关键键构造
902
- ckpt_path: Path = Path(checkpoint_path) if checkpoint_path is not None else (data_dir / "library_replacer_checkpoint.json")
999
+ ckpt_path: Path = Path(checkpoint_path) if checkpoint_path is not None else (data_dir / DEFAULT_CHECKPOINT_FILE)
903
1000
  checkpoint_key = _make_checkpoint_key(sjsonl, library_name, llm_group, candidates, disabled_libraries, max_funcs)
904
1001
 
905
1002
  def _new_model() -> Optional[Any]:
@@ -910,6 +1007,7 @@ def apply_library_replacement(
910
1007
  pruned_dynamic: Set[int] = set() # 动态累计的"将被剪除"的函数集合(不含选中根)
911
1008
  selected_roots: List[Tuple[int, Dict[str, Any]]] = [] # 实时选中的可替代根(fid, LLM结果)
912
1009
  processed_roots: Set[int] = set() # 已处理(评估或跳过)的根集合
1010
+ root_funcs_processed: Set[int] = set() # 已处理的初始根函数集合(用于进度显示)
913
1011
  last_ckpt_saved = 0 # 上次保存的计数
914
1012
 
915
1013
  # 若存在匹配的断点文件,则加载恢复
@@ -935,6 +1033,11 @@ def apply_library_replacement(
935
1033
  selected_roots = sr_list
936
1034
  except Exception:
937
1035
  selected_roots = []
1036
+ # 恢复已处理的初始根函数集合(从 processed_roots 中筛选出在 root_funcs 中的)
1037
+ try:
1038
+ root_funcs_processed = {fid for fid in processed_roots if fid in root_funcs}
1039
+ except Exception:
1040
+ root_funcs_processed = set()
938
1041
  typer.secho(
939
1042
  f"[c2rust-library] 已从断点恢复: 已评估={eval_counter}, 已处理根={len(processed_roots)}, 已剪除={len(pruned_dynamic)}, 已选中替代根={len(selected_roots)}",
940
1043
  fg=typer.colors.YELLOW,
@@ -962,17 +1065,17 @@ def apply_library_replacement(
962
1065
  try:
963
1066
  interval = int(checkpoint_interval)
964
1067
  except Exception:
965
- interval = 1
1068
+ interval = DEFAULT_CHECKPOINT_INTERVAL
966
1069
  need_save = force or (interval <= 0) or ((eval_counter - last_ckpt_saved) >= interval)
967
1070
  if not need_save:
968
1071
  return
969
1072
  try:
970
- _atomic_write(ckpt_path, json.dumps(_current_checkpoint_state(), ensure_ascii=False, indent=2))
1073
+ _atomic_write(ckpt_path, json.dumps(_current_checkpoint_state(), ensure_ascii=False, indent=JSON_INDENT))
971
1074
  last_ckpt_saved = eval_counter
972
1075
  except Exception:
973
1076
  pass
974
1077
 
975
- def _evaluate_node(fid: int) -> None:
1078
+ def _evaluate_node(fid: int, is_root_func: bool = False) -> None:
976
1079
  nonlocal eval_counter
977
1080
  # 限流
978
1081
  if max_funcs is not None and eval_counter >= max_funcs:
@@ -985,8 +1088,22 @@ def apply_library_replacement(
985
1088
  desc = _collect_descendants(fid, adj_func, desc_cache)
986
1089
  rec_meta = by_id.get(fid, {})
987
1090
  label = rec_meta.get("qualified_name") or rec_meta.get("name") or f"sym_{fid}"
1091
+ # 计算进度:区分初始根函数和递归评估的子节点
1092
+ total_roots = len(root_funcs)
1093
+ total_evaluated = len(processed_roots) + 1 # +1 因为当前这个即将被处理
1094
+ if is_root_func:
1095
+ # 初始根函数:显示 (当前根函数索引/总根函数数)
1096
+ root_progress = len(root_funcs_processed) + 1
1097
+ progress_info = f"({root_progress}/{total_roots})" if total_roots > 0 else ""
1098
+ else:
1099
+ # 递归评估的子节点:显示 (当前根函数索引/总根函数数, 总评估节点数)
1100
+ root_progress = len(root_funcs_processed)
1101
+ if total_roots > 0:
1102
+ progress_info = f"({root_progress}/{total_roots}, 总评估={total_evaluated})"
1103
+ else:
1104
+ progress_info = f"(总评估={total_evaluated})"
988
1105
  typer.secho(
989
- f"[c2rust-library] 正在评估: {label} (ID: {fid}), 子树函数数={len(desc)}",
1106
+ f"[c2rust-library] {progress_info} 正在评估: {label} (ID: {fid}), 子树函数数={len(desc)}",
990
1107
  fg=typer.colors.CYAN,
991
1108
  err=True,
992
1109
  )
@@ -994,10 +1111,12 @@ def apply_library_replacement(
994
1111
  # 执行 LLM 评估
995
1112
  res = _llm_evaluate_subtree(
996
1113
  fid, desc, by_id, adj_func, disabled_norm, disabled_display,
997
- _model_available, _new_model
1114
+ _model_available, _new_model, additional_notes
998
1115
  )
999
1116
  eval_counter += 1
1000
1117
  processed_roots.add(fid)
1118
+ if is_root_func:
1119
+ root_funcs_processed.add(fid)
1001
1120
  res["mode"] = "llm"
1002
1121
  _periodic_checkpoint_save()
1003
1122
 
@@ -1016,12 +1135,26 @@ def apply_library_replacement(
1016
1135
  conf = 0.0
1017
1136
  libs_str = ", ".join(libs) if libs else "(未指定库)"
1018
1137
  apis_str = ", ".join([str(a) for a in apis]) if isinstance(apis, list) else (api if api else "")
1019
- msg = f"[c2rust-library] 可替换: {label} -> 库: {libs_str}"
1138
+ # 计算进度:区分初始根函数和递归评估的子节点
1139
+ total_roots = len(root_funcs)
1140
+ if is_root_func:
1141
+ # 初始根函数:显示 (当前根函数索引/总根函数数)
1142
+ root_progress = len(root_funcs_processed)
1143
+ progress_info = f"({root_progress}/{total_roots})" if total_roots > 0 else ""
1144
+ else:
1145
+ # 递归评估的子节点:显示 (当前根函数索引/总根函数数, 总评估节点数)
1146
+ root_progress = len(root_funcs_processed)
1147
+ total_evaluated = len(processed_roots)
1148
+ if total_roots > 0:
1149
+ progress_info = f"({root_progress}/{total_roots}, 总评估={total_evaluated})"
1150
+ else:
1151
+ progress_info = f"(总评估={total_evaluated})"
1152
+ msg = f"[c2rust-library] {progress_info} 可替换: {label} -> 库: {libs_str}"
1020
1153
  if apis_str:
1021
1154
  msg += f"; 参考API: {apis_str}"
1022
1155
  msg += f"; 置信度: {conf:.2f}"
1023
1156
  if notes:
1024
- msg += f"; 备注: {notes[:200]}"
1157
+ msg += f"; 备注: {notes[:MAX_NOTES_DISPLAY_LENGTH]}"
1025
1158
  typer.secho(msg, fg=typer.colors.GREEN, err=True)
1026
1159
 
1027
1160
  # 入口函数保护:不替代 main(保留进行转译),改为深入评估其子节点
@@ -1032,7 +1165,7 @@ def apply_library_replacement(
1032
1165
  err=True,
1033
1166
  )
1034
1167
  for ch in adj_func.get(fid, []):
1035
- _evaluate_node(ch)
1168
+ _evaluate_node(ch, is_root_func=False)
1036
1169
  else:
1037
1170
  # 即时剪枝(不含根)
1038
1171
  to_prune = set(desc)
@@ -1050,13 +1183,13 @@ def apply_library_replacement(
1050
1183
  else:
1051
1184
  # 若不可替代,继续评估其子节点(深度优先)
1052
1185
  for ch in adj_func.get(fid, []):
1053
- _evaluate_node(ch)
1186
+ _evaluate_node(ch, is_root_func=False)
1054
1187
  except Exception:
1055
1188
  pass
1056
1189
 
1057
1190
  # 对每个候选根进行评估;若根不可替代将递归评估其子节点
1058
1191
  for fid in root_funcs:
1059
- _evaluate_node(fid)
1192
+ _evaluate_node(fid, is_root_func=True)
1060
1193
 
1061
1194
  # 剪枝集合来自动态评估阶段的累计结果
1062
1195
  pruned_funcs: Set[int] = set(pruned_dynamic)
@@ -1118,5 +1251,4 @@ def apply_library_replacement(
1118
1251
  return result
1119
1252
 
1120
1253
 
1121
- __all__ = ["apply_library_replacement"]
1122
1254
  __all__ = ["apply_library_replacement"]