jarvis-ai-assistant 0.1.222__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. jarvis/__init__.py +1 -1
  2. jarvis/jarvis_agent/__init__.py +1143 -245
  3. jarvis/jarvis_agent/agent_manager.py +97 -0
  4. jarvis/jarvis_agent/builtin_input_handler.py +12 -10
  5. jarvis/jarvis_agent/config_editor.py +57 -0
  6. jarvis/jarvis_agent/edit_file_handler.py +392 -99
  7. jarvis/jarvis_agent/event_bus.py +48 -0
  8. jarvis/jarvis_agent/events.py +157 -0
  9. jarvis/jarvis_agent/file_context_handler.py +79 -0
  10. jarvis/jarvis_agent/file_methodology_manager.py +117 -0
  11. jarvis/jarvis_agent/jarvis.py +1117 -147
  12. jarvis/jarvis_agent/main.py +78 -34
  13. jarvis/jarvis_agent/memory_manager.py +195 -0
  14. jarvis/jarvis_agent/methodology_share_manager.py +174 -0
  15. jarvis/jarvis_agent/prompt_manager.py +82 -0
  16. jarvis/jarvis_agent/prompts.py +46 -9
  17. jarvis/jarvis_agent/protocols.py +4 -1
  18. jarvis/jarvis_agent/rewrite_file_handler.py +141 -0
  19. jarvis/jarvis_agent/run_loop.py +146 -0
  20. jarvis/jarvis_agent/session_manager.py +9 -9
  21. jarvis/jarvis_agent/share_manager.py +228 -0
  22. jarvis/jarvis_agent/shell_input_handler.py +23 -3
  23. jarvis/jarvis_agent/stdio_redirect.py +295 -0
  24. jarvis/jarvis_agent/task_analyzer.py +212 -0
  25. jarvis/jarvis_agent/task_manager.py +154 -0
  26. jarvis/jarvis_agent/task_planner.py +496 -0
  27. jarvis/jarvis_agent/tool_executor.py +8 -4
  28. jarvis/jarvis_agent/tool_share_manager.py +139 -0
  29. jarvis/jarvis_agent/user_interaction.py +42 -0
  30. jarvis/jarvis_agent/utils.py +54 -0
  31. jarvis/jarvis_agent/web_bridge.py +189 -0
  32. jarvis/jarvis_agent/web_output_sink.py +53 -0
  33. jarvis/jarvis_agent/web_server.py +751 -0
  34. jarvis/jarvis_c2rust/__init__.py +26 -0
  35. jarvis/jarvis_c2rust/cli.py +613 -0
  36. jarvis/jarvis_c2rust/collector.py +258 -0
  37. jarvis/jarvis_c2rust/library_replacer.py +1122 -0
  38. jarvis/jarvis_c2rust/llm_module_agent.py +1300 -0
  39. jarvis/jarvis_c2rust/optimizer.py +960 -0
  40. jarvis/jarvis_c2rust/scanner.py +1681 -0
  41. jarvis/jarvis_c2rust/transpiler.py +2325 -0
  42. jarvis/jarvis_code_agent/build_validation_config.py +133 -0
  43. jarvis/jarvis_code_agent/code_agent.py +1605 -178
  44. jarvis/jarvis_code_agent/code_analyzer/__init__.py +62 -0
  45. jarvis/jarvis_code_agent/code_analyzer/base_language.py +74 -0
  46. jarvis/jarvis_code_agent/code_analyzer/build_validator/__init__.py +44 -0
  47. jarvis/jarvis_code_agent/code_analyzer/build_validator/base.py +102 -0
  48. jarvis/jarvis_code_agent/code_analyzer/build_validator/cmake.py +59 -0
  49. jarvis/jarvis_code_agent/code_analyzer/build_validator/detector.py +125 -0
  50. jarvis/jarvis_code_agent/code_analyzer/build_validator/fallback.py +69 -0
  51. jarvis/jarvis_code_agent/code_analyzer/build_validator/go.py +38 -0
  52. jarvis/jarvis_code_agent/code_analyzer/build_validator/java_gradle.py +44 -0
  53. jarvis/jarvis_code_agent/code_analyzer/build_validator/java_maven.py +38 -0
  54. jarvis/jarvis_code_agent/code_analyzer/build_validator/makefile.py +50 -0
  55. jarvis/jarvis_code_agent/code_analyzer/build_validator/nodejs.py +93 -0
  56. jarvis/jarvis_code_agent/code_analyzer/build_validator/python.py +129 -0
  57. jarvis/jarvis_code_agent/code_analyzer/build_validator/rust.py +54 -0
  58. jarvis/jarvis_code_agent/code_analyzer/build_validator/validator.py +154 -0
  59. jarvis/jarvis_code_agent/code_analyzer/build_validator.py +43 -0
  60. jarvis/jarvis_code_agent/code_analyzer/context_manager.py +363 -0
  61. jarvis/jarvis_code_agent/code_analyzer/context_recommender.py +18 -0
  62. jarvis/jarvis_code_agent/code_analyzer/dependency_analyzer.py +132 -0
  63. jarvis/jarvis_code_agent/code_analyzer/file_ignore.py +330 -0
  64. jarvis/jarvis_code_agent/code_analyzer/impact_analyzer.py +781 -0
  65. jarvis/jarvis_code_agent/code_analyzer/language_registry.py +185 -0
  66. jarvis/jarvis_code_agent/code_analyzer/language_support.py +89 -0
  67. jarvis/jarvis_code_agent/code_analyzer/languages/__init__.py +31 -0
  68. jarvis/jarvis_code_agent/code_analyzer/languages/c_cpp_language.py +231 -0
  69. jarvis/jarvis_code_agent/code_analyzer/languages/go_language.py +183 -0
  70. jarvis/jarvis_code_agent/code_analyzer/languages/python_language.py +219 -0
  71. jarvis/jarvis_code_agent/code_analyzer/languages/rust_language.py +209 -0
  72. jarvis/jarvis_code_agent/code_analyzer/llm_context_recommender.py +451 -0
  73. jarvis/jarvis_code_agent/code_analyzer/symbol_extractor.py +77 -0
  74. jarvis/jarvis_code_agent/code_analyzer/tree_sitter_extractor.py +48 -0
  75. jarvis/jarvis_code_agent/lint.py +275 -13
  76. jarvis/jarvis_code_agent/utils.py +142 -0
  77. jarvis/jarvis_code_analysis/checklists/loader.py +20 -6
  78. jarvis/jarvis_code_analysis/code_review.py +583 -548
  79. jarvis/jarvis_data/config_schema.json +339 -28
  80. jarvis/jarvis_git_squash/main.py +22 -13
  81. jarvis/jarvis_git_utils/git_commiter.py +171 -55
  82. jarvis/jarvis_mcp/sse_mcp_client.py +22 -15
  83. jarvis/jarvis_mcp/stdio_mcp_client.py +4 -4
  84. jarvis/jarvis_mcp/streamable_mcp_client.py +36 -16
  85. jarvis/jarvis_memory_organizer/memory_organizer.py +753 -0
  86. jarvis/jarvis_methodology/main.py +48 -63
  87. jarvis/jarvis_multi_agent/__init__.py +302 -43
  88. jarvis/jarvis_multi_agent/main.py +70 -24
  89. jarvis/jarvis_platform/ai8.py +40 -23
  90. jarvis/jarvis_platform/base.py +210 -49
  91. jarvis/jarvis_platform/human.py +11 -1
  92. jarvis/jarvis_platform/kimi.py +82 -76
  93. jarvis/jarvis_platform/openai.py +73 -1
  94. jarvis/jarvis_platform/registry.py +8 -15
  95. jarvis/jarvis_platform/tongyi.py +115 -101
  96. jarvis/jarvis_platform/yuanbao.py +89 -63
  97. jarvis/jarvis_platform_manager/main.py +194 -132
  98. jarvis/jarvis_platform_manager/service.py +122 -86
  99. jarvis/jarvis_rag/cli.py +156 -53
  100. jarvis/jarvis_rag/embedding_manager.py +155 -12
  101. jarvis/jarvis_rag/llm_interface.py +10 -13
  102. jarvis/jarvis_rag/query_rewriter.py +63 -12
  103. jarvis/jarvis_rag/rag_pipeline.py +222 -40
  104. jarvis/jarvis_rag/reranker.py +26 -3
  105. jarvis/jarvis_rag/retriever.py +270 -14
  106. jarvis/jarvis_sec/__init__.py +3605 -0
  107. jarvis/jarvis_sec/checkers/__init__.py +32 -0
  108. jarvis/jarvis_sec/checkers/c_checker.py +2680 -0
  109. jarvis/jarvis_sec/checkers/rust_checker.py +1108 -0
  110. jarvis/jarvis_sec/cli.py +116 -0
  111. jarvis/jarvis_sec/report.py +257 -0
  112. jarvis/jarvis_sec/status.py +264 -0
  113. jarvis/jarvis_sec/types.py +20 -0
  114. jarvis/jarvis_sec/workflow.py +219 -0
  115. jarvis/jarvis_smart_shell/main.py +405 -137
  116. jarvis/jarvis_stats/__init__.py +13 -0
  117. jarvis/jarvis_stats/cli.py +387 -0
  118. jarvis/jarvis_stats/stats.py +711 -0
  119. jarvis/jarvis_stats/storage.py +612 -0
  120. jarvis/jarvis_stats/visualizer.py +282 -0
  121. jarvis/jarvis_tools/ask_user.py +1 -0
  122. jarvis/jarvis_tools/base.py +18 -2
  123. jarvis/jarvis_tools/clear_memory.py +239 -0
  124. jarvis/jarvis_tools/cli/main.py +220 -144
  125. jarvis/jarvis_tools/execute_script.py +52 -12
  126. jarvis/jarvis_tools/file_analyzer.py +17 -12
  127. jarvis/jarvis_tools/generate_new_tool.py +46 -24
  128. jarvis/jarvis_tools/read_code.py +277 -18
  129. jarvis/jarvis_tools/read_symbols.py +141 -0
  130. jarvis/jarvis_tools/read_webpage.py +86 -13
  131. jarvis/jarvis_tools/registry.py +294 -90
  132. jarvis/jarvis_tools/retrieve_memory.py +227 -0
  133. jarvis/jarvis_tools/save_memory.py +194 -0
  134. jarvis/jarvis_tools/search_web.py +62 -28
  135. jarvis/jarvis_tools/sub_agent.py +205 -0
  136. jarvis/jarvis_tools/sub_code_agent.py +217 -0
  137. jarvis/jarvis_tools/virtual_tty.py +330 -62
  138. jarvis/jarvis_utils/builtin_replace_map.py +4 -5
  139. jarvis/jarvis_utils/clipboard.py +90 -0
  140. jarvis/jarvis_utils/config.py +607 -50
  141. jarvis/jarvis_utils/embedding.py +3 -0
  142. jarvis/jarvis_utils/fzf.py +57 -0
  143. jarvis/jarvis_utils/git_utils.py +251 -29
  144. jarvis/jarvis_utils/globals.py +174 -17
  145. jarvis/jarvis_utils/http.py +58 -79
  146. jarvis/jarvis_utils/input.py +899 -153
  147. jarvis/jarvis_utils/methodology.py +210 -83
  148. jarvis/jarvis_utils/output.py +220 -137
  149. jarvis/jarvis_utils/utils.py +1906 -135
  150. jarvis_ai_assistant-0.7.0.dist-info/METADATA +465 -0
  151. jarvis_ai_assistant-0.7.0.dist-info/RECORD +192 -0
  152. {jarvis_ai_assistant-0.1.222.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/entry_points.txt +8 -2
  153. jarvis/jarvis_git_details/main.py +0 -265
  154. jarvis/jarvis_platform/oyi.py +0 -357
  155. jarvis/jarvis_tools/edit_file.py +0 -255
  156. jarvis/jarvis_tools/rewrite_file.py +0 -195
  157. jarvis_ai_assistant-0.1.222.dist-info/METADATA +0 -767
  158. jarvis_ai_assistant-0.1.222.dist-info/RECORD +0 -110
  159. /jarvis/{jarvis_git_details → jarvis_memory_organizer}/__init__.py +0 -0
  160. {jarvis_ai_assistant-0.1.222.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/WHEEL +0 -0
  161. {jarvis_ai_assistant-0.1.222.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/licenses/LICENSE +0 -0
  162. {jarvis_ai_assistant-0.1.222.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,6 @@
1
1
  from typing import List
2
2
  from .llm_interface import LLMInterface
3
+ from jarvis.jarvis_utils.output import PrettyOutput, OutputType
3
4
 
4
5
 
5
6
  class QueryRewriter:
@@ -20,20 +21,29 @@ class QueryRewriter:
20
21
  def _create_prompt_template(self) -> str:
21
22
  """为多查询重写任务创建提示模板。"""
22
23
  return """
23
- 你是一个精通检索的AI助手。你的任务是将以下这个单一的用户问题,从不同角度改写成 3 个不同的、但语义上相关的搜索查询。这有助于在知识库中进行更全面的搜索。
24
+ 你是一个精通检索和语言的AI助手。你的任务是将以下这个单一的用户问题,改写为几个语义相关但表达方式不同的搜索查询,并提供英文翻译。这有助于在多语言知识库中进行更全面的搜索。
24
25
 
25
26
  请遵循以下原则:
26
- 1. **多样性**:生成的查询应尝试使用不同的关键词和表述方式。
27
- 2. **保留核心意图**:所有查询都必须围绕原始问题的核心意图。
28
- 3. **简洁性**:每个查询都应该是独立的、可以直接用于搜索的短语或问题。
29
- 4. **格式要求**:请直接输出 3 个查询,每个查询占一行,用换行符分隔。不要添加任何编号、前缀或解释。
27
+ 1. **保留核心意图**: 所有查询都必须围绕原始问题的核心意图。
28
+ 2. **查询类型**:
29
+ - **同义词/相关术语查询**: 使用原始语言,通过替换同义词或相关术语来生成1-2个新的查询。
30
+ - **英文翻译查询**: 将原始问题翻译成一个简洁的英文搜索查询。
31
+ 3. **简洁性**: 每个查询都应该是独立的、可以直接用于搜索的短语或问题。
32
+ 4. **严格格式要求**: 你必须将所有重写后的查询放置在 `<REWRITE>` 和 `</REWRITE>` 标签之间。每个查询占一行。不要在标签内外添加任何编号、前缀或解释。
33
+
34
+ 示例输出格式:
35
+ <REWRITE>
36
+ 使用不同表述的中文查询
37
+ 另一个中文查询
38
+ English version of the query
39
+ </REWRITE>
30
40
 
31
41
  原始问题:
32
42
  ---
33
43
  {query}
34
44
  ---
35
45
 
36
- 3个改写后的查询 (每行一个):
46
+ 请将改写后的查询包裹在 `<REWRITE>` 标签内:
37
47
  """
38
48
 
39
49
  def rewrite(self, query: str) -> List[str]:
@@ -47,16 +57,57 @@ class QueryRewriter:
47
57
  一个经过重写、搜索优化的查询列表。
48
58
  """
49
59
  prompt = self.rewrite_prompt_template.format(query=query)
50
- print(f"✍️ 正在将原始查询重写为多个搜索查询...")
60
+ PrettyOutput.print(
61
+ "正在将原始查询重写为多个搜索查询...",
62
+ output_type=OutputType.INFO,
63
+ timestamp=False,
64
+ )
65
+
66
+ import re
67
+
68
+ max_retries = 3
69
+ attempts = 0
70
+ rewritten_queries = []
71
+ response_text = ""
72
+
73
+ while attempts < max_retries:
74
+ attempts += 1
75
+ response_text = self.llm.generate(prompt)
76
+ match = re.search(r"<REWRITE>(.*?)</REWRITE>", response_text, re.DOTALL)
77
+
78
+ if match:
79
+ content = match.group(1).strip()
80
+ rewritten_queries = [
81
+ line.strip() for line in content.split("\n") if line.strip()
82
+ ]
83
+ PrettyOutput.print(
84
+ f"成功从LLM响应中提取到内容 (尝试 {attempts}/{max_retries})。",
85
+ output_type=OutputType.SUCCESS,
86
+ timestamp=False,
87
+ )
88
+ break # 提取成功,退出循环
89
+ else:
90
+ PrettyOutput.print(
91
+ f"未能从LLM响应中提取内容。正在重试... ({attempts}/{max_retries})",
92
+ output_type=OutputType.WARNING,
93
+ timestamp=False,
94
+ )
51
95
 
52
- response_text = self.llm.generate(prompt)
53
- rewritten_queries = [
54
- line.strip() for line in response_text.strip().split("\n") if line.strip()
55
- ]
96
+ # 如果所有重试都失败,则跳过重写步骤
97
+ if not rewritten_queries:
98
+ PrettyOutput.print(
99
+ "所有重试均失败。跳过查询重写,将仅使用原始查询。",
100
+ output_type=OutputType.ERROR,
101
+ timestamp=False,
102
+ )
56
103
 
57
104
  # 同时包含原始查询以保证鲁棒性
58
105
  if query not in rewritten_queries:
59
106
  rewritten_queries.insert(0, query)
60
107
 
61
- print(f"✅ 生成了 {len(rewritten_queries)} 个查询变体。")
108
+ PrettyOutput.print(
109
+ f"生成了 {len(rewritten_queries)} 个查询变体。",
110
+ output_type=OutputType.SUCCESS,
111
+ timestamp=False,
112
+ )
62
113
  return rewritten_queries
@@ -8,12 +8,14 @@ from .llm_interface import JarvisPlatform_LLM, LLMInterface, ToolAgent_LLM
8
8
  from .query_rewriter import QueryRewriter
9
9
  from .reranker import Reranker
10
10
  from .retriever import ChromaRetriever
11
+ from jarvis.jarvis_utils.output import OutputType, PrettyOutput
11
12
  from jarvis.jarvis_utils.config import (
12
13
  get_rag_embedding_model,
13
14
  get_rag_rerank_model,
14
15
  get_rag_vector_db_path,
15
16
  get_rag_embedding_cache_path,
16
17
  )
18
+ from jarvis.jarvis_utils.utils import get_yes_no
17
19
 
18
20
 
19
21
  class JarvisRAGPipeline:
@@ -30,6 +32,9 @@ class JarvisRAGPipeline:
30
32
  embedding_model: Optional[str] = None,
31
33
  db_path: Optional[str] = None,
32
34
  collection_name: str = "jarvis_rag_collection",
35
+ use_bm25: bool = True,
36
+ use_rerank: bool = True,
37
+ use_query_rewrite: bool = True,
33
38
  ):
34
39
  """
35
40
  初始化RAG管道。
@@ -40,6 +45,8 @@ class JarvisRAGPipeline:
40
45
  embedding_model: 嵌入模型的名称。如果为None,则使用配置值。
41
46
  db_path: 持久化向量数据库的路径。如果为None,则使用配置值。
42
47
  collection_name: 向量数据库中集合的名称。
48
+ use_bm25: 是否在检索中使用BM25。
49
+ use_rerank: 是否在检索后使用重排器。
43
50
  """
44
51
  # 确定嵌入模型以隔离数据路径
45
52
  model_name = embedding_model or get_rag_embedding_model()
@@ -56,22 +63,127 @@ class JarvisRAGPipeline:
56
63
  get_rag_embedding_cache_path(), sanitized_model_name
57
64
  )
58
65
 
59
- self.embedding_manager = EmbeddingManager(
60
- model_name=model_name,
61
- cache_dir=_final_cache_path,
66
+ # 存储初始化参数以供延迟加载
67
+ self.llm = llm if llm is not None else ToolAgent_LLM()
68
+ self.embedding_model_name = embedding_model or get_rag_embedding_model()
69
+ self.db_path = db_path
70
+ self.collection_name = collection_name
71
+ self.use_bm25 = use_bm25
72
+ self.use_rerank = use_rerank
73
+ # 查询重写开关(默认开启,可由CLI控制)
74
+ self.use_query_rewrite = use_query_rewrite
75
+
76
+ # 延迟加载的组件
77
+ self._embedding_manager: Optional[EmbeddingManager] = None
78
+ self._retriever: Optional[ChromaRetriever] = None
79
+ self._reranker: Optional[Reranker] = None
80
+ self._query_rewriter: Optional[QueryRewriter] = None
81
+
82
+ PrettyOutput.print(
83
+ "JarvisRAGPipeline 初始化成功 (模型按需加载).", OutputType.SUCCESS
84
+ )
85
+
86
+ def _get_embedding_manager(self) -> EmbeddingManager:
87
+ if self._embedding_manager is None:
88
+ sanitized_model_name = self.embedding_model_name.replace("/", "_").replace(
89
+ "\\", "_"
90
+ )
91
+ _final_cache_path = os.path.join(
92
+ get_rag_embedding_cache_path(), sanitized_model_name
93
+ )
94
+ self._embedding_manager = EmbeddingManager(
95
+ model_name=self.embedding_model_name,
96
+ cache_dir=_final_cache_path,
97
+ )
98
+ return self._embedding_manager
99
+
100
+ def _get_retriever(self) -> ChromaRetriever:
101
+ if self._retriever is None:
102
+ sanitized_model_name = self.embedding_model_name.replace("/", "_").replace(
103
+ "\\", "_"
104
+ )
105
+ _final_db_path = (
106
+ str(self.db_path)
107
+ if self.db_path
108
+ else os.path.join(get_rag_vector_db_path(), sanitized_model_name)
109
+ )
110
+ self._retriever = ChromaRetriever(
111
+ embedding_manager=self._get_embedding_manager(),
112
+ db_path=_final_db_path,
113
+ collection_name=self.collection_name,
114
+ )
115
+ return self._retriever
116
+
117
+ def _get_collection(self):
118
+ """
119
+ 在不加载嵌入模型的情况下,直接获取并返回Chroma集合对象。
120
+ 这对于仅需要访问集合元数据(如列出文档)而无需嵌入功能的操作非常有用。
121
+ """
122
+ # 为了避免初始化embedding_manager,我们直接构建db_path
123
+ if self._retriever:
124
+ return self._retriever.collection
125
+
126
+ sanitized_model_name = self.embedding_model_name.replace("/", "_").replace(
127
+ "\\", "_"
62
128
  )
63
- self.retriever = ChromaRetriever(
64
- embedding_manager=self.embedding_manager,
65
- db_path=_final_db_path,
66
- collection_name=collection_name,
129
+ _final_db_path = (
130
+ str(self.db_path)
131
+ if self.db_path
132
+ else os.path.join(get_rag_vector_db_path(), sanitized_model_name)
67
133
  )
68
- # 除非提供了特定的LLM,否则默认为ToolAgent_LLM
69
- self.llm = llm if llm is not None else ToolAgent_LLM()
70
- self.reranker = Reranker(model_name=get_rag_rerank_model())
71
- # 使用标准LLM执行查询重写任务,而不是代理
72
- self.query_rewriter = QueryRewriter(JarvisPlatform_LLM())
73
134
 
74
- print("✅ JarvisRAGPipeline 初始化成功。")
135
+ # 直接创建ChromaRetriever所使用的chroma_client,但绕过embedding_manager
136
+ import chromadb
137
+
138
+ chroma_client = chromadb.PersistentClient(path=_final_db_path)
139
+ return chroma_client.get_collection(name=self.collection_name)
140
+
141
+ def _get_reranker(self) -> Reranker:
142
+ if self._reranker is None:
143
+ self._reranker = Reranker(model_name=get_rag_rerank_model())
144
+ return self._reranker
145
+
146
+ def _get_query_rewriter(self) -> QueryRewriter:
147
+ if self._query_rewriter is None:
148
+ # 使用标准LLM执行查询重写任务,而不是代理
149
+ self._query_rewriter = QueryRewriter(JarvisPlatform_LLM())
150
+ return self._query_rewriter
151
+
152
+ def _pre_search_update_index_if_needed(self) -> None:
153
+ """
154
+ 在重写query之前执行:
155
+ - 检测索引变更(变更/删除)
156
+ - 询问用户是否立即更新索引
157
+ - 如确认,则执行增量更新并重建BM25
158
+ """
159
+ try:
160
+ retriever = self._get_retriever()
161
+ result = retriever.detect_index_changes()
162
+ changed = result.get("changed", [])
163
+ deleted = result.get("deleted", [])
164
+ if not changed and not deleted:
165
+ return
166
+ # 打印摘要
167
+ # 先拼接列表信息再统一打印,避免循环中逐条打印
168
+ lines = [
169
+ f"检测到索引可能不一致:变更 {len(changed)} 个,删除 {len(deleted)} 个。"
170
+ ]
171
+ if changed:
172
+ lines.extend([f" 变更: {p}" for p in changed[:3]])
173
+ if deleted:
174
+ lines.extend([f" 删除: {p}" for p in deleted[:3]])
175
+ PrettyOutput.print("\n".join(lines), OutputType.WARNING)
176
+ # 询问用户
177
+ if get_yes_no(
178
+ "检测到索引变更,是否现在更新索引后再开始检索?", default=True
179
+ ):
180
+ retriever.update_index_for_changes(changed, deleted)
181
+ else:
182
+ PrettyOutput.print(
183
+ "已跳过索引更新,将直接使用当前索引进行检索。", OutputType.INFO
184
+ )
185
+ except Exception as e:
186
+ PrettyOutput.print(f"检索前索引检查失败:{e}", OutputType.WARNING)
75
187
 
76
188
  def add_documents(self, documents: List[Document]):
77
189
  """
@@ -80,24 +192,21 @@ class JarvisRAGPipeline:
80
192
  参数:
81
193
  documents: 要添加的LangChain文档对象列表。
82
194
  """
83
- self.retriever.add_documents(documents)
195
+ self._get_retriever().add_documents(documents)
84
196
 
85
- def _create_prompt(
86
- self, query: str, context_docs: List[Document], source_files: List[str]
87
- ) -> str:
197
+ def _create_prompt(self, query: str, context_docs: List[Document]) -> str:
88
198
  """为LLM或代理创建最终的提示。"""
89
- context = "\n\n".join([doc.page_content for doc in context_docs])
90
- sources_text = "\n".join([f"- {source}" for source in source_files])
199
+ context_details = []
200
+ for doc in context_docs:
201
+ source = doc.metadata.get("source", "未知来源")
202
+ content = doc.page_content
203
+ context_details.append(f"来源: {source}\n\n---\n{content}\n---")
204
+ context = "\n\n".join(context_details)
91
205
 
92
206
  prompt_template = f"""
93
207
  你是一个专家助手。请根据用户的问题,结合下面提供的参考信息来回答。
94
208
 
95
- **重要**: 提供的上下文和文件列表**仅供参考**,可能不完整或已过时。在回答前,你应该**优先使用工具(如 read_code)来获取最新、最准确的信息**。
96
-
97
- 参考文件列表:
98
- ---
99
- {sources_text}
100
- ---
209
+ **重要**: 提供的上下文**仅供参考**,可能不完整或已过时。在回答前,你应该**优先使用工具(如 read_code)来获取最新、最准确的信息**。
101
210
 
102
211
  参考上下文:
103
212
  ---
@@ -121,14 +230,28 @@ class JarvisRAGPipeline:
121
230
  返回:
122
231
  由LLM生成的答案。
123
232
  """
124
- # 1. 将原始查询重写为多个查询
125
- rewritten_queries = self.query_rewriter.rewrite(query_text)
233
+ # 0. 检测索引变更并可选更新(在重写query之前)
234
+ self._pre_search_update_index_if_needed()
235
+ # 1. 将原始查询重写为多个查询(可配置)
236
+ if self.use_query_rewrite:
237
+ rewritten_queries = self._get_query_rewriter().rewrite(query_text)
238
+ else:
239
+ PrettyOutput.print(
240
+ "已关闭查询重写,将直接使用原始查询进行检索。",
241
+ OutputType.INFO,
242
+ )
243
+ rewritten_queries = [query_text]
126
244
 
127
245
  # 2. 为每个重写的查询检索初始候选文档
246
+ PrettyOutput.print(
247
+ "将为以下查询变体进行混合检索:\n" + "\n".join([f" - {q}" for q in rewritten_queries]),
248
+ OutputType.INFO,
249
+ )
128
250
  all_candidate_docs = []
129
251
  for q in rewritten_queries:
130
- print(f"🔍 正在为查询变体 '{q}' 进行混合检索...")
131
- candidates = self.retriever.retrieve(q, n_results=n_results * 2)
252
+ candidates = self._get_retriever().retrieve(
253
+ q, n_results=n_results * 2, use_bm25=self.use_bm25
254
+ )
132
255
  all_candidate_docs.extend(candidates)
133
256
 
134
257
  # 对候选文档进行去重
@@ -139,12 +262,16 @@ class JarvisRAGPipeline:
139
262
  return "我在提供的文档中找不到任何相关信息来回答您的问题。"
140
263
 
141
264
  # 3. 根据*原始*查询对统一的候选池进行重排
142
- print(
143
- f"🔍 正在对 {len(unique_candidate_docs)} 个候选文档进行重排(基于原始问题)..."
144
- )
145
- retrieved_docs = self.reranker.rerank(
146
- query_text, unique_candidate_docs, top_n=n_results
147
- )
265
+ if self.use_rerank:
266
+ PrettyOutput.print(
267
+ f"正在对 {len(unique_candidate_docs)} 个候选文档进行重排(基于原始问题)...",
268
+ OutputType.INFO,
269
+ )
270
+ retrieved_docs = self._get_reranker().rerank(
271
+ query_text, unique_candidate_docs, top_n=n_results
272
+ )
273
+ else:
274
+ retrieved_docs = unique_candidate_docs[:n_results]
148
275
 
149
276
  if not retrieved_docs:
150
277
  return "我在提供的文档中找不到任何相关信息来回答您的问题。"
@@ -160,15 +287,70 @@ class JarvisRAGPipeline:
160
287
  )
161
288
  )
162
289
  if sources:
163
- print(f"📚 根据以下文档回答:")
164
- for source in sources:
165
- print(f" - {source}")
290
+ # 合并来源列表后一次性打印,避免多次加框
291
+ lines = ["根据以下文档回答:"] + [f" - {source}" for source in sources]
292
+ PrettyOutput.print("\n".join(lines), OutputType.INFO)
166
293
 
167
294
  # 4. 创建最终提示并生成答案
168
295
  # 我们使用原始的query_text作为给LLM的最终提示
169
- prompt = self._create_prompt(query_text, retrieved_docs, sources)
296
+ prompt = self._create_prompt(query_text, retrieved_docs)
170
297
 
171
- print("🤖 正在从LLM生成答案...")
298
+ PrettyOutput.print("正在从LLM生成答案...", OutputType.INFO)
172
299
  answer = self.llm.generate(prompt)
173
300
 
174
301
  return answer
302
+
303
+ def retrieve_only(self, query_text: str, n_results: int = 5) -> List[Document]:
304
+ """
305
+ 仅执行检索和重排,不生成答案。
306
+
307
+ 参数:
308
+ query_text: 用户的原始问题。
309
+ n_results: 要检索的最终相关块的数量。
310
+
311
+ 返回:
312
+ 检索到的文档列表。
313
+ """
314
+ # 0. 检测索引变更并可选更新(在重写query之前)
315
+ self._pre_search_update_index_if_needed()
316
+ # 1. 重写查询(可配置)
317
+ if self.use_query_rewrite:
318
+ rewritten_queries = self._get_query_rewriter().rewrite(query_text)
319
+ else:
320
+ PrettyOutput.print(
321
+ "已关闭查询重写,将直接使用原始查询进行检索。",
322
+ OutputType.INFO,
323
+ )
324
+ rewritten_queries = [query_text]
325
+
326
+ # 2. 检索候选文档
327
+ PrettyOutput.print(
328
+ "将为以下查询变体进行混合检索:\n" + "\n".join([f" - {q}" for q in rewritten_queries]),
329
+ OutputType.INFO,
330
+ )
331
+ all_candidate_docs = []
332
+ for q in rewritten_queries:
333
+ candidates = self._get_retriever().retrieve(
334
+ q, n_results=n_results * 2, use_bm25=self.use_bm25
335
+ )
336
+ all_candidate_docs.extend(candidates)
337
+
338
+ unique_docs_dict = {doc.page_content: doc for doc in all_candidate_docs}
339
+ unique_candidate_docs = list(unique_docs_dict.values())
340
+
341
+ if not unique_candidate_docs:
342
+ return []
343
+
344
+ # 3. 重排
345
+ if self.use_rerank:
346
+ PrettyOutput.print(
347
+ f"正在对 {len(unique_candidate_docs)} 个候选文档进行重排...",
348
+ OutputType.INFO,
349
+ )
350
+ retrieved_docs = self._get_reranker().rerank(
351
+ query_text, unique_candidate_docs, top_n=n_results
352
+ )
353
+ else:
354
+ retrieved_docs = unique_candidate_docs[:n_results]
355
+
356
+ return retrieved_docs
@@ -1,9 +1,12 @@
1
1
  from typing import List
2
+ import os
2
3
 
3
4
  from langchain.docstore.document import Document
4
5
  from sentence_transformers.cross_encoder import ( # type: ignore
5
6
  CrossEncoder,
6
7
  )
8
+ from huggingface_hub import snapshot_download
9
+ from jarvis.jarvis_utils.output import OutputType, PrettyOutput
7
10
 
8
11
 
9
12
  class Reranker:
@@ -19,9 +22,29 @@ class Reranker:
19
22
  参数:
20
23
  model_name (str): 要使用的Cross-Encoder模型的名称。
21
24
  """
22
- print(f"🔍 正在初始化重排模型: {model_name}...")
23
- self.model = CrossEncoder(model_name)
24
- print("✅ 重排模型初始化成功。")
25
+ PrettyOutput.print(f"正在初始化重排模型: {model_name}...", OutputType.INFO)
26
+ try:
27
+ local_dir = None
28
+
29
+ if os.path.isdir(model_name):
30
+ self.model = CrossEncoder(model_name)
31
+ PrettyOutput.print("重排模型初始化成功。", OutputType.SUCCESS)
32
+ return
33
+ try:
34
+ # Prefer local cache; avoid any network access
35
+ local_dir = snapshot_download(repo_id=model_name, local_files_only=True)
36
+ except Exception:
37
+ local_dir = None
38
+
39
+ if local_dir:
40
+ self.model = CrossEncoder(local_dir)
41
+ else:
42
+ self.model = CrossEncoder(model_name)
43
+
44
+ PrettyOutput.print("重排模型初始化成功。", OutputType.SUCCESS)
45
+ except Exception as e:
46
+ PrettyOutput.print(f"初始化重排模型失败: {e}", OutputType.ERROR)
47
+ raise
25
48
 
26
49
  def rerank(
27
50
  self, query: str, documents: List[Document], top_n: int = 5