jarvis-ai-assistant 0.7.0__py3-none-any.whl → 0.7.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jarvis/__init__.py +1 -1
- jarvis/jarvis_agent/__init__.py +243 -139
- jarvis/jarvis_agent/agent_manager.py +5 -10
- jarvis/jarvis_agent/builtin_input_handler.py +2 -6
- jarvis/jarvis_agent/config_editor.py +2 -7
- jarvis/jarvis_agent/event_bus.py +82 -12
- jarvis/jarvis_agent/file_context_handler.py +265 -15
- jarvis/jarvis_agent/file_methodology_manager.py +3 -4
- jarvis/jarvis_agent/jarvis.py +113 -98
- jarvis/jarvis_agent/language_extractors/__init__.py +57 -0
- jarvis/jarvis_agent/language_extractors/c_extractor.py +21 -0
- jarvis/jarvis_agent/language_extractors/cpp_extractor.py +21 -0
- jarvis/jarvis_agent/language_extractors/go_extractor.py +21 -0
- jarvis/jarvis_agent/language_extractors/java_extractor.py +84 -0
- jarvis/jarvis_agent/language_extractors/javascript_extractor.py +79 -0
- jarvis/jarvis_agent/language_extractors/python_extractor.py +21 -0
- jarvis/jarvis_agent/language_extractors/rust_extractor.py +21 -0
- jarvis/jarvis_agent/language_extractors/typescript_extractor.py +84 -0
- jarvis/jarvis_agent/language_support_info.py +486 -0
- jarvis/jarvis_agent/main.py +6 -12
- jarvis/jarvis_agent/memory_manager.py +7 -16
- jarvis/jarvis_agent/methodology_share_manager.py +10 -16
- jarvis/jarvis_agent/prompt_manager.py +1 -1
- jarvis/jarvis_agent/prompts.py +193 -171
- jarvis/jarvis_agent/protocols.py +8 -12
- jarvis/jarvis_agent/run_loop.py +77 -14
- jarvis/jarvis_agent/session_manager.py +2 -3
- jarvis/jarvis_agent/share_manager.py +12 -21
- jarvis/jarvis_agent/shell_input_handler.py +1 -2
- jarvis/jarvis_agent/task_analyzer.py +26 -4
- jarvis/jarvis_agent/task_manager.py +11 -27
- jarvis/jarvis_agent/tool_executor.py +2 -3
- jarvis/jarvis_agent/tool_share_manager.py +12 -24
- jarvis/jarvis_agent/web_server.py +55 -20
- jarvis/jarvis_c2rust/__init__.py +5 -5
- jarvis/jarvis_c2rust/cli.py +461 -499
- jarvis/jarvis_c2rust/collector.py +45 -53
- jarvis/jarvis_c2rust/constants.py +26 -0
- jarvis/jarvis_c2rust/library_replacer.py +264 -132
- jarvis/jarvis_c2rust/llm_module_agent.py +162 -190
- jarvis/jarvis_c2rust/loaders.py +207 -0
- jarvis/jarvis_c2rust/models.py +28 -0
- jarvis/jarvis_c2rust/optimizer.py +1592 -395
- jarvis/jarvis_c2rust/transpiler.py +1722 -1064
- jarvis/jarvis_c2rust/utils.py +385 -0
- jarvis/jarvis_code_agent/build_validation_config.py +2 -3
- jarvis/jarvis_code_agent/code_agent.py +394 -320
- jarvis/jarvis_code_agent/code_analyzer/__init__.py +3 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/base.py +4 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/cmake.py +17 -2
- jarvis/jarvis_code_agent/code_analyzer/build_validator/fallback.py +3 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/go.py +36 -4
- jarvis/jarvis_code_agent/code_analyzer/build_validator/java_gradle.py +9 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/java_maven.py +9 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/makefile.py +12 -1
- jarvis/jarvis_code_agent/code_analyzer/build_validator/nodejs.py +22 -5
- jarvis/jarvis_code_agent/code_analyzer/build_validator/python.py +57 -32
- jarvis/jarvis_code_agent/code_analyzer/build_validator/rust.py +62 -6
- jarvis/jarvis_code_agent/code_analyzer/build_validator/validator.py +8 -9
- jarvis/jarvis_code_agent/code_analyzer/context_manager.py +290 -5
- jarvis/jarvis_code_agent/code_analyzer/language_support.py +21 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/__init__.py +21 -3
- jarvis/jarvis_code_agent/code_analyzer/languages/c_cpp_language.py +72 -4
- jarvis/jarvis_code_agent/code_analyzer/languages/go_language.py +35 -3
- jarvis/jarvis_code_agent/code_analyzer/languages/java_language.py +212 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/javascript_language.py +254 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/python_language.py +52 -2
- jarvis/jarvis_code_agent/code_analyzer/languages/rust_language.py +73 -1
- jarvis/jarvis_code_agent/code_analyzer/languages/typescript_language.py +280 -0
- jarvis/jarvis_code_agent/code_analyzer/llm_context_recommender.py +306 -152
- jarvis/jarvis_code_agent/code_analyzer/structured_code.py +556 -0
- jarvis/jarvis_code_agent/code_analyzer/symbol_extractor.py +193 -18
- jarvis/jarvis_code_agent/code_analyzer/tree_sitter_extractor.py +18 -8
- jarvis/jarvis_code_agent/lint.py +258 -27
- jarvis/jarvis_code_agent/utils.py +0 -1
- jarvis/jarvis_code_analysis/code_review.py +19 -24
- jarvis/jarvis_data/config_schema.json +53 -26
- jarvis/jarvis_git_squash/main.py +4 -5
- jarvis/jarvis_git_utils/git_commiter.py +44 -49
- jarvis/jarvis_mcp/sse_mcp_client.py +20 -27
- jarvis/jarvis_mcp/stdio_mcp_client.py +11 -12
- jarvis/jarvis_mcp/streamable_mcp_client.py +15 -14
- jarvis/jarvis_memory_organizer/memory_organizer.py +55 -74
- jarvis/jarvis_methodology/main.py +32 -48
- jarvis/jarvis_multi_agent/__init__.py +79 -61
- jarvis/jarvis_multi_agent/main.py +3 -7
- jarvis/jarvis_platform/base.py +469 -199
- jarvis/jarvis_platform/human.py +7 -8
- jarvis/jarvis_platform/kimi.py +30 -36
- jarvis/jarvis_platform/openai.py +65 -27
- jarvis/jarvis_platform/registry.py +26 -10
- jarvis/jarvis_platform/tongyi.py +24 -25
- jarvis/jarvis_platform/yuanbao.py +31 -42
- jarvis/jarvis_platform_manager/main.py +66 -77
- jarvis/jarvis_platform_manager/service.py +8 -13
- jarvis/jarvis_rag/cli.py +49 -51
- jarvis/jarvis_rag/embedding_manager.py +13 -18
- jarvis/jarvis_rag/llm_interface.py +8 -9
- jarvis/jarvis_rag/query_rewriter.py +10 -21
- jarvis/jarvis_rag/rag_pipeline.py +24 -27
- jarvis/jarvis_rag/reranker.py +4 -5
- jarvis/jarvis_rag/retriever.py +28 -30
- jarvis/jarvis_sec/__init__.py +220 -3520
- jarvis/jarvis_sec/agents.py +143 -0
- jarvis/jarvis_sec/analysis.py +276 -0
- jarvis/jarvis_sec/cli.py +29 -6
- jarvis/jarvis_sec/clustering.py +1439 -0
- jarvis/jarvis_sec/file_manager.py +427 -0
- jarvis/jarvis_sec/parsers.py +73 -0
- jarvis/jarvis_sec/prompts.py +268 -0
- jarvis/jarvis_sec/report.py +83 -4
- jarvis/jarvis_sec/review.py +453 -0
- jarvis/jarvis_sec/utils.py +499 -0
- jarvis/jarvis_sec/verification.py +848 -0
- jarvis/jarvis_sec/workflow.py +7 -0
- jarvis/jarvis_smart_shell/main.py +38 -87
- jarvis/jarvis_stats/cli.py +1 -1
- jarvis/jarvis_stats/stats.py +7 -7
- jarvis/jarvis_stats/storage.py +15 -21
- jarvis/jarvis_tools/clear_memory.py +3 -20
- jarvis/jarvis_tools/cli/main.py +20 -23
- jarvis/jarvis_tools/edit_file.py +1066 -0
- jarvis/jarvis_tools/execute_script.py +42 -21
- jarvis/jarvis_tools/file_analyzer.py +6 -9
- jarvis/jarvis_tools/generate_new_tool.py +11 -20
- jarvis/jarvis_tools/lsp_client.py +1552 -0
- jarvis/jarvis_tools/methodology.py +2 -3
- jarvis/jarvis_tools/read_code.py +1525 -87
- jarvis/jarvis_tools/read_symbols.py +2 -3
- jarvis/jarvis_tools/read_webpage.py +7 -10
- jarvis/jarvis_tools/registry.py +370 -181
- jarvis/jarvis_tools/retrieve_memory.py +20 -19
- jarvis/jarvis_tools/rewrite_file.py +105 -0
- jarvis/jarvis_tools/save_memory.py +3 -15
- jarvis/jarvis_tools/search_web.py +3 -7
- jarvis/jarvis_tools/sub_agent.py +17 -6
- jarvis/jarvis_tools/sub_code_agent.py +14 -16
- jarvis/jarvis_tools/virtual_tty.py +54 -32
- jarvis/jarvis_utils/clipboard.py +7 -10
- jarvis/jarvis_utils/config.py +98 -63
- jarvis/jarvis_utils/embedding.py +5 -5
- jarvis/jarvis_utils/fzf.py +8 -8
- jarvis/jarvis_utils/git_utils.py +81 -67
- jarvis/jarvis_utils/input.py +24 -49
- jarvis/jarvis_utils/jsonnet_compat.py +465 -0
- jarvis/jarvis_utils/methodology.py +33 -35
- jarvis/jarvis_utils/utils.py +245 -202
- {jarvis_ai_assistant-0.7.0.dist-info → jarvis_ai_assistant-0.7.8.dist-info}/METADATA +205 -70
- jarvis_ai_assistant-0.7.8.dist-info/RECORD +218 -0
- jarvis/jarvis_agent/edit_file_handler.py +0 -584
- jarvis/jarvis_agent/rewrite_file_handler.py +0 -141
- jarvis/jarvis_agent/task_planner.py +0 -496
- jarvis/jarvis_platform/ai8.py +0 -332
- jarvis/jarvis_tools/ask_user.py +0 -54
- jarvis_ai_assistant-0.7.0.dist-info/RECORD +0 -192
- {jarvis_ai_assistant-0.7.0.dist-info → jarvis_ai_assistant-0.7.8.dist-info}/WHEEL +0 -0
- {jarvis_ai_assistant-0.7.0.dist-info → jarvis_ai_assistant-0.7.8.dist-info}/entry_points.txt +0 -0
- {jarvis_ai_assistant-0.7.0.dist-info → jarvis_ai_assistant-0.7.8.dist-info}/licenses/LICENSE +0 -0
- {jarvis_ai_assistant-0.7.0.dist-info → jarvis_ai_assistant-0.7.8.dist-info}/top_level.txt +0 -0
jarvis/jarvis_rag/cli.py
CHANGED
|
@@ -20,7 +20,6 @@ from jarvis.jarvis_utils.config import (
|
|
|
20
20
|
get_rag_use_bm25,
|
|
21
21
|
get_rag_use_rerank,
|
|
22
22
|
)
|
|
23
|
-
from jarvis.jarvis_utils.output import OutputType, PrettyOutput
|
|
24
23
|
|
|
25
24
|
|
|
26
25
|
def is_likely_text_file(file_path: Path) -> bool:
|
|
@@ -71,9 +70,8 @@ class _CustomPlatformLLM(LLMInterface):
|
|
|
71
70
|
|
|
72
71
|
def __init__(self, platform: BasePlatform):
|
|
73
72
|
self.platform = platform
|
|
74
|
-
|
|
75
|
-
f"使用自定义LLM: 平台='{platform.platform_name()}', 模型='{platform.name()}'"
|
|
76
|
-
OutputType.INFO,
|
|
73
|
+
print(
|
|
74
|
+
f"ℹ️ 使用自定义LLM: 平台='{platform.platform_name()}', 模型='{platform.name()}'"
|
|
77
75
|
)
|
|
78
76
|
|
|
79
77
|
def generate(self, prompt: str, **kwargs) -> str:
|
|
@@ -88,15 +86,15 @@ def _create_custom_llm(platform_name: str, model_name: str) -> Optional[LLMInter
|
|
|
88
86
|
registry = PlatformRegistry.get_global_platform_registry()
|
|
89
87
|
platform_instance = registry.create_platform(platform_name)
|
|
90
88
|
if not platform_instance:
|
|
91
|
-
|
|
92
|
-
f"错误: 平台 '{platform_name}' 未找到。"
|
|
89
|
+
print(
|
|
90
|
+
f"❌ 错误: 平台 '{platform_name}' 未找到。"
|
|
93
91
|
)
|
|
94
92
|
return None
|
|
95
93
|
platform_instance.set_model_name(model_name)
|
|
96
94
|
platform_instance.set_suppress_output(True)
|
|
97
95
|
return _CustomPlatformLLM(platform_instance)
|
|
98
96
|
except Exception as e:
|
|
99
|
-
|
|
97
|
+
print(f"❌ 创建自定义LLM时出错: {e}")
|
|
100
98
|
return None
|
|
101
99
|
|
|
102
100
|
|
|
@@ -120,13 +118,13 @@ def _load_ragignore_spec() -> Tuple[Optional[pathspec.PathSpec], Optional[Path]]
|
|
|
120
118
|
with open(ignore_file_to_use, "r", encoding="utf-8") as f:
|
|
121
119
|
patterns = f.read().splitlines()
|
|
122
120
|
spec = pathspec.PathSpec.from_lines("gitwildmatch", patterns)
|
|
123
|
-
|
|
124
|
-
f"加载忽略规则: {ignore_file_to_use}"
|
|
121
|
+
print(
|
|
122
|
+
f"✅ 加载忽略规则: {ignore_file_to_use}"
|
|
125
123
|
)
|
|
126
124
|
return spec, project_root_path
|
|
127
125
|
except Exception as e:
|
|
128
|
-
|
|
129
|
-
f"加载 {ignore_file_to_use.name} 文件失败: {e}"
|
|
126
|
+
print(
|
|
127
|
+
f"⚠️ 加载 {ignore_file_to_use.name} 文件失败: {e}"
|
|
130
128
|
)
|
|
131
129
|
|
|
132
130
|
return None, None
|
|
@@ -178,7 +176,7 @@ def add_documents(
|
|
|
178
176
|
continue
|
|
179
177
|
|
|
180
178
|
if path.is_dir():
|
|
181
|
-
|
|
179
|
+
print(f"ℹ️ 正在扫描目录: {path}")
|
|
182
180
|
for item in path.rglob("*"):
|
|
183
181
|
if item.is_file() and is_likely_text_file(item):
|
|
184
182
|
files_to_process.add(item)
|
|
@@ -186,12 +184,12 @@ def add_documents(
|
|
|
186
184
|
if is_likely_text_file(path):
|
|
187
185
|
files_to_process.add(path)
|
|
188
186
|
else:
|
|
189
|
-
|
|
190
|
-
f"跳过可能的二进制文件: {path}"
|
|
187
|
+
print(
|
|
188
|
+
f"⚠️ 跳过可能的二进制文件: {path}"
|
|
191
189
|
)
|
|
192
190
|
|
|
193
191
|
if not files_to_process:
|
|
194
|
-
|
|
192
|
+
print("⚠️ 在指定路径中未找到任何文本文件。")
|
|
195
193
|
return
|
|
196
194
|
|
|
197
195
|
# 使用 .ragignore 过滤文件
|
|
@@ -212,19 +210,19 @@ def add_documents(
|
|
|
212
210
|
|
|
213
211
|
ignored_count = initial_count - len(retained_files)
|
|
214
212
|
if ignored_count > 0:
|
|
215
|
-
|
|
216
|
-
f"根据 .ragignore 规则过滤掉 {ignored_count} 个文件。"
|
|
213
|
+
print(
|
|
214
|
+
f"ℹ️ 根据 .ragignore 规则过滤掉 {ignored_count} 个文件。"
|
|
217
215
|
)
|
|
218
216
|
files_to_process = retained_files
|
|
219
217
|
|
|
220
218
|
if not files_to_process:
|
|
221
|
-
|
|
222
|
-
"所有找到的文本文件都被忽略规则过滤掉了。"
|
|
219
|
+
print(
|
|
220
|
+
"⚠️ 所有找到的文本文件都被忽略规则过滤掉了。"
|
|
223
221
|
)
|
|
224
222
|
return
|
|
225
223
|
|
|
226
|
-
|
|
227
|
-
f"发现 {len(files_to_process)} 个独立文件待处理。"
|
|
224
|
+
print(
|
|
225
|
+
f"ℹ️ 发现 {len(files_to_process)} 个独立文件待处理。"
|
|
228
226
|
)
|
|
229
227
|
|
|
230
228
|
try:
|
|
@@ -252,38 +250,38 @@ def add_documents(
|
|
|
252
250
|
docs_batch.extend(loader.load())
|
|
253
251
|
loaded_msgs.append(f"已加载: {file_path} (文件 {i + 1}/{total_files})")
|
|
254
252
|
except Exception as e:
|
|
255
|
-
|
|
253
|
+
print(f"⚠️ 加载失败 {file_path}: {e}")
|
|
256
254
|
|
|
257
255
|
# 当批处理已满或是最后一个文件时处理批处理
|
|
258
256
|
if docs_batch and (len(docs_batch) >= batch_size or (i + 1) == total_files):
|
|
259
257
|
if loaded_msgs:
|
|
260
|
-
|
|
258
|
+
joined_msgs = '\n'.join(loaded_msgs)
|
|
259
|
+
print(f"ℹ️ {joined_msgs}")
|
|
261
260
|
loaded_msgs = []
|
|
262
|
-
|
|
263
|
-
f"正在处理批次,包含 {len(docs_batch)} 个文档..."
|
|
261
|
+
print(
|
|
262
|
+
f"ℹ️ 正在处理批次,包含 {len(docs_batch)} 个文档..."
|
|
264
263
|
)
|
|
265
264
|
pipeline.add_documents(docs_batch)
|
|
266
265
|
total_docs_added += len(docs_batch)
|
|
267
|
-
|
|
268
|
-
f"成功添加 {len(docs_batch)} 个文档。"
|
|
266
|
+
print(
|
|
267
|
+
f"✅ 成功添加 {len(docs_batch)} 个文档。"
|
|
269
268
|
)
|
|
270
269
|
docs_batch = [] # 清空批处理
|
|
271
270
|
|
|
272
|
-
#
|
|
271
|
+
# 最后统一打印可能残留的"已加载"信息
|
|
273
272
|
if loaded_msgs:
|
|
274
|
-
|
|
273
|
+
print(f"ℹ️ {chr(10).join(loaded_msgs)}")
|
|
275
274
|
loaded_msgs = []
|
|
276
275
|
if total_docs_added == 0:
|
|
277
|
-
|
|
276
|
+
print("❌ 未能成功加载任何文档。")
|
|
278
277
|
raise typer.Exit(code=1)
|
|
279
278
|
|
|
280
|
-
|
|
281
|
-
f"成功将 {total_docs_added} 个文档的内容添加至集合 '{collection_name}'。"
|
|
282
|
-
OutputType.SUCCESS,
|
|
279
|
+
print(
|
|
280
|
+
f"✅ 成功将 {total_docs_added} 个文档的内容添加至集合 '{collection_name}'。"
|
|
283
281
|
)
|
|
284
282
|
|
|
285
283
|
except Exception as e:
|
|
286
|
-
|
|
284
|
+
print(f"❌ 发生严重错误: {e}")
|
|
287
285
|
raise typer.Exit(code=1)
|
|
288
286
|
|
|
289
287
|
|
|
@@ -310,7 +308,7 @@ def list_documents(
|
|
|
310
308
|
results = collection.get() # 获取集合中的所有项目
|
|
311
309
|
|
|
312
310
|
if not results or not results["metadatas"]:
|
|
313
|
-
|
|
311
|
+
print("ℹ️ 知识库中没有找到任何文档。")
|
|
314
312
|
return
|
|
315
313
|
|
|
316
314
|
# 从元数据中提取唯一的源文件路径
|
|
@@ -322,8 +320,8 @@ def list_documents(
|
|
|
322
320
|
sources.add(source)
|
|
323
321
|
|
|
324
322
|
if not sources:
|
|
325
|
-
|
|
326
|
-
"知识库中没有找到任何带有源信息的文档。"
|
|
323
|
+
print(
|
|
324
|
+
"ℹ️ 知识库中没有找到任何带有源信息的文档。"
|
|
327
325
|
)
|
|
328
326
|
return
|
|
329
327
|
|
|
@@ -331,10 +329,11 @@ def list_documents(
|
|
|
331
329
|
lines = [f"知识库 '{collection_name}' 中共有 {len(sources)} 个独立文档:"]
|
|
332
330
|
for i, source in enumerate(sorted(list(sources)), 1):
|
|
333
331
|
lines.append(f" {i}. {source}")
|
|
334
|
-
|
|
332
|
+
joined_lines = '\n'.join(lines)
|
|
333
|
+
print(f"ℹ️ {joined_lines}")
|
|
335
334
|
|
|
336
335
|
except Exception as e:
|
|
337
|
-
|
|
336
|
+
print(f"❌ 发生错误: {e}")
|
|
338
337
|
raise typer.Exit(code=1)
|
|
339
338
|
|
|
340
339
|
|
|
@@ -380,15 +379,15 @@ def retrieve(
|
|
|
380
379
|
use_query_rewrite=rewrite,
|
|
381
380
|
)
|
|
382
381
|
|
|
383
|
-
|
|
382
|
+
print(f"ℹ️ 正在为问题检索文档: '{question}'")
|
|
384
383
|
retrieved_docs = pipeline.retrieve_only(question, n_results=n_results)
|
|
385
384
|
|
|
386
385
|
if not retrieved_docs:
|
|
387
|
-
|
|
386
|
+
print("ℹ️ 未找到相关文档。")
|
|
388
387
|
return
|
|
389
388
|
|
|
390
|
-
|
|
391
|
-
f"成功检索到 {len(retrieved_docs)} 个文档:"
|
|
389
|
+
print(
|
|
390
|
+
f"✅ 成功检索到 {len(retrieved_docs)} 个文档:"
|
|
392
391
|
)
|
|
393
392
|
from jarvis.jarvis_utils.globals import console
|
|
394
393
|
|
|
@@ -400,7 +399,7 @@ def retrieve(
|
|
|
400
399
|
console.print(Markdown(f"```\n{content}\n```"))
|
|
401
400
|
|
|
402
401
|
except Exception as e:
|
|
403
|
-
|
|
402
|
+
print(f"❌ 发生错误: {e}")
|
|
404
403
|
raise typer.Exit(code=1)
|
|
405
404
|
|
|
406
405
|
|
|
@@ -437,7 +436,7 @@ def query(
|
|
|
437
436
|
):
|
|
438
437
|
"""查询RAG知识库并打印答案。"""
|
|
439
438
|
if model and not platform:
|
|
440
|
-
|
|
439
|
+
print("❌ 错误: --model 需要指定 --platform。")
|
|
441
440
|
raise typer.Exit(code=1)
|
|
442
441
|
|
|
443
442
|
try:
|
|
@@ -459,13 +458,13 @@ def query(
|
|
|
459
458
|
use_rerank=use_rerank,
|
|
460
459
|
)
|
|
461
460
|
|
|
462
|
-
|
|
461
|
+
print(f"ℹ️ 正在查询: '{question}'")
|
|
463
462
|
answer = pipeline.query(question)
|
|
464
463
|
|
|
465
|
-
|
|
464
|
+
print(f"✅ {answer}")
|
|
466
465
|
|
|
467
466
|
except Exception as e:
|
|
468
|
-
|
|
467
|
+
print(f"❌ 发生错误: {e}")
|
|
469
468
|
raise typer.Exit(code=1)
|
|
470
469
|
|
|
471
470
|
|
|
@@ -476,9 +475,8 @@ def _check_rag_dependencies():
|
|
|
476
475
|
if not is_rag_installed():
|
|
477
476
|
missing = get_missing_rag_modules()
|
|
478
477
|
missing_str = f"缺少依赖: {', '.join(missing)}。" if missing else ""
|
|
479
|
-
|
|
480
|
-
f"RAG依赖项未安装或不完整。{missing_str}请运行 'pip install \"jarvis-ai-assistant[rag]\"' 后重试。"
|
|
481
|
-
OutputType.ERROR,
|
|
478
|
+
print(
|
|
479
|
+
f"❌ RAG依赖项未安装或不完整。{missing_str}请运行 'pip install \"jarvis-ai-assistant[rag]\"' 后重试。"
|
|
482
480
|
)
|
|
483
481
|
raise typer.Exit(code=1)
|
|
484
482
|
|
|
jarvis/jarvis_rag/embedding_manager.py
CHANGED
|
@@ -6,7 +6,6 @@ from huggingface_hub import snapshot_download
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
from .cache import EmbeddingCache
|
|
9
|
-
from jarvis.jarvis_utils.output import OutputType, PrettyOutput
|
|
10
9
|
|
|
11
10
|
|
|
12
11
|
class EmbeddingManager:
|
|
@@ -27,8 +26,8 @@ class EmbeddingManager:
|
|
|
27
26
|
"""
|
|
28
27
|
self.model_name = model_name
|
|
29
28
|
|
|
30
|
-
|
|
31
|
-
f"初始化嵌入管理器, 模型: '{self.model_name}'..."
|
|
29
|
+
print(
|
|
30
|
+
f"ℹ️ 初始化嵌入管理器, 模型: '{self.model_name}'..."
|
|
32
31
|
)
|
|
33
32
|
|
|
34
33
|
# 缓存的salt是模型名称,以防止冲突
|
|
@@ -160,11 +159,10 @@ class EmbeddingManager:
|
|
|
160
159
|
pass
|
|
161
160
|
|
|
162
161
|
if had_local_candidate:
|
|
163
|
-
|
|
164
|
-
"检测到本地模型路径但加载失败。为避免触发网络访问,已中止远程回退。\n"
|
|
162
|
+
print(
|
|
163
|
+
"❌ 检测到本地模型路径但加载失败。为避免触发网络访问,已中止远程回退。\n"
|
|
165
164
|
"请确认本地目录包含完整的 Transformers/Tokenizer 文件(如 config.json、model.safetensors、tokenizer.json/merges.txt 等),\n"
|
|
166
|
-
"或在配置中将 embedding_model 设置为该本地目录,或将模型放置到默认的 Hugging Face 缓存目录(例如 ~/.cache/huggingface/hub)。"
|
|
167
|
-
OutputType.ERROR,
|
|
165
|
+
"或在配置中将 embedding_model 设置为该本地目录,或将模型放置到默认的 Hugging Face 缓存目录(例如 ~/.cache/huggingface/hub)。"
|
|
168
166
|
)
|
|
169
167
|
raise
|
|
170
168
|
|
|
@@ -176,12 +174,11 @@ class EmbeddingManager:
|
|
|
176
174
|
show_progress=True,
|
|
177
175
|
)
|
|
178
176
|
except Exception as e:
|
|
179
|
-
|
|
180
|
-
f"加载嵌入模型 '{self.model_name}' 时出错: {e}"
|
|
177
|
+
print(
|
|
178
|
+
f"❌ 加载嵌入模型 '{self.model_name}' 时出错: {e}"
|
|
181
179
|
)
|
|
182
|
-
|
|
183
|
-
"请确保您已安装 'sentence_transformers' 和 'torch'。"
|
|
184
|
-
OutputType.WARNING,
|
|
180
|
+
print(
|
|
181
|
+
"⚠️ 请确保您已安装 'sentence_transformers' 和 'torch'。"
|
|
185
182
|
)
|
|
186
183
|
raise
|
|
187
184
|
|
|
@@ -210,9 +207,8 @@ class EmbeddingManager:
|
|
|
210
207
|
|
|
211
208
|
# 为不在缓存中的文本计算嵌入
|
|
212
209
|
if texts_to_embed:
|
|
213
|
-
|
|
214
|
-
f"缓存未命中。正在为 {len(texts_to_embed)}/{len(texts)} 个文档计算嵌入。"
|
|
215
|
-
OutputType.INFO,
|
|
210
|
+
print(
|
|
211
|
+
f"ℹ️ 缓存未命中。正在为 {len(texts_to_embed)}/{len(texts)} 个文档计算嵌入。"
|
|
216
212
|
)
|
|
217
213
|
new_embeddings = self.model.embed_documents(texts_to_embed)
|
|
218
214
|
|
|
@@ -223,9 +219,8 @@ class EmbeddingManager:
|
|
|
223
219
|
for i, embedding in zip(indices_to_embed, new_embeddings):
|
|
224
220
|
cached_embeddings[i] = embedding
|
|
225
221
|
else:
|
|
226
|
-
|
|
227
|
-
f"缓存命中。所有 {len(texts)} 个文档的嵌入均从缓存中检索。"
|
|
228
|
-
OutputType.SUCCESS,
|
|
222
|
+
print(
|
|
223
|
+
f"✅ 缓存命中。所有 {len(texts)} 个文档的嵌入均从缓存中检索。"
|
|
229
224
|
)
|
|
230
225
|
|
|
231
226
|
return cast(List[List[float]], cached_embeddings)
|
|
jarvis/jarvis_rag/llm_interface.py
CHANGED
|
@@ -3,7 +3,6 @@ from abc import ABC, abstractmethod
|
|
|
3
3
|
from jarvis.jarvis_agent import Agent as JarvisAgent
|
|
4
4
|
from jarvis.jarvis_platform.base import BasePlatform
|
|
5
5
|
from jarvis.jarvis_platform.registry import PlatformRegistry
|
|
6
|
-
from jarvis.jarvis_utils.output import OutputType, PrettyOutput
|
|
7
6
|
|
|
8
7
|
|
|
9
8
|
class LLMInterface(ABC):
|
|
@@ -39,7 +38,7 @@ class ToolAgent_LLM(LLMInterface):
|
|
|
39
38
|
"""
|
|
40
39
|
初始化工具-代理 LLM 包装器。
|
|
41
40
|
"""
|
|
42
|
-
|
|
41
|
+
print("ℹ️ 已初始化工具 Agent 作为最终应答者。")
|
|
43
42
|
self.allowed_tools = ["read_code", "execute_script"]
|
|
44
43
|
# 为代理提供一个通用的系统提示
|
|
45
44
|
self.system_prompt = "You are a helpful assistant. Please answer the user's question based on the provided context. You can use tools to find more information if needed."
|
|
@@ -79,7 +78,7 @@ class ToolAgent_LLM(LLMInterface):
|
|
|
79
78
|
return str(final_answer)
|
|
80
79
|
|
|
81
80
|
except Exception as e:
|
|
82
|
-
|
|
81
|
+
print(f"❌ Agent 在执行过程中发生错误: {e}")
|
|
83
82
|
return "错误: Agent 未能成功生成回答。"
|
|
84
83
|
|
|
85
84
|
|
|
@@ -93,17 +92,17 @@ class JarvisPlatform_LLM(LLMInterface):
|
|
|
93
92
|
def __init__(self):
|
|
94
93
|
"""
|
|
95
94
|
初始化Jarvis平台LLM客户端。
|
|
95
|
+
使用cheap平台,适用于查询重写、相关性评估等对模型能力要求一般的任务。
|
|
96
96
|
"""
|
|
97
97
|
try:
|
|
98
98
|
self.registry = PlatformRegistry.get_global_platform_registry()
|
|
99
|
-
self.platform: BasePlatform = self.registry.
|
|
99
|
+
self.platform: BasePlatform = self.registry.get_cheap_platform()
|
|
100
100
|
self.platform.set_suppress_output(False) # 确保模型没有控制台输出
|
|
101
|
-
|
|
102
|
-
f"已初始化 Jarvis 平台 LLM
|
|
103
|
-
OutputType.INFO,
|
|
101
|
+
print(
|
|
102
|
+
f"ℹ️ 已初始化 Jarvis 平台 LLM(cheap),模型: {self.platform.name()}"
|
|
104
103
|
)
|
|
105
104
|
except Exception as e:
|
|
106
|
-
|
|
105
|
+
print(f"❌ 初始化 Jarvis 平台 LLM 失败: {e}")
|
|
107
106
|
raise
|
|
108
107
|
|
|
109
108
|
def generate(self, prompt: str, **kwargs) -> str:
|
|
@@ -121,5 +120,5 @@ class JarvisPlatform_LLM(LLMInterface):
|
|
|
121
120
|
# 使用健壮的chat_until_success方法
|
|
122
121
|
return self.platform.chat_until_success(prompt)
|
|
123
122
|
except Exception as e:
|
|
124
|
-
|
|
123
|
+
print(f"❌ 调用 Jarvis 平台模型时发生错误: {e}")
|
|
125
124
|
return "错误: 无法从本地LLM获取响应。"
|
|
jarvis/jarvis_rag/query_rewriter.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from typing import List
|
|
2
2
|
from .llm_interface import LLMInterface
|
|
3
|
-
from jarvis.jarvis_utils.output import PrettyOutput, OutputType
|
|
4
3
|
|
|
5
4
|
|
|
6
5
|
class QueryRewriter:
|
|
@@ -57,10 +56,8 @@ English version of the query
|
|
|
57
56
|
一个经过重写、搜索优化的查询列表。
|
|
58
57
|
"""
|
|
59
58
|
prompt = self.rewrite_prompt_template.format(query=query)
|
|
60
|
-
|
|
61
|
-
"正在将原始查询重写为多个搜索查询..."
|
|
62
|
-
output_type=OutputType.INFO,
|
|
63
|
-
timestamp=False,
|
|
59
|
+
print(
|
|
60
|
+
"ℹ️ 正在将原始查询重写为多个搜索查询..."
|
|
64
61
|
)
|
|
65
62
|
|
|
66
63
|
import re
|
|
@@ -80,34 +77,26 @@ English version of the query
|
|
|
80
77
|
rewritten_queries = [
|
|
81
78
|
line.strip() for line in content.split("\n") if line.strip()
|
|
82
79
|
]
|
|
83
|
-
|
|
84
|
-
f"成功从LLM响应中提取到内容 (尝试 {attempts}/{max_retries})。"
|
|
85
|
-
output_type=OutputType.SUCCESS,
|
|
86
|
-
timestamp=False,
|
|
80
|
+
print(
|
|
81
|
+
f"✅ 成功从LLM响应中提取到内容 (尝试 {attempts}/{max_retries})。"
|
|
87
82
|
)
|
|
88
83
|
break # 提取成功,退出循环
|
|
89
84
|
else:
|
|
90
|
-
|
|
91
|
-
f"未能从LLM响应中提取内容。正在重试... ({attempts}/{max_retries})"
|
|
92
|
-
output_type=OutputType.WARNING,
|
|
93
|
-
timestamp=False,
|
|
85
|
+
print(
|
|
86
|
+
f"⚠️ 未能从LLM响应中提取内容。正在重试... ({attempts}/{max_retries})"
|
|
94
87
|
)
|
|
95
88
|
|
|
96
89
|
# 如果所有重试都失败,则跳过重写步骤
|
|
97
90
|
if not rewritten_queries:
|
|
98
|
-
|
|
99
|
-
"所有重试均失败。跳过查询重写,将仅使用原始查询。"
|
|
100
|
-
output_type=OutputType.ERROR,
|
|
101
|
-
timestamp=False,
|
|
91
|
+
print(
|
|
92
|
+
"❌ 所有重试均失败。跳过查询重写,将仅使用原始查询。"
|
|
102
93
|
)
|
|
103
94
|
|
|
104
95
|
# 同时包含原始查询以保证鲁棒性
|
|
105
96
|
if query not in rewritten_queries:
|
|
106
97
|
rewritten_queries.insert(0, query)
|
|
107
98
|
|
|
108
|
-
|
|
109
|
-
f"生成了 {len(rewritten_queries)} 个查询变体。"
|
|
110
|
-
output_type=OutputType.SUCCESS,
|
|
111
|
-
timestamp=False,
|
|
99
|
+
print(
|
|
100
|
+
f"✅ 生成了 {len(rewritten_queries)} 个查询变体。"
|
|
112
101
|
)
|
|
113
102
|
return rewritten_queries
|
|
jarvis/jarvis_rag/rag_pipeline.py
CHANGED
|
@@ -8,7 +8,6 @@ from .llm_interface import JarvisPlatform_LLM, LLMInterface, ToolAgent_LLM
|
|
|
8
8
|
from .query_rewriter import QueryRewriter
|
|
9
9
|
from .reranker import Reranker
|
|
10
10
|
from .retriever import ChromaRetriever
|
|
11
|
-
from jarvis.jarvis_utils.output import OutputType, PrettyOutput
|
|
12
11
|
from jarvis.jarvis_utils.config import (
|
|
13
12
|
get_rag_embedding_model,
|
|
14
13
|
get_rag_rerank_model,
|
|
@@ -79,8 +78,8 @@ class JarvisRAGPipeline:
|
|
|
79
78
|
self._reranker: Optional[Reranker] = None
|
|
80
79
|
self._query_rewriter: Optional[QueryRewriter] = None
|
|
81
80
|
|
|
82
|
-
|
|
83
|
-
"JarvisRAGPipeline 初始化成功 (模型按需加载)."
|
|
81
|
+
print(
|
|
82
|
+
"✅ JarvisRAGPipeline 初始化成功 (模型按需加载)."
|
|
84
83
|
)
|
|
85
84
|
|
|
86
85
|
def _get_embedding_manager(self) -> EmbeddingManager:
|
|
@@ -172,18 +171,19 @@ class JarvisRAGPipeline:
|
|
|
172
171
|
lines.extend([f" 变更: {p}" for p in changed[:3]])
|
|
173
172
|
if deleted:
|
|
174
173
|
lines.extend([f" 删除: {p}" for p in deleted[:3]])
|
|
175
|
-
|
|
174
|
+
joined_lines = '\n'.join(lines)
|
|
175
|
+
print(f"⚠️ {joined_lines}")
|
|
176
176
|
# 询问用户
|
|
177
177
|
if get_yes_no(
|
|
178
178
|
"检测到索引变更,是否现在更新索引后再开始检索?", default=True
|
|
179
179
|
):
|
|
180
180
|
retriever.update_index_for_changes(changed, deleted)
|
|
181
181
|
else:
|
|
182
|
-
|
|
183
|
-
"已跳过索引更新,将直接使用当前索引进行检索。"
|
|
182
|
+
print(
|
|
183
|
+
"ℹ️ 已跳过索引更新,将直接使用当前索引进行检索。"
|
|
184
184
|
)
|
|
185
185
|
except Exception as e:
|
|
186
|
-
|
|
186
|
+
print(f"⚠️ 检索前索引检查失败:{e}")
|
|
187
187
|
|
|
188
188
|
def add_documents(self, documents: List[Document]):
|
|
189
189
|
"""
|
|
@@ -236,16 +236,15 @@ class JarvisRAGPipeline:
|
|
|
236
236
|
if self.use_query_rewrite:
|
|
237
237
|
rewritten_queries = self._get_query_rewriter().rewrite(query_text)
|
|
238
238
|
else:
|
|
239
|
-
|
|
240
|
-
"已关闭查询重写,将直接使用原始查询进行检索。"
|
|
241
|
-
OutputType.INFO,
|
|
239
|
+
print(
|
|
240
|
+
"ℹ️ 已关闭查询重写,将直接使用原始查询进行检索。"
|
|
242
241
|
)
|
|
243
242
|
rewritten_queries = [query_text]
|
|
244
243
|
|
|
245
244
|
# 2. 为每个重写的查询检索初始候选文档
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
245
|
+
query_lines = '\n'.join([f' - {q}' for q in rewritten_queries])
|
|
246
|
+
print(
|
|
247
|
+
f"ℹ️ 将为以下查询变体进行混合检索:\n{query_lines}"
|
|
249
248
|
)
|
|
250
249
|
all_candidate_docs = []
|
|
251
250
|
for q in rewritten_queries:
|
|
@@ -263,9 +262,8 @@ class JarvisRAGPipeline:
|
|
|
263
262
|
|
|
264
263
|
# 3. 根据*原始*查询对统一的候选池进行重排
|
|
265
264
|
if self.use_rerank:
|
|
266
|
-
|
|
267
|
-
f"正在对 {len(unique_candidate_docs)} 个候选文档进行重排(基于原始问题)..."
|
|
268
|
-
OutputType.INFO,
|
|
265
|
+
print(
|
|
266
|
+
f"ℹ️ 正在对 {len(unique_candidate_docs)} 个候选文档进行重排(基于原始问题)..."
|
|
269
267
|
)
|
|
270
268
|
retrieved_docs = self._get_reranker().rerank(
|
|
271
269
|
query_text, unique_candidate_docs, top_n=n_results
|
|
@@ -289,13 +287,14 @@ class JarvisRAGPipeline:
|
|
|
289
287
|
if sources:
|
|
290
288
|
# 合并来源列表后一次性打印,避免多次加框
|
|
291
289
|
lines = ["根据以下文档回答:"] + [f" - {source}" for source in sources]
|
|
292
|
-
|
|
290
|
+
joined_lines = '\n'.join(lines)
|
|
291
|
+
print(f"ℹ️ {joined_lines}")
|
|
293
292
|
|
|
294
293
|
# 4. 创建最终提示并生成答案
|
|
295
294
|
# 我们使用原始的query_text作为给LLM的最终提示
|
|
296
295
|
prompt = self._create_prompt(query_text, retrieved_docs)
|
|
297
296
|
|
|
298
|
-
|
|
297
|
+
print("ℹ️ 正在从LLM生成答案...")
|
|
299
298
|
answer = self.llm.generate(prompt)
|
|
300
299
|
|
|
301
300
|
return answer
|
|
@@ -317,16 +316,15 @@ class JarvisRAGPipeline:
|
|
|
317
316
|
if self.use_query_rewrite:
|
|
318
317
|
rewritten_queries = self._get_query_rewriter().rewrite(query_text)
|
|
319
318
|
else:
|
|
320
|
-
|
|
321
|
-
"已关闭查询重写,将直接使用原始查询进行检索。"
|
|
322
|
-
OutputType.INFO,
|
|
319
|
+
print(
|
|
320
|
+
"ℹ️ 已关闭查询重写,将直接使用原始查询进行检索。"
|
|
323
321
|
)
|
|
324
322
|
rewritten_queries = [query_text]
|
|
325
323
|
|
|
326
324
|
# 2. 检索候选文档
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
325
|
+
query_lines = '\n'.join([f' - {q}' for q in rewritten_queries])
|
|
326
|
+
print(
|
|
327
|
+
f"ℹ️ 将为以下查询变体进行混合检索:\n{query_lines}"
|
|
330
328
|
)
|
|
331
329
|
all_candidate_docs = []
|
|
332
330
|
for q in rewritten_queries:
|
|
@@ -343,9 +341,8 @@ class JarvisRAGPipeline:
|
|
|
343
341
|
|
|
344
342
|
# 3. 重排
|
|
345
343
|
if self.use_rerank:
|
|
346
|
-
|
|
347
|
-
f"正在对 {len(unique_candidate_docs)} 个候选文档进行重排..."
|
|
348
|
-
OutputType.INFO,
|
|
344
|
+
print(
|
|
345
|
+
f"ℹ️ 正在对 {len(unique_candidate_docs)} 个候选文档进行重排..."
|
|
349
346
|
)
|
|
350
347
|
retrieved_docs = self._get_reranker().rerank(
|
|
351
348
|
query_text, unique_candidate_docs, top_n=n_results
|
jarvis/jarvis_rag/reranker.py
CHANGED
|
@@ -6,7 +6,6 @@ from sentence_transformers.cross_encoder import ( # type: ignore
|
|
|
6
6
|
CrossEncoder,
|
|
7
7
|
)
|
|
8
8
|
from huggingface_hub import snapshot_download
|
|
9
|
-
from jarvis.jarvis_utils.output import OutputType, PrettyOutput
|
|
10
9
|
|
|
11
10
|
|
|
12
11
|
class Reranker:
|
|
@@ -22,13 +21,13 @@ class Reranker:
|
|
|
22
21
|
参数:
|
|
23
22
|
model_name (str): 要使用的Cross-Encoder模型的名称。
|
|
24
23
|
"""
|
|
25
|
-
|
|
24
|
+
print(f"ℹ️ 正在初始化重排模型: {model_name}...")
|
|
26
25
|
try:
|
|
27
26
|
local_dir = None
|
|
28
27
|
|
|
29
28
|
if os.path.isdir(model_name):
|
|
30
29
|
self.model = CrossEncoder(model_name)
|
|
31
|
-
|
|
30
|
+
print("✅ 重排模型初始化成功。")
|
|
32
31
|
return
|
|
33
32
|
try:
|
|
34
33
|
# Prefer local cache; avoid any network access
|
|
@@ -41,9 +40,9 @@ class Reranker:
|
|
|
41
40
|
else:
|
|
42
41
|
self.model = CrossEncoder(model_name)
|
|
43
42
|
|
|
44
|
-
|
|
43
|
+
print("✅ 重排模型初始化成功。")
|
|
45
44
|
except Exception as e:
|
|
46
|
-
|
|
45
|
+
print(f"❌ 初始化重排模型失败: {e}")
|
|
47
46
|
raise
|
|
48
47
|
|
|
49
48
|
def rerank(
|