jarvis-ai-assistant 0.1.222__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jarvis/__init__.py +1 -1
- jarvis/jarvis_agent/__init__.py +1143 -245
- jarvis/jarvis_agent/agent_manager.py +97 -0
- jarvis/jarvis_agent/builtin_input_handler.py +12 -10
- jarvis/jarvis_agent/config_editor.py +57 -0
- jarvis/jarvis_agent/edit_file_handler.py +392 -99
- jarvis/jarvis_agent/event_bus.py +48 -0
- jarvis/jarvis_agent/events.py +157 -0
- jarvis/jarvis_agent/file_context_handler.py +79 -0
- jarvis/jarvis_agent/file_methodology_manager.py +117 -0
- jarvis/jarvis_agent/jarvis.py +1117 -147
- jarvis/jarvis_agent/main.py +78 -34
- jarvis/jarvis_agent/memory_manager.py +195 -0
- jarvis/jarvis_agent/methodology_share_manager.py +174 -0
- jarvis/jarvis_agent/prompt_manager.py +82 -0
- jarvis/jarvis_agent/prompts.py +46 -9
- jarvis/jarvis_agent/protocols.py +4 -1
- jarvis/jarvis_agent/rewrite_file_handler.py +141 -0
- jarvis/jarvis_agent/run_loop.py +146 -0
- jarvis/jarvis_agent/session_manager.py +9 -9
- jarvis/jarvis_agent/share_manager.py +228 -0
- jarvis/jarvis_agent/shell_input_handler.py +23 -3
- jarvis/jarvis_agent/stdio_redirect.py +295 -0
- jarvis/jarvis_agent/task_analyzer.py +212 -0
- jarvis/jarvis_agent/task_manager.py +154 -0
- jarvis/jarvis_agent/task_planner.py +496 -0
- jarvis/jarvis_agent/tool_executor.py +8 -4
- jarvis/jarvis_agent/tool_share_manager.py +139 -0
- jarvis/jarvis_agent/user_interaction.py +42 -0
- jarvis/jarvis_agent/utils.py +54 -0
- jarvis/jarvis_agent/web_bridge.py +189 -0
- jarvis/jarvis_agent/web_output_sink.py +53 -0
- jarvis/jarvis_agent/web_server.py +751 -0
- jarvis/jarvis_c2rust/__init__.py +26 -0
- jarvis/jarvis_c2rust/cli.py +613 -0
- jarvis/jarvis_c2rust/collector.py +258 -0
- jarvis/jarvis_c2rust/library_replacer.py +1122 -0
- jarvis/jarvis_c2rust/llm_module_agent.py +1300 -0
- jarvis/jarvis_c2rust/optimizer.py +960 -0
- jarvis/jarvis_c2rust/scanner.py +1681 -0
- jarvis/jarvis_c2rust/transpiler.py +2325 -0
- jarvis/jarvis_code_agent/build_validation_config.py +133 -0
- jarvis/jarvis_code_agent/code_agent.py +1605 -178
- jarvis/jarvis_code_agent/code_analyzer/__init__.py +62 -0
- jarvis/jarvis_code_agent/code_analyzer/base_language.py +74 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/__init__.py +44 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/base.py +102 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/cmake.py +59 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/detector.py +125 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/fallback.py +69 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/go.py +38 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/java_gradle.py +44 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/java_maven.py +38 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/makefile.py +50 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/nodejs.py +93 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/python.py +129 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/rust.py +54 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/validator.py +154 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator.py +43 -0
- jarvis/jarvis_code_agent/code_analyzer/context_manager.py +363 -0
- jarvis/jarvis_code_agent/code_analyzer/context_recommender.py +18 -0
- jarvis/jarvis_code_agent/code_analyzer/dependency_analyzer.py +132 -0
- jarvis/jarvis_code_agent/code_analyzer/file_ignore.py +330 -0
- jarvis/jarvis_code_agent/code_analyzer/impact_analyzer.py +781 -0
- jarvis/jarvis_code_agent/code_analyzer/language_registry.py +185 -0
- jarvis/jarvis_code_agent/code_analyzer/language_support.py +89 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/__init__.py +31 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/c_cpp_language.py +231 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/go_language.py +183 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/python_language.py +219 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/rust_language.py +209 -0
- jarvis/jarvis_code_agent/code_analyzer/llm_context_recommender.py +451 -0
- jarvis/jarvis_code_agent/code_analyzer/symbol_extractor.py +77 -0
- jarvis/jarvis_code_agent/code_analyzer/tree_sitter_extractor.py +48 -0
- jarvis/jarvis_code_agent/lint.py +275 -13
- jarvis/jarvis_code_agent/utils.py +142 -0
- jarvis/jarvis_code_analysis/checklists/loader.py +20 -6
- jarvis/jarvis_code_analysis/code_review.py +583 -548
- jarvis/jarvis_data/config_schema.json +339 -28
- jarvis/jarvis_git_squash/main.py +22 -13
- jarvis/jarvis_git_utils/git_commiter.py +171 -55
- jarvis/jarvis_mcp/sse_mcp_client.py +22 -15
- jarvis/jarvis_mcp/stdio_mcp_client.py +4 -4
- jarvis/jarvis_mcp/streamable_mcp_client.py +36 -16
- jarvis/jarvis_memory_organizer/memory_organizer.py +753 -0
- jarvis/jarvis_methodology/main.py +48 -63
- jarvis/jarvis_multi_agent/__init__.py +302 -43
- jarvis/jarvis_multi_agent/main.py +70 -24
- jarvis/jarvis_platform/ai8.py +40 -23
- jarvis/jarvis_platform/base.py +210 -49
- jarvis/jarvis_platform/human.py +11 -1
- jarvis/jarvis_platform/kimi.py +82 -76
- jarvis/jarvis_platform/openai.py +73 -1
- jarvis/jarvis_platform/registry.py +8 -15
- jarvis/jarvis_platform/tongyi.py +115 -101
- jarvis/jarvis_platform/yuanbao.py +89 -63
- jarvis/jarvis_platform_manager/main.py +194 -132
- jarvis/jarvis_platform_manager/service.py +122 -86
- jarvis/jarvis_rag/cli.py +156 -53
- jarvis/jarvis_rag/embedding_manager.py +155 -12
- jarvis/jarvis_rag/llm_interface.py +10 -13
- jarvis/jarvis_rag/query_rewriter.py +63 -12
- jarvis/jarvis_rag/rag_pipeline.py +222 -40
- jarvis/jarvis_rag/reranker.py +26 -3
- jarvis/jarvis_rag/retriever.py +270 -14
- jarvis/jarvis_sec/__init__.py +3605 -0
- jarvis/jarvis_sec/checkers/__init__.py +32 -0
- jarvis/jarvis_sec/checkers/c_checker.py +2680 -0
- jarvis/jarvis_sec/checkers/rust_checker.py +1108 -0
- jarvis/jarvis_sec/cli.py +116 -0
- jarvis/jarvis_sec/report.py +257 -0
- jarvis/jarvis_sec/status.py +264 -0
- jarvis/jarvis_sec/types.py +20 -0
- jarvis/jarvis_sec/workflow.py +219 -0
- jarvis/jarvis_smart_shell/main.py +405 -137
- jarvis/jarvis_stats/__init__.py +13 -0
- jarvis/jarvis_stats/cli.py +387 -0
- jarvis/jarvis_stats/stats.py +711 -0
- jarvis/jarvis_stats/storage.py +612 -0
- jarvis/jarvis_stats/visualizer.py +282 -0
- jarvis/jarvis_tools/ask_user.py +1 -0
- jarvis/jarvis_tools/base.py +18 -2
- jarvis/jarvis_tools/clear_memory.py +239 -0
- jarvis/jarvis_tools/cli/main.py +220 -144
- jarvis/jarvis_tools/execute_script.py +52 -12
- jarvis/jarvis_tools/file_analyzer.py +17 -12
- jarvis/jarvis_tools/generate_new_tool.py +46 -24
- jarvis/jarvis_tools/read_code.py +277 -18
- jarvis/jarvis_tools/read_symbols.py +141 -0
- jarvis/jarvis_tools/read_webpage.py +86 -13
- jarvis/jarvis_tools/registry.py +294 -90
- jarvis/jarvis_tools/retrieve_memory.py +227 -0
- jarvis/jarvis_tools/save_memory.py +194 -0
- jarvis/jarvis_tools/search_web.py +62 -28
- jarvis/jarvis_tools/sub_agent.py +205 -0
- jarvis/jarvis_tools/sub_code_agent.py +217 -0
- jarvis/jarvis_tools/virtual_tty.py +330 -62
- jarvis/jarvis_utils/builtin_replace_map.py +4 -5
- jarvis/jarvis_utils/clipboard.py +90 -0
- jarvis/jarvis_utils/config.py +607 -50
- jarvis/jarvis_utils/embedding.py +3 -0
- jarvis/jarvis_utils/fzf.py +57 -0
- jarvis/jarvis_utils/git_utils.py +251 -29
- jarvis/jarvis_utils/globals.py +174 -17
- jarvis/jarvis_utils/http.py +58 -79
- jarvis/jarvis_utils/input.py +899 -153
- jarvis/jarvis_utils/methodology.py +210 -83
- jarvis/jarvis_utils/output.py +220 -137
- jarvis/jarvis_utils/utils.py +1906 -135
- jarvis_ai_assistant-0.7.0.dist-info/METADATA +465 -0
- jarvis_ai_assistant-0.7.0.dist-info/RECORD +192 -0
- {jarvis_ai_assistant-0.1.222.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/entry_points.txt +8 -2
- jarvis/jarvis_git_details/main.py +0 -265
- jarvis/jarvis_platform/oyi.py +0 -357
- jarvis/jarvis_tools/edit_file.py +0 -255
- jarvis/jarvis_tools/rewrite_file.py +0 -195
- jarvis_ai_assistant-0.1.222.dist-info/METADATA +0 -767
- jarvis_ai_assistant-0.1.222.dist-info/RECORD +0 -110
- /jarvis/{jarvis_git_details → jarvis_memory_organizer}/__init__.py +0 -0
- {jarvis_ai_assistant-0.1.222.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/WHEEL +0 -0
- {jarvis_ai_assistant-0.1.222.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/licenses/LICENSE +0 -0
- {jarvis_ai_assistant-0.1.222.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/top_level.txt +0 -0
jarvis/jarvis_rag/cli.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import sys
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from typing import Optional, List,
|
|
4
|
+
from typing import Optional, List, Tuple
|
|
5
5
|
import mimetypes
|
|
6
6
|
|
|
7
|
-
import pathspec
|
|
7
|
+
import pathspec # type: ignore
|
|
8
8
|
import typer
|
|
9
9
|
from langchain.docstore.document import Document
|
|
10
10
|
from langchain_community.document_loaders import (
|
|
@@ -14,7 +14,13 @@ from langchain_community.document_loaders import (
|
|
|
14
14
|
from langchain_core.document_loaders.base import BaseLoader
|
|
15
15
|
from rich.markdown import Markdown
|
|
16
16
|
|
|
17
|
-
from jarvis.jarvis_utils.utils import init_env
|
|
17
|
+
from jarvis.jarvis_utils.utils import init_env, is_rag_installed, get_missing_rag_modules
|
|
18
|
+
from jarvis.jarvis_utils.config import (
|
|
19
|
+
get_rag_embedding_model,
|
|
20
|
+
get_rag_use_bm25,
|
|
21
|
+
get_rag_use_rerank,
|
|
22
|
+
)
|
|
23
|
+
from jarvis.jarvis_utils.output import OutputType, PrettyOutput
|
|
18
24
|
|
|
19
25
|
|
|
20
26
|
def is_likely_text_file(file_path: Path) -> bool:
|
|
@@ -48,10 +54,10 @@ _project_root = os.path.abspath(
|
|
|
48
54
|
if _project_root not in sys.path:
|
|
49
55
|
sys.path.insert(0, _project_root)
|
|
50
56
|
|
|
51
|
-
from jarvis.jarvis_platform.base import BasePlatform
|
|
52
|
-
from jarvis.jarvis_platform.registry import PlatformRegistry
|
|
53
|
-
from jarvis.jarvis_rag.llm_interface import LLMInterface
|
|
54
|
-
from jarvis.jarvis_rag.rag_pipeline import JarvisRAGPipeline
|
|
57
|
+
from jarvis.jarvis_platform.base import BasePlatform # noqa: E402
|
|
58
|
+
from jarvis.jarvis_platform.registry import PlatformRegistry # noqa: E402
|
|
59
|
+
from jarvis.jarvis_rag.llm_interface import LLMInterface # noqa: E402
|
|
60
|
+
from jarvis.jarvis_rag.rag_pipeline import JarvisRAGPipeline # noqa: E402
|
|
55
61
|
|
|
56
62
|
app = typer.Typer(
|
|
57
63
|
name="jarvis-rag",
|
|
@@ -65,8 +71,9 @@ class _CustomPlatformLLM(LLMInterface):
|
|
|
65
71
|
|
|
66
72
|
def __init__(self, platform: BasePlatform):
|
|
67
73
|
self.platform = platform
|
|
68
|
-
print(
|
|
69
|
-
f"
|
|
74
|
+
PrettyOutput.print(
|
|
75
|
+
f"使用自定义LLM: 平台='{platform.platform_name()}', 模型='{platform.name()}'",
|
|
76
|
+
OutputType.INFO,
|
|
70
77
|
)
|
|
71
78
|
|
|
72
79
|
def generate(self, prompt: str, **kwargs) -> str:
|
|
@@ -81,17 +88,19 @@ def _create_custom_llm(platform_name: str, model_name: str) -> Optional[LLMInter
|
|
|
81
88
|
registry = PlatformRegistry.get_global_platform_registry()
|
|
82
89
|
platform_instance = registry.create_platform(platform_name)
|
|
83
90
|
if not platform_instance:
|
|
84
|
-
print(
|
|
91
|
+
PrettyOutput.print(
|
|
92
|
+
f"错误: 平台 '{platform_name}' 未找到。", OutputType.ERROR
|
|
93
|
+
)
|
|
85
94
|
return None
|
|
86
95
|
platform_instance.set_model_name(model_name)
|
|
87
96
|
platform_instance.set_suppress_output(True)
|
|
88
97
|
return _CustomPlatformLLM(platform_instance)
|
|
89
98
|
except Exception as e:
|
|
90
|
-
print(f"
|
|
99
|
+
PrettyOutput.print(f"创建自定义LLM时出错: {e}", OutputType.ERROR)
|
|
91
100
|
return None
|
|
92
101
|
|
|
93
102
|
|
|
94
|
-
def _load_ragignore_spec() ->
|
|
103
|
+
def _load_ragignore_spec() -> Tuple[Optional[pathspec.PathSpec], Optional[Path]]:
|
|
95
104
|
"""
|
|
96
105
|
从项目根目录加载忽略模式。
|
|
97
106
|
首先查找 `.jarvis/rag/.ragignore`,如果未找到,则回退到 `.gitignore`。
|
|
@@ -111,10 +120,14 @@ def _load_ragignore_spec() -> tuple[Optional[pathspec.PathSpec], Optional[Path]]
|
|
|
111
120
|
with open(ignore_file_to_use, "r", encoding="utf-8") as f:
|
|
112
121
|
patterns = f.read().splitlines()
|
|
113
122
|
spec = pathspec.PathSpec.from_lines("gitwildmatch", patterns)
|
|
114
|
-
print(
|
|
123
|
+
PrettyOutput.print(
|
|
124
|
+
f"加载忽略规则: {ignore_file_to_use}", OutputType.SUCCESS
|
|
125
|
+
)
|
|
115
126
|
return spec, project_root_path
|
|
116
127
|
except Exception as e:
|
|
117
|
-
print(
|
|
128
|
+
PrettyOutput.print(
|
|
129
|
+
f"加载 {ignore_file_to_use.name} 文件失败: {e}", OutputType.WARNING
|
|
130
|
+
)
|
|
118
131
|
|
|
119
132
|
return None, None
|
|
120
133
|
|
|
@@ -165,7 +178,7 @@ def add_documents(
|
|
|
165
178
|
continue
|
|
166
179
|
|
|
167
180
|
if path.is_dir():
|
|
168
|
-
print(f"
|
|
181
|
+
PrettyOutput.print(f"正在扫描目录: {path}", OutputType.INFO)
|
|
169
182
|
for item in path.rglob("*"):
|
|
170
183
|
if item.is_file() and is_likely_text_file(item):
|
|
171
184
|
files_to_process.add(item)
|
|
@@ -173,10 +186,12 @@ def add_documents(
|
|
|
173
186
|
if is_likely_text_file(path):
|
|
174
187
|
files_to_process.add(path)
|
|
175
188
|
else:
|
|
176
|
-
print(
|
|
189
|
+
PrettyOutput.print(
|
|
190
|
+
f"跳过可能的二进制文件: {path}", OutputType.WARNING
|
|
191
|
+
)
|
|
177
192
|
|
|
178
193
|
if not files_to_process:
|
|
179
|
-
print("
|
|
194
|
+
PrettyOutput.print("在指定路径中未找到任何文本文件。", OutputType.WARNING)
|
|
180
195
|
return
|
|
181
196
|
|
|
182
197
|
# 使用 .ragignore 过滤文件
|
|
@@ -197,14 +212,20 @@ def add_documents(
|
|
|
197
212
|
|
|
198
213
|
ignored_count = initial_count - len(retained_files)
|
|
199
214
|
if ignored_count > 0:
|
|
200
|
-
print(
|
|
215
|
+
PrettyOutput.print(
|
|
216
|
+
f"根据 .ragignore 规则过滤掉 {ignored_count} 个文件。", OutputType.INFO
|
|
217
|
+
)
|
|
201
218
|
files_to_process = retained_files
|
|
202
219
|
|
|
203
220
|
if not files_to_process:
|
|
204
|
-
print(
|
|
221
|
+
PrettyOutput.print(
|
|
222
|
+
"所有找到的文本文件都被忽略规则过滤掉了。", OutputType.WARNING
|
|
223
|
+
)
|
|
205
224
|
return
|
|
206
225
|
|
|
207
|
-
print(
|
|
226
|
+
PrettyOutput.print(
|
|
227
|
+
f"发现 {len(files_to_process)} 个独立文件待处理。", OutputType.INFO
|
|
228
|
+
)
|
|
208
229
|
|
|
209
230
|
try:
|
|
210
231
|
pipeline = JarvisRAGPipeline(
|
|
@@ -219,6 +240,7 @@ def add_documents(
|
|
|
219
240
|
|
|
220
241
|
sorted_files = sorted(list(files_to_process))
|
|
221
242
|
total_files = len(sorted_files)
|
|
243
|
+
loaded_msgs: List[str] = []
|
|
222
244
|
|
|
223
245
|
for i, file_path in enumerate(sorted_files):
|
|
224
246
|
try:
|
|
@@ -228,28 +250,40 @@ def add_documents(
|
|
|
228
250
|
loader = TextLoader(str(file_path), encoding="utf-8")
|
|
229
251
|
|
|
230
252
|
docs_batch.extend(loader.load())
|
|
231
|
-
|
|
253
|
+
loaded_msgs.append(f"已加载: {file_path} (文件 {i + 1}/{total_files})")
|
|
232
254
|
except Exception as e:
|
|
233
|
-
print(f"
|
|
255
|
+
PrettyOutput.print(f"加载失败 {file_path}: {e}", OutputType.WARNING)
|
|
234
256
|
|
|
235
257
|
# 当批处理已满或是最后一个文件时处理批处理
|
|
236
258
|
if docs_batch and (len(docs_batch) >= batch_size or (i + 1) == total_files):
|
|
237
|
-
|
|
259
|
+
if loaded_msgs:
|
|
260
|
+
PrettyOutput.print("\n".join(loaded_msgs), OutputType.INFO)
|
|
261
|
+
loaded_msgs = []
|
|
262
|
+
PrettyOutput.print(
|
|
263
|
+
f"正在处理批次,包含 {len(docs_batch)} 个文档...", OutputType.INFO
|
|
264
|
+
)
|
|
238
265
|
pipeline.add_documents(docs_batch)
|
|
239
266
|
total_docs_added += len(docs_batch)
|
|
240
|
-
print(
|
|
267
|
+
PrettyOutput.print(
|
|
268
|
+
f"成功添加 {len(docs_batch)} 个文档。", OutputType.SUCCESS
|
|
269
|
+
)
|
|
241
270
|
docs_batch = [] # 清空批处理
|
|
242
271
|
|
|
272
|
+
# 最后统一打印可能残留的“已加载”信息
|
|
273
|
+
if loaded_msgs:
|
|
274
|
+
PrettyOutput.print("\n".join(loaded_msgs), OutputType.INFO)
|
|
275
|
+
loaded_msgs = []
|
|
243
276
|
if total_docs_added == 0:
|
|
244
|
-
print("
|
|
277
|
+
PrettyOutput.print("未能成功加载任何文档。", OutputType.ERROR)
|
|
245
278
|
raise typer.Exit(code=1)
|
|
246
279
|
|
|
247
|
-
print(
|
|
248
|
-
f"
|
|
280
|
+
PrettyOutput.print(
|
|
281
|
+
f"成功将 {total_docs_added} 个文档的内容添加至集合 '{collection_name}'。",
|
|
282
|
+
OutputType.SUCCESS,
|
|
249
283
|
)
|
|
250
284
|
|
|
251
285
|
except Exception as e:
|
|
252
|
-
print(f"
|
|
286
|
+
PrettyOutput.print(f"发生严重错误: {e}", OutputType.ERROR)
|
|
253
287
|
raise typer.Exit(code=1)
|
|
254
288
|
|
|
255
289
|
|
|
@@ -272,11 +306,11 @@ def list_documents(
|
|
|
272
306
|
collection_name=collection_name,
|
|
273
307
|
)
|
|
274
308
|
|
|
275
|
-
collection = pipeline.
|
|
309
|
+
collection = pipeline._get_collection()
|
|
276
310
|
results = collection.get() # 获取集合中的所有项目
|
|
277
311
|
|
|
278
312
|
if not results or not results["metadatas"]:
|
|
279
|
-
print("
|
|
313
|
+
PrettyOutput.print("知识库中没有找到任何文档。", OutputType.INFO)
|
|
280
314
|
return
|
|
281
315
|
|
|
282
316
|
# 从元数据中提取唯一的源文件路径
|
|
@@ -288,15 +322,85 @@ def list_documents(
|
|
|
288
322
|
sources.add(source)
|
|
289
323
|
|
|
290
324
|
if not sources:
|
|
291
|
-
print(
|
|
325
|
+
PrettyOutput.print(
|
|
326
|
+
"知识库中没有找到任何带有源信息的文档。", OutputType.INFO
|
|
327
|
+
)
|
|
292
328
|
return
|
|
293
329
|
|
|
294
|
-
|
|
330
|
+
# 避免在循环中逐条打印,先拼接后统一打印
|
|
331
|
+
lines = [f"知识库 '{collection_name}' 中共有 {len(sources)} 个独立文档:"]
|
|
295
332
|
for i, source in enumerate(sorted(list(sources)), 1):
|
|
296
|
-
|
|
333
|
+
lines.append(f" {i}. {source}")
|
|
334
|
+
PrettyOutput.print("\n".join(lines), OutputType.INFO)
|
|
335
|
+
|
|
336
|
+
except Exception as e:
|
|
337
|
+
PrettyOutput.print(f"发生错误: {e}", OutputType.ERROR)
|
|
338
|
+
raise typer.Exit(code=1)
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
@app.command("retrieve", help="仅从知识库检索相关文档,不生成答案。")
|
|
342
|
+
def retrieve(
|
|
343
|
+
question: str = typer.Argument(..., help="要提出的问题。"),
|
|
344
|
+
collection_name: str = typer.Option(
|
|
345
|
+
"jarvis_rag_collection",
|
|
346
|
+
"--collection",
|
|
347
|
+
"-c",
|
|
348
|
+
help="向量数据库中集合的名称。",
|
|
349
|
+
),
|
|
350
|
+
embedding_model: Optional[str] = typer.Option(
|
|
351
|
+
None,
|
|
352
|
+
"--embedding-model",
|
|
353
|
+
"-e",
|
|
354
|
+
help="嵌入模型的名称。覆盖全局配置。",
|
|
355
|
+
),
|
|
356
|
+
db_path: Optional[Path] = typer.Option(
|
|
357
|
+
None, "--db-path", help="向量数据库的路径。覆盖全局配置。"
|
|
358
|
+
),
|
|
359
|
+
n_results: int = typer.Option(5, "--top-n", help="要检索的文档数量。"),
|
|
360
|
+
rewrite: bool = typer.Option(
|
|
361
|
+
True,
|
|
362
|
+
"--rewrite/--no-rewrite",
|
|
363
|
+
help="是否对查询进行LLM重写以提升召回,默认开启。",
|
|
364
|
+
show_default=True,
|
|
365
|
+
),
|
|
366
|
+
):
|
|
367
|
+
"""仅从RAG知识库检索文档并打印结果。"""
|
|
368
|
+
try:
|
|
369
|
+
# 如果未在命令行中指定,则从配置中加载RAG设置
|
|
370
|
+
final_embedding_model = embedding_model or get_rag_embedding_model()
|
|
371
|
+
use_bm25 = get_rag_use_bm25()
|
|
372
|
+
use_rerank = get_rag_use_rerank()
|
|
373
|
+
|
|
374
|
+
pipeline = JarvisRAGPipeline(
|
|
375
|
+
embedding_model=final_embedding_model,
|
|
376
|
+
db_path=str(db_path) if db_path else None,
|
|
377
|
+
collection_name=collection_name,
|
|
378
|
+
use_bm25=use_bm25,
|
|
379
|
+
use_rerank=use_rerank,
|
|
380
|
+
use_query_rewrite=rewrite,
|
|
381
|
+
)
|
|
382
|
+
|
|
383
|
+
PrettyOutput.print(f"正在为问题检索文档: '{question}'", OutputType.INFO)
|
|
384
|
+
retrieved_docs = pipeline.retrieve_only(question, n_results=n_results)
|
|
385
|
+
|
|
386
|
+
if not retrieved_docs:
|
|
387
|
+
PrettyOutput.print("未找到相关文档。", OutputType.INFO)
|
|
388
|
+
return
|
|
389
|
+
|
|
390
|
+
PrettyOutput.print(
|
|
391
|
+
f"成功检索到 {len(retrieved_docs)} 个文档:", OutputType.SUCCESS
|
|
392
|
+
)
|
|
393
|
+
from jarvis.jarvis_utils.globals import console
|
|
394
|
+
|
|
395
|
+
for i, doc in enumerate(retrieved_docs, 1):
|
|
396
|
+
source = doc.metadata.get("source", "未知来源")
|
|
397
|
+
content = doc.page_content
|
|
398
|
+
panel_title = f"文档 {i} | 来源: {source}"
|
|
399
|
+
console.print(f"\n[bold magenta]{panel_title}[/bold magenta]")
|
|
400
|
+
console.print(Markdown(f"```\n{content}\n```"))
|
|
297
401
|
|
|
298
402
|
except Exception as e:
|
|
299
|
-
print(f"
|
|
403
|
+
PrettyOutput.print(f"发生错误: {e}", OutputType.ERROR)
|
|
300
404
|
raise typer.Exit(code=1)
|
|
301
405
|
|
|
302
406
|
|
|
@@ -333,7 +437,7 @@ def query(
|
|
|
333
437
|
):
|
|
334
438
|
"""查询RAG知识库并打印答案。"""
|
|
335
439
|
if model and not platform:
|
|
336
|
-
print("
|
|
440
|
+
PrettyOutput.print("错误: --model 需要指定 --platform。", OutputType.ERROR)
|
|
337
441
|
raise typer.Exit(code=1)
|
|
338
442
|
|
|
339
443
|
try:
|
|
@@ -341,41 +445,40 @@ def query(
|
|
|
341
445
|
if (platform or model) and not custom_llm:
|
|
342
446
|
raise typer.Exit(code=1)
|
|
343
447
|
|
|
448
|
+
# 如果未在命令行中指定,则从配置中加载RAG设置
|
|
449
|
+
final_embedding_model = embedding_model or get_rag_embedding_model()
|
|
450
|
+
use_bm25 = get_rag_use_bm25()
|
|
451
|
+
use_rerank = get_rag_use_rerank()
|
|
452
|
+
|
|
344
453
|
pipeline = JarvisRAGPipeline(
|
|
345
454
|
llm=custom_llm,
|
|
346
|
-
embedding_model=
|
|
455
|
+
embedding_model=final_embedding_model,
|
|
347
456
|
db_path=str(db_path) if db_path else None,
|
|
348
457
|
collection_name=collection_name,
|
|
458
|
+
use_bm25=use_bm25,
|
|
459
|
+
use_rerank=use_rerank,
|
|
349
460
|
)
|
|
350
461
|
|
|
351
|
-
print(f"
|
|
462
|
+
PrettyOutput.print(f"正在查询: '{question}'", OutputType.INFO)
|
|
352
463
|
answer = pipeline.query(question)
|
|
353
464
|
|
|
354
|
-
print(
|
|
355
|
-
# 我们仍然可以使用 rich.markdown.Markdown,因为 PrettyOutput 底层使用了 rich
|
|
356
|
-
from jarvis.jarvis_utils.globals import console
|
|
357
|
-
|
|
358
|
-
console.print(Markdown(answer))
|
|
465
|
+
PrettyOutput.print(answer, OutputType.SUCCESS)
|
|
359
466
|
|
|
360
467
|
except Exception as e:
|
|
361
|
-
print(f"
|
|
468
|
+
PrettyOutput.print(f"发生错误: {e}", OutputType.ERROR)
|
|
362
469
|
raise typer.Exit(code=1)
|
|
363
470
|
|
|
364
471
|
|
|
365
|
-
_RAG_INSTALLED = False
|
|
366
|
-
try:
|
|
367
|
-
import langchain # noqa
|
|
368
472
|
|
|
369
|
-
_RAG_INSTALLED = True
|
|
370
|
-
except ImportError:
|
|
371
|
-
pass
|
|
372
473
|
|
|
373
474
|
|
|
374
475
|
def _check_rag_dependencies():
|
|
375
|
-
if not
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
476
|
+
if not is_rag_installed():
|
|
477
|
+
missing = get_missing_rag_modules()
|
|
478
|
+
missing_str = f"缺少依赖: {', '.join(missing)}。" if missing else ""
|
|
479
|
+
PrettyOutput.print(
|
|
480
|
+
f"RAG依赖项未安装或不完整。{missing_str}请运行 'pip install \"jarvis-ai-assistant[rag]\"' 后重试。",
|
|
481
|
+
OutputType.ERROR,
|
|
379
482
|
)
|
|
380
483
|
raise typer.Exit(code=1)
|
|
381
484
|
|
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
import torch
|
|
2
|
+
import os
|
|
2
3
|
from typing import List, cast
|
|
3
4
|
from langchain_huggingface import HuggingFaceEmbeddings
|
|
5
|
+
from huggingface_hub import snapshot_download
|
|
6
|
+
|
|
4
7
|
|
|
5
8
|
from .cache import EmbeddingCache
|
|
9
|
+
from jarvis.jarvis_utils.output import OutputType, PrettyOutput
|
|
6
10
|
|
|
7
11
|
|
|
8
12
|
class EmbeddingManager:
|
|
@@ -23,7 +27,9 @@ class EmbeddingManager:
|
|
|
23
27
|
"""
|
|
24
28
|
self.model_name = model_name
|
|
25
29
|
|
|
26
|
-
print(
|
|
30
|
+
PrettyOutput.print(
|
|
31
|
+
f"初始化嵌入管理器, 模型: '{self.model_name}'...", OutputType.INFO
|
|
32
|
+
)
|
|
27
33
|
|
|
28
34
|
# 缓存的salt是模型名称,以防止冲突
|
|
29
35
|
self.cache = EmbeddingCache(cache_dir=cache_dir, salt=self.model_name)
|
|
@@ -35,15 +41,148 @@ class EmbeddingManager:
|
|
|
35
41
|
encode_kwargs = {"normalize_embeddings": True}
|
|
36
42
|
|
|
37
43
|
try:
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
+
# First try to load model from local cache without any network access
|
|
45
|
+
try:
|
|
46
|
+
local_dir = None
|
|
47
|
+
# Prefer explicit local dir via env or direct path
|
|
48
|
+
|
|
49
|
+
if os.path.isdir(self.model_name):
|
|
50
|
+
return HuggingFaceEmbeddings(
|
|
51
|
+
model_name=self.model_name,
|
|
52
|
+
model_kwargs=model_kwargs,
|
|
53
|
+
encode_kwargs=encode_kwargs,
|
|
54
|
+
show_progress=False,
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
# Try common local cache directories for sentence-transformers and HF hub
|
|
58
|
+
try:
|
|
59
|
+
home = os.path.expanduser("~")
|
|
60
|
+
st_home = os.path.join(home, ".cache", "sentence_transformers")
|
|
61
|
+
torch_st_home = os.path.join(home, ".cache", "torch", "sentence_transformers")
|
|
62
|
+
# Build common name variants found in local caches
|
|
63
|
+
org, name = (
|
|
64
|
+
self.model_name.split("/", 1)
|
|
65
|
+
if "/" in self.model_name
|
|
66
|
+
else ("", self.model_name)
|
|
67
|
+
)
|
|
68
|
+
san1 = self.model_name.replace("/", "_")
|
|
69
|
+
san2 = self.model_name.replace("/", "__")
|
|
70
|
+
san3 = self.model_name.replace("/", "--")
|
|
71
|
+
# include plain 'name' for caches that drop org prefix
|
|
72
|
+
name_variants = list(dict.fromkeys([self.model_name, san1, san2, san3, name]))
|
|
73
|
+
candidates = []
|
|
74
|
+
for base in [st_home, torch_st_home]:
|
|
75
|
+
for nv in name_variants:
|
|
76
|
+
p = os.path.join(base, nv)
|
|
77
|
+
if os.path.isdir(p):
|
|
78
|
+
candidates.append(p)
|
|
79
|
+
# Fuzzy scan cache directory for entries that include variants
|
|
80
|
+
try:
|
|
81
|
+
for entry in os.listdir(base):
|
|
82
|
+
ep = os.path.join(base, entry)
|
|
83
|
+
if not os.path.isdir(ep):
|
|
84
|
+
continue
|
|
85
|
+
if (
|
|
86
|
+
(org and entry.startswith(f"{org}__") and name in entry)
|
|
87
|
+
or (san1 in entry)
|
|
88
|
+
or (name in entry)
|
|
89
|
+
):
|
|
90
|
+
candidates.append(ep)
|
|
91
|
+
except Exception:
|
|
92
|
+
pass
|
|
93
|
+
|
|
94
|
+
# Hugging Face Hub cache snapshots
|
|
95
|
+
hf_cache = os.path.join(home, ".cache", "huggingface", "hub")
|
|
96
|
+
if "/" in self.model_name:
|
|
97
|
+
org, name = self.model_name.split("/", 1)
|
|
98
|
+
models_dir = os.path.join(hf_cache, f"models--{org}--{name}", "snapshots")
|
|
99
|
+
if os.path.isdir(models_dir):
|
|
100
|
+
try:
|
|
101
|
+
snaps = sorted(
|
|
102
|
+
[os.path.join(models_dir, d) for d in os.listdir(models_dir)],
|
|
103
|
+
key=lambda p: os.path.getmtime(p),
|
|
104
|
+
reverse=True,
|
|
105
|
+
)
|
|
106
|
+
except Exception:
|
|
107
|
+
snaps = [os.path.join(models_dir, d) for d in os.listdir(models_dir)]
|
|
108
|
+
for sp in snaps:
|
|
109
|
+
if os.path.isdir(sp):
|
|
110
|
+
candidates.append(sp)
|
|
111
|
+
break
|
|
112
|
+
|
|
113
|
+
for cand in candidates:
|
|
114
|
+
try:
|
|
115
|
+
return HuggingFaceEmbeddings(
|
|
116
|
+
model_name=cand,
|
|
117
|
+
model_kwargs=model_kwargs,
|
|
118
|
+
encode_kwargs=encode_kwargs,
|
|
119
|
+
show_progress=False,
|
|
120
|
+
)
|
|
121
|
+
except Exception:
|
|
122
|
+
continue
|
|
123
|
+
except Exception:
|
|
124
|
+
pass
|
|
125
|
+
|
|
126
|
+
try:
|
|
127
|
+
# Try resolve local cached directory; do not hit network
|
|
128
|
+
local_dir = snapshot_download(repo_id=self.model_name, local_files_only=True)
|
|
129
|
+
except Exception:
|
|
130
|
+
local_dir = None
|
|
131
|
+
|
|
132
|
+
if local_dir:
|
|
133
|
+
return HuggingFaceEmbeddings(
|
|
134
|
+
model_name=local_dir,
|
|
135
|
+
model_kwargs=model_kwargs,
|
|
136
|
+
encode_kwargs=encode_kwargs,
|
|
137
|
+
show_progress=False,
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# Fall back to remote download if local cache not found and not offline
|
|
143
|
+
return HuggingFaceEmbeddings(
|
|
144
|
+
model_name=self.model_name,
|
|
145
|
+
model_kwargs=model_kwargs,
|
|
146
|
+
encode_kwargs=encode_kwargs,
|
|
147
|
+
show_progress=True,
|
|
148
|
+
)
|
|
149
|
+
except Exception as _e:
|
|
150
|
+
# 如果已检测到本地候选路径(直接目录 / 本地缓存快照),则视为本地加载失败,
|
|
151
|
+
# 为避免在用户期望“本地优先不联网”的情况下触发联网,直接抛错并给出修复建议。
|
|
152
|
+
had_local_candidate = False
|
|
153
|
+
try:
|
|
154
|
+
had_local_candidate = (
|
|
155
|
+
os.path.isdir(self.model_name)
|
|
156
|
+
# 如果上面 snapshot_download 命中了本地缓存,会将 local_dir 设为非 None
|
|
157
|
+
or (locals().get("local_dir") is not None)
|
|
158
|
+
)
|
|
159
|
+
except Exception:
|
|
160
|
+
pass
|
|
161
|
+
|
|
162
|
+
if had_local_candidate:
|
|
163
|
+
PrettyOutput.print(
|
|
164
|
+
"检测到本地模型路径但加载失败。为避免触发网络访问,已中止远程回退。\n"
|
|
165
|
+
"请确认本地目录包含完整的 Transformers/Tokenizer 文件(如 config.json、model.safetensors、tokenizer.json/merges.txt 等),\n"
|
|
166
|
+
"或在配置中将 embedding_model 设置为该本地目录,或将模型放置到默认的 Hugging Face 缓存目录(例如 ~/.cache/huggingface/hub)。",
|
|
167
|
+
OutputType.ERROR,
|
|
168
|
+
)
|
|
169
|
+
raise
|
|
170
|
+
|
|
171
|
+
# 未发现任何本地候选,则保持原有行为:回退至远程下载
|
|
172
|
+
return HuggingFaceEmbeddings(
|
|
173
|
+
model_name=self.model_name,
|
|
174
|
+
model_kwargs=model_kwargs,
|
|
175
|
+
encode_kwargs=encode_kwargs,
|
|
176
|
+
show_progress=True,
|
|
177
|
+
)
|
|
44
178
|
except Exception as e:
|
|
45
|
-
print(
|
|
46
|
-
|
|
179
|
+
PrettyOutput.print(
|
|
180
|
+
f"加载嵌入模型 '{self.model_name}' 时出错: {e}", OutputType.ERROR
|
|
181
|
+
)
|
|
182
|
+
PrettyOutput.print(
|
|
183
|
+
"请确保您已安装 'sentence_transformers' 和 'torch'。",
|
|
184
|
+
OutputType.WARNING,
|
|
185
|
+
)
|
|
47
186
|
raise
|
|
48
187
|
|
|
49
188
|
def embed_documents(self, texts: List[str]) -> List[List[float]]:
|
|
@@ -71,8 +210,9 @@ class EmbeddingManager:
|
|
|
71
210
|
|
|
72
211
|
# 为不在缓存中的文本计算嵌入
|
|
73
212
|
if texts_to_embed:
|
|
74
|
-
print(
|
|
75
|
-
f"
|
|
213
|
+
PrettyOutput.print(
|
|
214
|
+
f"缓存未命中。正在为 {len(texts_to_embed)}/{len(texts)} 个文档计算嵌入。",
|
|
215
|
+
OutputType.INFO,
|
|
76
216
|
)
|
|
77
217
|
new_embeddings = self.model.embed_documents(texts_to_embed)
|
|
78
218
|
|
|
@@ -83,7 +223,10 @@ class EmbeddingManager:
|
|
|
83
223
|
for i, embedding in zip(indices_to_embed, new_embeddings):
|
|
84
224
|
cached_embeddings[i] = embedding
|
|
85
225
|
else:
|
|
86
|
-
print(
|
|
226
|
+
PrettyOutput.print(
|
|
227
|
+
f"缓存命中。所有 {len(texts)} 个文档的嵌入均从缓存中检索。",
|
|
228
|
+
OutputType.SUCCESS,
|
|
229
|
+
)
|
|
87
230
|
|
|
88
231
|
return cast(List[List[float]], cached_embeddings)
|
|
89
232
|
|
|
@@ -1,11 +1,9 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
|
-
import os
|
|
3
|
-
import os
|
|
4
|
-
from abc import ABC, abstractmethod
|
|
5
2
|
|
|
6
3
|
from jarvis.jarvis_agent import Agent as JarvisAgent
|
|
7
4
|
from jarvis.jarvis_platform.base import BasePlatform
|
|
8
5
|
from jarvis.jarvis_platform.registry import PlatformRegistry
|
|
6
|
+
from jarvis.jarvis_utils.output import OutputType, PrettyOutput
|
|
9
7
|
|
|
10
8
|
|
|
11
9
|
class LLMInterface(ABC):
|
|
@@ -41,18 +39,16 @@ class ToolAgent_LLM(LLMInterface):
|
|
|
41
39
|
"""
|
|
42
40
|
初始化工具-代理 LLM 包装器。
|
|
43
41
|
"""
|
|
44
|
-
print("
|
|
42
|
+
PrettyOutput.print("已初始化工具 Agent 作为最终应答者。", OutputType.INFO)
|
|
45
43
|
self.allowed_tools = ["read_code", "execute_script"]
|
|
46
44
|
# 为代理提供一个通用的系统提示
|
|
47
45
|
self.system_prompt = "You are a helpful assistant. Please answer the user's question based on the provided context. You can use tools to find more information if needed."
|
|
48
46
|
self.summary_prompt = """
|
|
49
|
-
<report>
|
|
50
47
|
请为本次问答任务生成一个总结报告,包含以下内容:
|
|
51
48
|
|
|
52
49
|
1. **原始问题**: 重述用户最开始提出的问题。
|
|
53
50
|
2. **关键信息来源**: 总结你是基于哪些关键信息或文件得出的结论。
|
|
54
51
|
3. **最终答案**: 给出最终的、精炼的回答。
|
|
55
|
-
</report>
|
|
56
52
|
"""
|
|
57
53
|
|
|
58
54
|
def generate(self, prompt: str, **kwargs) -> str:
|
|
@@ -83,7 +79,7 @@ class ToolAgent_LLM(LLMInterface):
|
|
|
83
79
|
return str(final_answer)
|
|
84
80
|
|
|
85
81
|
except Exception as e:
|
|
86
|
-
print(f"
|
|
82
|
+
PrettyOutput.print(f"Agent 在执行过程中发生错误: {e}", OutputType.ERROR)
|
|
87
83
|
return "错误: Agent 未能成功生成回答。"
|
|
88
84
|
|
|
89
85
|
|
|
@@ -101,12 +97,13 @@ class JarvisPlatform_LLM(LLMInterface):
|
|
|
101
97
|
try:
|
|
102
98
|
self.registry = PlatformRegistry.get_global_platform_registry()
|
|
103
99
|
self.platform: BasePlatform = self.registry.get_normal_platform()
|
|
104
|
-
self.platform.set_suppress_output(
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
100
|
+
self.platform.set_suppress_output(False) # 确保模型没有控制台输出
|
|
101
|
+
PrettyOutput.print(
|
|
102
|
+
f"已初始化 Jarvis 平台 LLM,模型: {self.platform.name()}",
|
|
103
|
+
OutputType.INFO,
|
|
104
|
+
)
|
|
108
105
|
except Exception as e:
|
|
109
|
-
print(f"
|
|
106
|
+
PrettyOutput.print(f"初始化 Jarvis 平台 LLM 失败: {e}", OutputType.ERROR)
|
|
110
107
|
raise
|
|
111
108
|
|
|
112
109
|
def generate(self, prompt: str, **kwargs) -> str:
|
|
@@ -124,5 +121,5 @@ class JarvisPlatform_LLM(LLMInterface):
|
|
|
124
121
|
# 使用健壮的chat_until_success方法
|
|
125
122
|
return self.platform.chat_until_success(prompt)
|
|
126
123
|
except Exception as e:
|
|
127
|
-
print(f"
|
|
124
|
+
PrettyOutput.print(f"调用 Jarvis 平台模型时发生错误: {e}", OutputType.ERROR)
|
|
128
125
|
return "错误: 无法从本地LLM获取响应。"
|