jarvis-ai-assistant 0.1.220__py3-none-any.whl → 0.1.221__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jarvis/__init__.py +1 -1
- jarvis/jarvis_agent/__init__.py +93 -382
- jarvis/jarvis_agent/edit_file_handler.py +32 -185
- jarvis/jarvis_agent/prompt_builder.py +57 -0
- jarvis/jarvis_agent/prompts.py +188 -0
- jarvis/jarvis_agent/protocols.py +30 -0
- jarvis/jarvis_agent/session_manager.py +84 -0
- jarvis/jarvis_agent/tool_executor.py +49 -0
- jarvis/jarvis_code_agent/code_agent.py +4 -4
- jarvis/jarvis_data/config_schema.json +8 -18
- jarvis/jarvis_rag/__init__.py +2 -2
- jarvis/jarvis_rag/cache.py +28 -30
- jarvis/jarvis_rag/cli.py +141 -52
- jarvis/jarvis_rag/embedding_manager.py +32 -46
- jarvis/jarvis_rag/llm_interface.py +32 -34
- jarvis/jarvis_rag/query_rewriter.py +11 -12
- jarvis/jarvis_rag/rag_pipeline.py +40 -43
- jarvis/jarvis_rag/reranker.py +18 -18
- jarvis/jarvis_rag/retriever.py +29 -29
- jarvis/jarvis_tools/edit_file.py +11 -36
- jarvis/jarvis_utils/config.py +10 -25
- {jarvis_ai_assistant-0.1.220.dist-info → jarvis_ai_assistant-0.1.221.dist-info}/METADATA +15 -12
- {jarvis_ai_assistant-0.1.220.dist-info → jarvis_ai_assistant-0.1.221.dist-info}/RECORD +27 -22
- {jarvis_ai_assistant-0.1.220.dist-info → jarvis_ai_assistant-0.1.221.dist-info}/WHEEL +0 -0
- {jarvis_ai_assistant-0.1.220.dist-info → jarvis_ai_assistant-0.1.221.dist-info}/entry_points.txt +0 -0
- {jarvis_ai_assistant-0.1.220.dist-info → jarvis_ai_assistant-0.1.221.dist-info}/licenses/LICENSE +0 -0
- {jarvis_ai_assistant-0.1.220.dist-info → jarvis_ai_assistant-0.1.221.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,49 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
from typing import Any, Tuple, TYPE_CHECKING
|
3
|
+
|
4
|
+
from jarvis.jarvis_utils.input import user_confirm
|
5
|
+
from jarvis.jarvis_utils.output import OutputType, PrettyOutput
|
6
|
+
|
7
|
+
if TYPE_CHECKING:
|
8
|
+
from jarvis.jarvis_agent import Agent
|
9
|
+
|
10
|
+
|
11
|
+
def execute_tool_call(response: str, agent: "Agent") -> Tuple[bool, Any]:
    """
    Dispatch a model response to the single output handler that claims it.

    Each handler in ``agent.output_handler`` is asked, via ``can_handle``,
    whether it recognises ``response``. Execution only happens when exactly
    one handler matches.

    Args:
        response: The response string from the model, potentially containing a tool call.
        agent: The agent instance; its ``output_handler`` and
            ``execute_tool_confirm`` attributes are read here, and it is
            passed through to the selected handler.

    Returns:
        A tuple:
            - ``(False, "")`` when no handler matches, or when confirmation
              is required and the user declines.
            - ``(False, error_message)`` when more than one handler matches.
            - Otherwise, whatever the selected handler's
              ``handle(response, agent)`` returns.
    """
    matching = [h for h in agent.output_handler if h.can_handle(response)]

    # Nothing in the response looks like a tool call.
    if not matching:
        return False, ""

    # Ambiguous response: refuse to run anything and report the candidates.
    if len(matching) > 1:
        names = ", ".join([h.name() for h in matching])
        error_message = (
            "操作失败:检测到多个操作。一次只能执行一个操作。"
            f"尝试执行的操作:{names}"
        )
        PrettyOutput.print(error_message, OutputType.WARNING)
        return False, error_message

    (selected,) = matching

    # Confirmation is only requested when the agent is configured for it.
    if agent.execute_tool_confirm and not user_confirm(
        f"需要执行{selected.name()}确认执行?", True
    ):
        return False, ""

    print(f"🔧 正在执行{selected.name()}...")
    outcome = selected.handle(response, agent)
    print(f"✅ {selected.name()}执行完成")
    return outcome
|
@@ -392,19 +392,19 @@ class CodeAgent:
|
|
392
392
|
return
|
393
393
|
# 用户确认最终结果
|
394
394
|
if commited:
|
395
|
-
agent.prompt += final_ret
|
395
|
+
agent.session.prompt += final_ret
|
396
396
|
return
|
397
397
|
PrettyOutput.print(final_ret, OutputType.USER, lang="markdown")
|
398
398
|
if not is_confirm_before_apply_patch() or user_confirm(
|
399
399
|
"是否使用此回复?", default=True
|
400
400
|
):
|
401
|
-
agent.prompt += final_ret
|
401
|
+
agent.session.prompt += final_ret
|
402
402
|
return
|
403
|
-
agent.prompt += final_ret
|
403
|
+
agent.session.prompt += final_ret
|
404
404
|
custom_reply = get_multiline_input("请输入自定义回复")
|
405
405
|
if custom_reply.strip(): # 如果自定义回复为空,返回空字符串
|
406
406
|
agent.set_addon_prompt(custom_reply)
|
407
|
-
agent.prompt += final_ret
|
407
|
+
agent.session.prompt += final_ret
|
408
408
|
|
409
409
|
|
410
410
|
def main() -> None:
|
@@ -185,30 +185,20 @@
|
|
185
185
|
"type": "object",
|
186
186
|
"description": "RAG框架的配置",
|
187
187
|
"properties": {
|
188
|
-
"
|
188
|
+
"embedding_model": {
|
189
189
|
"type": "string",
|
190
|
-
"
|
191
|
-
|
192
|
-
"accuracy"
|
193
|
-
],
|
194
|
-
"default": "performance",
|
195
|
-
"description": "嵌入模型的模式, 'performance'表示性能优先, 'accuracy'表示准确度优先"
|
190
|
+
"default": "BAAI/bge-base-zh-v1.5",
|
191
|
+
"description": "用于RAG的嵌入模型的名称, 默认为 'BAAI/bge-base-zh-v1.5'"
|
196
192
|
},
|
197
|
-
"
|
193
|
+
"rerank_model": {
|
198
194
|
"type": "string",
|
199
|
-
"default": "
|
200
|
-
"description": "
|
201
|
-
},
|
202
|
-
"vector_db_path": {
|
203
|
-
"type": "string",
|
204
|
-
"default": ".jarvis/rag/vectordb",
|
205
|
-
"description": "向量数据库的持久化存储路径, 相对于当前工作目录"
|
195
|
+
"default": "BAAI/bge-reranker-base",
|
196
|
+
"description": "用于RAG的rerank模型的名称, 默认为 'BAAI/bge-reranker-base'"
|
206
197
|
}
|
207
198
|
},
|
208
199
|
"default": {
|
209
|
-
"
|
210
|
-
"
|
211
|
-
"vector_db_path": ".jarvis/rag/vectordb"
|
200
|
+
"embedding_model": "BAAI/bge-base-zh-v1.5",
|
201
|
+
"rerank_model": "BAAI/bge-reranker-base"
|
212
202
|
}
|
213
203
|
},
|
214
204
|
"JARVIS_REPLACE_MAP": {
|
jarvis/jarvis_rag/__init__.py
CHANGED
jarvis/jarvis_rag/cache.py
CHANGED
@@ -6,74 +6,72 @@ from diskcache import Cache
|
|
6
6
|
|
7
7
|
class EmbeddingCache:
|
8
8
|
"""
|
9
|
-
|
9
|
+
一个用于存储和检索文本嵌入的基于磁盘的缓存。
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
deterministic and efficient.
|
11
|
+
该类使用diskcache创建一个持久化的本地缓存。它根据每个文本内容的
|
12
|
+
SHA256哈希值为其生成一个键,使得查找过程具有确定性和高效性。
|
14
13
|
"""
|
15
14
|
|
16
15
|
def __init__(self, cache_dir: str, salt: str = ""):
    """
    Initialise the EmbeddingCache.

    Args:
        cache_dir (str): Directory in which the cache is stored.
        salt (str): Value added to the text before hashing. It keeps
            embeddings produced by different models from colliding; the
            model name, for example, can be used as the salt.
    """
    # Stored first; it is only read later when cache keys are derived.
    self.salt = salt
    self.cache = Cache(cache_dir)
|
28
26
|
|
29
27
|
def _get_key(self, text: str) -> str:
|
30
|
-
"""
|
28
|
+
"""为一个给定的文本和盐值生成一个唯一的缓存键。"""
|
31
29
|
hash_object = hashlib.sha256((self.salt + text).encode("utf-8"))
|
32
30
|
return hash_object.hexdigest()
|
33
31
|
|
34
32
|
def get(self, text: str) -> Optional[Any]:
|
35
33
|
"""
|
36
|
-
|
34
|
+
从缓存中检索一个嵌入。
|
37
35
|
|
38
|
-
|
39
|
-
text (str):
|
36
|
+
参数:
|
37
|
+
text (str): 要查找的文本。
|
40
38
|
|
41
|
-
|
42
|
-
|
39
|
+
返回:
|
40
|
+
缓存的嵌入,如果不在缓存中则返回None。
|
43
41
|
"""
|
44
42
|
key = self._get_key(text)
|
45
43
|
return self.cache.get(key)
|
46
44
|
|
47
45
|
def set(self, text: str, embedding: Any) -> None:
|
48
46
|
"""
|
49
|
-
|
47
|
+
在缓存中存储一个嵌入。
|
50
48
|
|
51
|
-
|
52
|
-
text (str):
|
53
|
-
embedding (Any):
|
49
|
+
参数:
|
50
|
+
text (str): 与嵌入相对应的文本。
|
51
|
+
embedding (Any): 要存储的嵌入向量。
|
54
52
|
"""
|
55
53
|
key = self._get_key(text)
|
56
54
|
self.cache.set(key, embedding)
|
57
55
|
|
58
56
|
def get_batch(self, texts: List[str]) -> List[Optional[Any]]:
|
59
57
|
"""
|
60
|
-
|
58
|
+
从缓存中检索一批嵌入。
|
61
59
|
|
62
|
-
|
63
|
-
texts (List[str]):
|
60
|
+
参数:
|
61
|
+
texts (List[str]): 要查找的文本列表。
|
64
62
|
|
65
|
-
|
66
|
-
|
63
|
+
返回:
|
64
|
+
一个列表,其中包含缓存的嵌入,对于缓存未命中的情况则为None。
|
67
65
|
"""
|
68
66
|
return [self.get(text) for text in texts]
|
69
67
|
|
70
68
|
def set_batch(self, texts: List[str], embeddings: List[Any]) -> None:
|
71
69
|
"""
|
72
|
-
|
70
|
+
在缓存中存储一批嵌入。
|
73
71
|
|
74
|
-
|
75
|
-
texts (List[str]):
|
76
|
-
embeddings (List[Any]):
|
72
|
+
参数:
|
73
|
+
texts (List[str]): 文本列表。
|
74
|
+
embeddings (List[Any]): 相应的嵌入列表。
|
77
75
|
"""
|
78
76
|
if len(texts) != len(embeddings):
|
79
77
|
raise ValueError("Length of texts and embeddings must be the same.")
|
@@ -83,5 +81,5 @@ class EmbeddingCache:
|
|
83
81
|
self.set(text, embedding)
|
84
82
|
|
85
83
|
def close(self):
    """Close the cache connection by delegating to ``self.cache.close()``."""
    self.cache.close()
|
jarvis/jarvis_rag/cli.py
CHANGED
@@ -4,6 +4,7 @@ from pathlib import Path
|
|
4
4
|
from typing import Optional, List, Literal, cast
|
5
5
|
import mimetypes
|
6
6
|
|
7
|
+
import pathspec
|
7
8
|
import typer
|
8
9
|
from langchain.docstore.document import Document
|
9
10
|
from langchain_community.document_loaders import (
|
@@ -18,29 +19,29 @@ from jarvis.jarvis_utils.utils import init_env
|
|
18
19
|
|
19
20
|
def is_likely_text_file(file_path: Path) -> bool:
    """
    Guess whether ``file_path`` is a text file without reading all of it.

    Two heuristics are applied in order:
      1. The MIME type guessed from the name: ``text/*`` or anything
         mentioning json, xml or javascript counts as text.
      2. Otherwise the first 4KB are read; a null byte there means binary.

    Any exception raised along the way results in ``False``.
    """
    try:
        guessed_type, _encoding = mimetypes.guess_type(file_path)
        if guessed_type:
            if guessed_type.startswith("text/"):
                return True
            for marker in ("json", "xml", "javascript"):
                if marker in guessed_type:
                    return True

        # Only a small prefix is read so large binaries are never loaded whole.
        with open(file_path, "rb") as handle:
            head = handle.read(4096)
        return b"\x00" not in head
    except Exception:
        return False
|
40
41
|
|
41
42
|
|
42
|
-
#
|
43
|
-
#
|
43
|
+
# 确保项目根目录在Python路径中,以允许绝对导入
|
44
|
+
# 这使得脚本可以作为模块运行。
|
44
45
|
_project_root = os.path.abspath(
|
45
46
|
os.path.join(os.path.dirname(__file__), "..", "..", "..")
|
46
47
|
)
|
@@ -54,13 +55,13 @@ from jarvis.jarvis_rag.rag_pipeline import JarvisRAGPipeline
|
|
54
55
|
|
55
56
|
app = typer.Typer(
|
56
57
|
name="jarvis-rag",
|
57
|
-
help="
|
58
|
+
help="一个与Jarvis RAG框架交互的命令行工具。",
|
58
59
|
add_completion=False,
|
59
60
|
)
|
60
61
|
|
61
62
|
|
62
63
|
class _CustomPlatformLLM(LLMInterface):
|
63
|
-
"""
|
64
|
+
"""一个简单的包装器,使BasePlatform实例与LLMInterface兼容。"""
|
64
65
|
|
65
66
|
def __init__(self, platform: BasePlatform):
|
66
67
|
self.platform = platform
|
@@ -73,7 +74,7 @@ class _CustomPlatformLLM(LLMInterface):
|
|
73
74
|
|
74
75
|
|
75
76
|
def _create_custom_llm(platform_name: str, model_name: str) -> Optional[LLMInterface]:
|
76
|
-
"""
|
77
|
+
"""从指定的平台和模型创建LLM接口。"""
|
77
78
|
if not platform_name or not model_name:
|
78
79
|
return None
|
79
80
|
try:
|
@@ -90,36 +91,70 @@ def _create_custom_llm(platform_name: str, model_name: str) -> Optional[LLMInter
|
|
90
91
|
return None
|
91
92
|
|
92
93
|
|
94
|
+
def _load_ragignore_spec() -> tuple[Optional[pathspec.PathSpec], Optional[Path]]:
    """
    Build a gitwildmatch ``PathSpec`` from the first ignore file that exists.

    Candidates are checked in this order under ``_project_root``:
    ``.jarvis/rag/.ragignore``, then ``.gitignore``.

    Returns:
        ``(spec, root)`` when an ignore file was found and parsed, where
        ``root`` is ``Path(_project_root)``. ``(None, None)`` when neither
        file exists or loading fails; a warning is printed on failure.
    """
    # NOTE(review): ``_project_root`` is computed from this module's
    # ``__file__``, not from the current working directory — confirm that is
    # the directory whose ignore files should apply.
    root = Path(_project_root)
    candidates = (
        root / ".jarvis" / "rag" / ".ragignore",
        root / ".gitignore",
    )
    chosen = next((c for c in candidates if c.is_file()), None)
    if chosen is None:
        return None, None

    try:
        pattern_lines = chosen.read_text(encoding="utf-8").splitlines()
        spec = pathspec.PathSpec.from_lines("gitwildmatch", pattern_lines)
        print(f"✅ 加载忽略规则: {chosen}")
        return spec, root
    except Exception as e:
        print(f"⚠️ 加载 {chosen.name} 文件失败: {e}")

    return None, None
|
120
|
+
|
121
|
+
|
93
122
|
@app.command(
|
94
123
|
"add",
|
95
|
-
help="
|
124
|
+
help="从文件、目录或glob模式(例如 'src/**/*.py')添加文档。",
|
96
125
|
)
|
97
126
|
def add_documents(
|
98
127
|
paths: List[Path] = typer.Argument(
|
99
128
|
...,
|
100
|
-
help="
|
129
|
+
help="文件/目录路径或glob模式。支持Shell扩展。",
|
101
130
|
),
|
102
131
|
collection_name: str = typer.Option(
|
103
132
|
"jarvis_rag_collection",
|
104
133
|
"--collection",
|
105
134
|
"-c",
|
106
|
-
help="
|
135
|
+
help="向量数据库中集合的名称。",
|
107
136
|
),
|
108
|
-
|
137
|
+
embedding_model: Optional[str] = typer.Option(
|
109
138
|
None,
|
110
|
-
"--embedding-
|
139
|
+
"--embedding-model",
|
111
140
|
"-e",
|
112
|
-
help="
|
141
|
+
help="嵌入模型的名称。覆盖全局配置。",
|
113
142
|
),
|
114
143
|
db_path: Optional[Path] = typer.Option(
|
115
|
-
None, "--db-path", help="
|
144
|
+
None, "--db-path", help="向量数据库的路径。覆盖全局配置。"
|
145
|
+
),
|
146
|
+
batch_size: int = typer.Option(
|
147
|
+
500,
|
148
|
+
"--batch-size",
|
149
|
+
"-b",
|
150
|
+
help="单个批次中要处理的文档数。",
|
116
151
|
),
|
117
152
|
):
|
118
|
-
"""
|
153
|
+
"""从不同来源向RAG知识库添加文档。"""
|
119
154
|
files_to_process = set()
|
120
155
|
|
121
156
|
for path_str in paths:
|
122
|
-
# Typer
|
157
|
+
# Typer的List[Path]可能不会扩展glob,所以我们手动处理
|
123
158
|
from glob import glob
|
124
159
|
|
125
160
|
expanded_paths = glob(str(path_str), recursive=True)
|
@@ -141,59 +176,96 @@ def add_documents(
|
|
141
176
|
print(f"⚠️ 跳过可能的二进制文件: {path}")
|
142
177
|
|
143
178
|
if not files_to_process:
|
144
|
-
print(
|
179
|
+
print("⚠️ 在指定路径中未找到任何文本文件。")
|
180
|
+
return
|
181
|
+
|
182
|
+
# 使用 .ragignore 过滤文件
|
183
|
+
ragignore_spec, ragignore_root = _load_ragignore_spec()
|
184
|
+
if ragignore_spec and ragignore_root:
|
185
|
+
initial_count = len(files_to_process)
|
186
|
+
retained_files = set()
|
187
|
+
for file_path in files_to_process:
|
188
|
+
try:
|
189
|
+
# 将文件路径解析为绝对路径以确保正确比较
|
190
|
+
resolved_path = file_path.resolve()
|
191
|
+
relative_path = str(resolved_path.relative_to(ragignore_root))
|
192
|
+
if not ragignore_spec.match_file(relative_path):
|
193
|
+
retained_files.add(file_path)
|
194
|
+
except ValueError:
|
195
|
+
# 文件不在项目根目录下,保留它
|
196
|
+
retained_files.add(file_path)
|
197
|
+
|
198
|
+
ignored_count = initial_count - len(retained_files)
|
199
|
+
if ignored_count > 0:
|
200
|
+
print(f"ℹ️ 根据 .ragignore 规则过滤掉 {ignored_count} 个文件。")
|
201
|
+
files_to_process = retained_files
|
202
|
+
|
203
|
+
if not files_to_process:
|
204
|
+
print("⚠️ 所有找到的文本文件都被忽略规则过滤掉了。")
|
145
205
|
return
|
146
206
|
|
147
207
|
print(f"✅ 发现 {len(files_to_process)} 个独立文件待处理。")
|
148
208
|
|
149
209
|
try:
|
150
210
|
pipeline = JarvisRAGPipeline(
|
151
|
-
|
152
|
-
Optional[Literal["performance", "accuracy"]], embedding_mode
|
153
|
-
),
|
211
|
+
embedding_model=embedding_model,
|
154
212
|
db_path=str(db_path) if db_path else None,
|
155
213
|
collection_name=collection_name,
|
156
214
|
)
|
157
215
|
|
158
|
-
|
216
|
+
docs_batch: List[Document] = []
|
217
|
+
total_docs_added = 0
|
159
218
|
loader: BaseLoader
|
160
|
-
|
219
|
+
|
220
|
+
sorted_files = sorted(list(files_to_process))
|
221
|
+
total_files = len(sorted_files)
|
222
|
+
|
223
|
+
for i, file_path in enumerate(sorted_files):
|
161
224
|
try:
|
162
225
|
if file_path.suffix.lower() == ".md":
|
163
226
|
loader = UnstructuredMarkdownLoader(str(file_path))
|
164
|
-
else: #
|
227
|
+
else: # 对.txt和所有代码文件默认使用TextLoader
|
165
228
|
loader = TextLoader(str(file_path), encoding="utf-8")
|
166
229
|
|
167
|
-
|
168
|
-
print(f"✅ 已加载: {file_path}")
|
230
|
+
docs_batch.extend(loader.load())
|
231
|
+
print(f"✅ 已加载: {file_path} (文件 {i + 1}/{total_files})")
|
169
232
|
except Exception as e:
|
170
233
|
print(f"⚠️ 加载失败 {file_path}: {e}")
|
171
234
|
|
172
|
-
|
235
|
+
# 当批处理已满或是最后一个文件时处理批处理
|
236
|
+
if docs_batch and (len(docs_batch) >= batch_size or (i + 1) == total_files):
|
237
|
+
print(f"⚙️ 正在处理批次,包含 {len(docs_batch)} 个文档...")
|
238
|
+
pipeline.add_documents(docs_batch)
|
239
|
+
total_docs_added += len(docs_batch)
|
240
|
+
print(f"✅ 成功添加 {len(docs_batch)} 个文档。")
|
241
|
+
docs_batch = [] # 清空批处理
|
242
|
+
|
243
|
+
if total_docs_added == 0:
|
173
244
|
print("❌ 未能成功加载任何文档。")
|
174
245
|
raise typer.Exit(code=1)
|
175
246
|
|
176
|
-
|
177
|
-
|
247
|
+
print(
|
248
|
+
f"✅ 成功将 {total_docs_added} 个文档的内容添加至集合 '{collection_name}'。"
|
249
|
+
)
|
178
250
|
|
179
251
|
except Exception as e:
|
180
252
|
print(f"❌ 发生严重错误: {e}")
|
181
253
|
raise typer.Exit(code=1)
|
182
254
|
|
183
255
|
|
184
|
-
@app.command("list-docs", help="
|
256
|
+
@app.command("list-docs", help="列出知识库中所有唯一的文档。")
|
185
257
|
def list_documents(
|
186
258
|
collection_name: str = typer.Option(
|
187
259
|
"jarvis_rag_collection",
|
188
260
|
"--collection",
|
189
261
|
"-c",
|
190
|
-
help="
|
262
|
+
help="向量数据库中集合的名称。",
|
191
263
|
),
|
192
264
|
db_path: Optional[Path] = typer.Option(
|
193
|
-
None, "--db-path", help="
|
265
|
+
None, "--db-path", help="向量数据库的路径。覆盖全局配置。"
|
194
266
|
),
|
195
267
|
):
|
196
|
-
"""
|
268
|
+
"""列出指定集合中的所有唯一文档。"""
|
197
269
|
try:
|
198
270
|
pipeline = JarvisRAGPipeline(
|
199
271
|
db_path=str(db_path) if db_path else None,
|
@@ -201,13 +273,13 @@ def list_documents(
|
|
201
273
|
)
|
202
274
|
|
203
275
|
collection = pipeline.retriever.collection
|
204
|
-
results = collection.get() #
|
276
|
+
results = collection.get() # 获取集合中的所有项目
|
205
277
|
|
206
278
|
if not results or not results["metadatas"]:
|
207
279
|
print("ℹ️ 知识库中没有找到任何文档。")
|
208
280
|
return
|
209
281
|
|
210
|
-
#
|
282
|
+
# 从元数据中提取唯一的源文件路径
|
211
283
|
sources = set()
|
212
284
|
for metadata in results["metadatas"]:
|
213
285
|
if metadata:
|
@@ -228,38 +300,38 @@ def list_documents(
|
|
228
300
|
raise typer.Exit(code=1)
|
229
301
|
|
230
302
|
|
231
|
-
@app.command("query", help="
|
303
|
+
@app.command("query", help="向知识库提问。")
|
232
304
|
def query(
|
233
|
-
question: str = typer.Argument(..., help="
|
305
|
+
question: str = typer.Argument(..., help="要提出的问题。"),
|
234
306
|
collection_name: str = typer.Option(
|
235
307
|
"jarvis_rag_collection",
|
236
308
|
"--collection",
|
237
309
|
"-c",
|
238
|
-
help="
|
310
|
+
help="向量数据库中集合的名称。",
|
239
311
|
),
|
240
|
-
|
312
|
+
embedding_model: Optional[str] = typer.Option(
|
241
313
|
None,
|
242
|
-
"--embedding-
|
314
|
+
"--embedding-model",
|
243
315
|
"-e",
|
244
|
-
help="
|
316
|
+
help="嵌入模型的名称。覆盖全局配置。",
|
245
317
|
),
|
246
318
|
db_path: Optional[Path] = typer.Option(
|
247
|
-
None, "--db-path", help="
|
319
|
+
None, "--db-path", help="向量数据库的路径。覆盖全局配置。"
|
248
320
|
),
|
249
321
|
platform: Optional[str] = typer.Option(
|
250
322
|
None,
|
251
323
|
"--platform",
|
252
324
|
"-p",
|
253
|
-
help="
|
325
|
+
help="为LLM指定平台名称。覆盖默认的思考模型。",
|
254
326
|
),
|
255
327
|
model: Optional[str] = typer.Option(
|
256
328
|
None,
|
257
329
|
"--model",
|
258
330
|
"-m",
|
259
|
-
help="
|
331
|
+
help="为LLM指定模型名称。需要 --platform。",
|
260
332
|
),
|
261
333
|
):
|
262
|
-
"""
|
334
|
+
"""查询RAG知识库并打印答案。"""
|
263
335
|
if model and not platform:
|
264
336
|
print("❌ 错误: --model 需要指定 --platform。")
|
265
337
|
raise typer.Exit(code=1)
|
@@ -271,9 +343,7 @@ def query(
|
|
271
343
|
|
272
344
|
pipeline = JarvisRAGPipeline(
|
273
345
|
llm=custom_llm,
|
274
|
-
|
275
|
-
Optional[Literal["performance", "accuracy"]], embedding_mode
|
276
|
-
),
|
346
|
+
embedding_model=embedding_model,
|
277
347
|
db_path=str(db_path) if db_path else None,
|
278
348
|
collection_name=collection_name,
|
279
349
|
)
|
@@ -282,7 +352,7 @@ def query(
|
|
282
352
|
answer = pipeline.query(question)
|
283
353
|
|
284
354
|
print("💬 答案:")
|
285
|
-
#
|
355
|
+
# 我们仍然可以使用 rich.markdown.Markdown,因为 PrettyOutput 底层使用了 rich
|
286
356
|
from jarvis.jarvis_utils.globals import console
|
287
357
|
|
288
358
|
console.print(Markdown(answer))
|
@@ -292,6 +362,25 @@ def query(
|
|
292
362
|
raise typer.Exit(code=1)
|
293
363
|
|
294
364
|
|
365
|
+
_RAG_INSTALLED = False
|
366
|
+
try:
|
367
|
+
import langchain # noqa
|
368
|
+
|
369
|
+
_RAG_INSTALLED = True
|
370
|
+
except ImportError:
|
371
|
+
pass
|
372
|
+
|
373
|
+
|
374
|
+
def _check_rag_dependencies():
    """Print an install hint and exit with code 1 when ``_RAG_INSTALLED`` is false."""
    if _RAG_INSTALLED:
        return

    print(
        "❌ RAG依赖项未安装。"
        "请运行 'pip install \"jarvis-ai-assistant[rag]\"' 来使用此命令。"
    )
    raise typer.Exit(code=1)
|
381
|
+
|
382
|
+
|
295
383
|
def main():
    """Entry point: check RAG dependencies, initialise the environment, then run the Typer app."""
    # The dependency check runs first so it can exit before anything else starts.
    _check_rag_dependencies()
    init_env(welcome_str="Jarvis RAG")
    app()
|