fr-cli 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fr_cli/README.md +148 -0
- fr_cli/WEAPON.MD +186 -0
- fr_cli/__init__.py +4 -0
- fr_cli/addon/plugin.py +69 -0
- fr_cli/agent/__init__.py +9 -0
- fr_cli/agent/builtins/__init__.py +4 -0
- fr_cli/agent/builtins/_utils.py +48 -0
- fr_cli/agent/builtins/db.py +269 -0
- fr_cli/agent/builtins/local.py +105 -0
- fr_cli/agent/builtins/rag.py +652 -0
- fr_cli/agent/builtins/rag_watcher_daemon.py +156 -0
- fr_cli/agent/builtins/remote.py +214 -0
- fr_cli/agent/builtins/spider.py +247 -0
- fr_cli/agent/client.py +164 -0
- fr_cli/agent/executor.py +86 -0
- fr_cli/agent/generator.py +104 -0
- fr_cli/agent/manager.py +193 -0
- fr_cli/agent/master.py +604 -0
- fr_cli/agent/master_prompt.py +118 -0
- fr_cli/agent/remote.py +70 -0
- fr_cli/agent/server.py +279 -0
- fr_cli/agent/workflow.py +164 -0
- fr_cli/breakthrough/update.py +154 -0
- fr_cli/command/__init__.py +4 -0
- fr_cli/command/executor.py +276 -0
- fr_cli/command/registry.py +1034 -0
- fr_cli/command/security.py +30 -0
- fr_cli/conf/config.py +126 -0
- fr_cli/conf/wizard.py +172 -0
- fr_cli/core/chat.py +280 -0
- fr_cli/core/core.py +111 -0
- fr_cli/core/intent.py +129 -0
- fr_cli/core/recommender.py +71 -0
- fr_cli/core/stream.py +83 -0
- fr_cli/core/sysmon.py +117 -0
- fr_cli/core/thinking.py +215 -0
- fr_cli/gatekeeper/__init__.py +7 -0
- fr_cli/gatekeeper/daemon.py +216 -0
- fr_cli/gatekeeper/manager.py +218 -0
- fr_cli/lang/i18n.py +827 -0
- fr_cli/main.py +329 -0
- fr_cli/memory/context.py +119 -0
- fr_cli/memory/history.py +96 -0
- fr_cli/memory/session.py +134 -0
- fr_cli/repl/__init__.py +0 -0
- fr_cli/repl/commands.py +1098 -0
- fr_cli/security/security.py +46 -0
- fr_cli/ui/ui.py +116 -0
- fr_cli/weapon/cron.py +217 -0
- fr_cli/weapon/dataframe.py +97 -0
- fr_cli/weapon/disk.py +141 -0
- fr_cli/weapon/fs.py +206 -0
- fr_cli/weapon/launcher.py +249 -0
- fr_cli/weapon/loader.py +98 -0
- fr_cli/weapon/mail.py +227 -0
- fr_cli/weapon/mcp.py +204 -0
- fr_cli/weapon/vision.py +74 -0
- fr_cli/weapon/web.py +88 -0
- fr_cli-2.1.0.dist-info/METADATA +227 -0
- fr_cli-2.1.0.dist-info/RECORD +64 -0
- fr_cli-2.1.0.dist-info/WHEEL +5 -0
- fr_cli-2.1.0.dist-info/entry_points.txt +2 -0
- fr_cli-2.1.0.dist-info/licenses/LICENSE +21 -0
- fr_cli-2.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,652 @@
|
|
|
1
|
+
"""
|
|
2
|
+
@RAG 内置 Agent —— 本地知识库检索增强生成
|
|
3
|
+
使用 ChromaDB 持久化向量存储 + sentence-transformers 嵌入模型。
|
|
4
|
+
自动监控知识库目录,新文件自动向量化入库。
|
|
5
|
+
"""
|
|
6
|
+
import hashlib
|
|
7
|
+
import os
|
|
8
|
+
import signal
|
|
9
|
+
import sys
|
|
10
|
+
import threading
|
|
11
|
+
import time
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
# Optional dependencies are imported lazily; these globals cache the modules.
_chroma = None
_sentence_transformers = None


def _get_chroma():
    """Return the ``chromadb`` module, importing it on first use.

    The imported module is cached in the module-level ``_chroma`` global.
    Returns ``None`` when the package cannot be imported; in that case the
    import is attempted again on the next call.
    """
    global _chroma
    if _chroma is not None:
        return _chroma
    try:
        import chromadb as loaded_module
    except ImportError:
        return _chroma
    _chroma = loaded_module
    return _chroma
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _get_st():
    """Return the ``sentence_transformers`` module, importing it on first use.

    The imported module is cached in the module-level
    ``_sentence_transformers`` global. Returns ``None`` when the package
    cannot be imported; the import is then retried on the next call.
    """
    global _sentence_transformers
    if _sentence_transformers is not None:
        return _sentence_transformers
    try:
        import sentence_transformers as loaded_module
    except ImportError:
        return _sentence_transformers
    _sentence_transformers = loaded_module
    return _sentence_transformers
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class RAGManager:
    """Local knowledge-base manager: vector storage, directory sync and retrieval.

    Documents are split into overlapping character chunks, embedded with a
    sentence-transformers model and stored in a persistent ChromaDB
    collection named ``"kb"``. Queries run vector retrieval, an optional
    CrossEncoder re-rank, an LLM "judge" pass over the top three snippets and
    a final answer generation call.

    Both third-party packages are optional; every public method reports a
    "missing dependency" result instead of raising when they are absent.
    """

    DEFAULT_MODEL = "all-MiniLM-L6-v2"
    RERANKER_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
    CHUNK_SIZE = 500      # characters per chunk
    CHUNK_OVERLAP = 50    # characters shared by consecutive chunks

    def __init__(self, kb_dir=None, db_path=None):
        """Create a manager without touching disk or loading any model.

        Args:
            kb_dir: Knowledge-base directory to watch/sync, or ``None``.
            db_path: Directory of the persistent vector store. Defaults to
                ``~/.fr_cli_rag_db``.
        """
        self.kb_dir = Path(kb_dir) if kb_dir else None
        self.db_path = Path(db_path) if db_path else Path.home() / ".fr_cli_rag_db"
        self.client = None
        self.collection = None
        self.embedder = None
        self.reranker = None
        self._watcher_thread = None
        self._stop_watcher = threading.Event()
        self._file_state = {}  # str(path) -> "<mtime>_<size>" fingerprint
        self._initialized = False
        self._db_lock = threading.Lock()

    def _ensure_initialized(self):
        """Open the vector store and load the models on first use.

        Returns:
            ``True`` when the manager is ready, ``False`` when chromadb or
            sentence-transformers is not importable.
        """
        if self._initialized:
            return True
        chroma = _get_chroma()
        st = _get_st()
        if not chroma or not st:
            return False

        self.client = chroma.PersistentClient(path=str(self.db_path))
        self.collection = self.client.get_or_create_collection(name="kb")
        self.embedder = st.SentenceTransformer(self.DEFAULT_MODEL)
        try:
            from sentence_transformers import CrossEncoder
            self.reranker = CrossEncoder(self.RERANKER_MODEL)
        except Exception:
            # The re-ranker is optional; without it the vector order is used.
            self.reranker = None
        self._initialized = True
        return True

    # ---------- File handling ----------

    def _read_file(self, path):
        """Return the textual content of ``path``, or ``None`` if unusable.

        ``None`` is returned for missing files, files larger than 10 MiB,
        unsupported extensions and any read/parse failure. Spreadsheets
        (csv/xlsx/xls) are rendered through pandas, limited to 1000 rows.
        """
        path = Path(path)
        if not path.exists():
            return None
        try:
            if path.stat().st_size > 10 * 1024 * 1024:
                return None
        except Exception:
            return None

        suffix = path.suffix.lower()
        try:
            if suffix in (".txt", ".md", ".py", ".js", ".json", ".html", ".css", ".xml", ".yaml", ".yml"):
                return path.read_text(encoding="utf-8", errors="ignore")
            if suffix in (".csv",):
                import pandas as pd
                return pd.read_csv(path, nrows=1000).to_string(index=False)
            if suffix in (".xlsx", ".xls"):
                import pandas as pd
                return pd.read_excel(path, nrows=1000).to_string(index=False)
        except Exception:
            # Deliberately best-effort: a missing pandas install or a broken
            # file must not abort a whole directory sync.
            pass
        return None

    def _chunk_text(self, text, source):
        """Split ``text`` into overlapping chunks.

        Args:
            text: Full document text.
            source: Origin of the text; stored (as ``str``) with every chunk
                and mixed into the chunk id.

        Returns:
            A list of ``{"id", "text", "source"}`` dicts. The id is the MD5 of
            the source, the chunk index and the first 50 characters of the
            chunk. An empty text yields an empty list.
        """
        # Guard against a subclass setting overlap >= size, which would
        # otherwise never advance.
        step = max(1, self.CHUNK_SIZE - self.CHUNK_OVERLAP)
        text_len = len(text)
        chunks = []
        start = 0
        idx = 0
        while start < text_len:
            end = min(start + self.CHUNK_SIZE, text_len)
            chunk = text[start:end]
            chunk_id = hashlib.md5(f"{source}:{idx}:{chunk[:50]}".encode()).hexdigest()
            chunks.append({"id": chunk_id, "text": chunk, "source": str(source)})
            if end >= text_len:
                # The text is fully covered; another step would only emit a
                # tail that is already contained in this chunk's overlap.
                break
            start += step
            idx += 1
        return chunks

    def _file_hash(self, path):
        """Return a cheap change fingerprint (``"<mtime>_<size>"``) for ``path``.

        Returns an empty string when the file cannot be stat'ed.
        """
        try:
            stat = os.stat(path)
            return f"{stat.st_mtime}_{stat.st_size}"
        except Exception:
            return ""

    # ---------- Indexing ----------

    def _delete_source(self, source_key):
        """Remove every stored chunk whose ``source`` metadata is ``source_key``.

        The caller must hold ``self._db_lock``. Failures are ignored on
        purpose: cleanup is best-effort and must not block re-indexing.
        """
        try:
            found = self.collection.get(where={"source": source_key}, include=[])
            if found and found.get("ids"):
                self.collection.delete(ids=found["ids"])
        except Exception:
            pass

    def add_document(self, path):
        """Embed one file and store its chunks, replacing any earlier version.

        Old chunks of the same source are always purged first. The in-memory
        ``_file_state`` is empty after a restart, so relying on it would leave
        stale chunks of edited files in the persistent collection.

        Returns:
            ``(ok, message)`` where ``message`` is user-facing text.
        """
        if not self._ensure_initialized():
            return False, "缺少依赖: pip install chromadb sentence-transformers"

        text = self._read_file(path)
        if text is None:
            return False, f"无法读取文件: {path}"

        chunks = self._chunk_text(text, path)
        if not chunks:
            return False, "文件内容为空"

        source_key = str(path)
        texts = [c["text"] for c in chunks]
        with self._db_lock:
            # Embed before deleting so a failing encoder leaves the previous
            # version of the document in place.
            embeddings = self.embedder.encode(texts).tolist()
            self._delete_source(source_key)
            self.collection.add(
                ids=[c["id"] for c in chunks],
                embeddings=embeddings,
                documents=texts,
                metadatas=[{"source": c["source"]} for c in chunks],
            )
            # Updated under the lock because sync_directory iterates this
            # dict while holding the same lock.
            self._file_state[source_key] = self._file_hash(path)
        return True, f"已入库 {len(chunks)} 个片段"

    def sync_directory(self, kb_dir=None):
        """Bring the vector store in line with the knowledge-base directory.

        New or changed files (by mtime/size fingerprint) are (re-)indexed and
        chunks of files that disappeared since the last sync are removed.

        Args:
            kb_dir: Directory to scan; defaults to ``self.kb_dir``.

        Returns:
            ``(ok, message)``; ``message`` lists one line per processed file.
        """
        if not self._ensure_initialized():
            return False, "缺少依赖: pip install chromadb sentence-transformers"

        target = Path(kb_dir) if kb_dir else self.kb_dir
        if not target or not target.exists():
            return False, "知识库目录未设置或不存在"

        # Walk once and reuse the listing for both cleanup and indexing.
        paths = []
        for root, _, files in os.walk(target):
            for fname in files:
                paths.append(Path(root) / fname)
        current_files = {str(p) for p in paths}

        with self._db_lock:
            removed_sources = [p for p in self._file_state if p not in current_files]
            for source in removed_sources:
                self._delete_source(source)
                del self._file_state[source]

        results = []
        for path in paths:
            if self._file_state.get(str(path)) != self._file_hash(path):
                _ok, msg = self.add_document(path)
                results.append(f"{path.name}: {msg}")

        if not results:
            return True, "所有文件已是最新状态"
        return True, "\n".join(results)

    # ---------- Retrieval and generation ----------

    def _rerank(self, question, candidates, candidate_metas):
        """Order candidates by CrossEncoder relevance, best first.

        Returns the inputs unchanged when no re-ranker is loaded or there are
        no candidates. Ties keep their original relative order.
        """
        if not self.reranker or not candidates:
            return candidates, candidate_metas

        scores = self.reranker.predict([[question, doc] for doc in candidates])
        order = sorted(range(len(candidates)), key=lambda i: scores[i], reverse=True)
        return [candidates[i] for i in order], [candidate_metas[i] for i in order]

    def _judge_best_doc(self, question, docs, client, model, lang):
        """Ask the LLM which of up to three snippets best answers ``question``.

        Returns:
            Zero-based index into ``docs``. Falls back to ``0`` when there is
            at most one snippet or the reply cannot be parsed.
        """
        if len(docs) <= 1:
            return 0

        if lang == "zh":
            judge_prompt = f"""你是一个知识库检索结果评估专家。以下是从向量库中检索出的候选片段,请评估哪一个最适合用来回答用户问题。

评估标准(每项1-10分):
1. 相关性:片段内容与用户问题的匹配程度
2. 完整性:片段是否包含足够的信息来回答问题
3. 准确性:片段信息是否直接对应问题的核心诉求

请对每条片段打分并给出简短理由,然后明确指出最佳片段的编号(只输出1/2/3中的一个数字)。

用户问题: {question}

片段1:
{docs[0] if len(docs) > 0 else "(无)"}

片段2:
{docs[1] if len(docs) > 1 else "(无)"}

片段3:
{docs[2] if len(docs) > 2 else "(无)"}

请严格按以下格式输出:
片段1评分: X分 | 理由: ...
片段2评分: X分 | 理由: ...
片段3评分: X分 | 理由: ...
最佳片段编号: N"""
        else:
            judge_prompt = f"""You are a knowledge base retrieval evaluation expert. Please evaluate which of the following candidate snippets is most suitable for answering the user's question.

Scoring criteria (1-10 each):
1. Relevance: How well the snippet matches the user's question
2. Completeness: Whether the snippet contains enough information to answer
3. Accuracy: Whether the information directly addresses the core question

Please score each snippet with a brief rationale, then clearly indicate the best snippet number (output only 1/2/3).

User Question: {question}

Snippet 1:
{docs[0] if len(docs) > 0 else "(none)"}

Snippet 2:
{docs[1] if len(docs) > 1 else "(none)"}

Snippet 3:
{docs[2] if len(docs) > 2 else "(none)"}

Strict output format:
Snippet 1 score: X | Rationale: ...
Snippet 2 score: X | Rationale: ...
Snippet 3 score: X | Rationale: ...
Best snippet number: N"""

        import re
        from fr_cli.core.stream import stream_cnt

        messages = [{"role": "user", "content": judge_prompt}]
        txt, _, _ = stream_cnt(client, model, messages, lang, custom_prefix="", max_tokens=1024, silent=True)

        # An empty/None reply must not crash the query; models also often
        # answer with a full-width colon, so accept both forms.
        reply = txt or ""
        match = (
            re.search(r"最佳片段编号[::]\s*(\d)", reply)
            or re.search(r"Best snippet number[::]\s*(\d)", reply)
        )
        if match:
            idx = int(match.group(1)) - 1
            if 0 <= idx < len(docs):
                return idx
        return 0

    def query(self, question, client, model, lang="zh", top_k=5):
        """Answer ``question`` from the knowledge base.

        Pipeline: vector retrieval -> re-rank -> LLM picks the best of the
        top three snippets -> LLM generates the answer from those snippets.

        Args:
            question: User question.
            client, model: Passed through to ``stream_cnt``.
            lang: ``"zh"`` selects the Chinese prompts, anything else English.
            top_k: Only widens the retrieval pool (``max(top_k * 3, 15)``);
                the answer prompt always uses the top three snippets.

        Returns:
            ``(answer, None)`` on success or ``(None, error_message)``.
        """
        if not self._ensure_initialized():
            return None, "缺少依赖: pip install chromadb sentence-transformers"

        with self._db_lock:
            total = self.collection.count()
        if total == 0:
            return None, "知识库为空,请先设置知识库目录并同步。"

        # Step 1: vector retrieval with an enlarged pool for the re-ranker,
        # never asking for more entries than the collection holds.
        retrieve_k = min(max(top_k * 3, 15), total)
        q_emb = self.embedder.encode([question]).tolist()
        results = self.collection.query(
            query_embeddings=q_emb, n_results=retrieve_k, include=["documents", "metadatas"]
        )

        doc_lists = results.get("documents") or []
        meta_lists = results.get("metadatas") or []
        candidates = []
        candidate_metas = []
        for i, doc_list in enumerate(doc_lists):
            for j, doc in enumerate(doc_list):
                meta = meta_lists[i][j] if meta_lists else {}
                candidates.append(doc)
                # Entries stored without metadata come back as None.
                candidate_metas.append(meta or {})

        if not candidates:
            return None, "未检索到相关知识。"

        # Step 2: re-rank, Step 3: keep the top three.
        reranked_docs, reranked_metas = self._rerank(question, candidates, candidate_metas)
        top3_docs = reranked_docs[:3]
        top3_metas = reranked_metas[:3]

        # Step 4: let the LLM pick the most suitable snippet.
        best_idx = self._judge_best_doc(question, top3_docs, client, model, lang)

        # Step 5: build the augmented prompt. Labels follow the prompt
        # language so the marker the instructions refer to really appears.
        if lang == "zh":
            label, best_marker, source_label, unknown = "片段", " ★【最佳】", "来源", "未知"
        else:
            label, best_marker, source_label, unknown = "Snippet ", " ★【BEST】", "Source", "unknown"

        doc_blocks = []
        for idx, (doc, meta) in enumerate(zip(top3_docs, top3_metas), 1):
            marker = best_marker if idx - 1 == best_idx else ""
            source = meta.get("source", unknown)
            doc_blocks.append(f"{label}{idx}{marker} [{source_label}: {source}]\n{doc}")

        context = "\n\n---\n\n".join(doc_blocks)

        if lang == "zh":
            prompt = f"""你是一个知识库问答助手。以下是从知识库中检索出的 Top-3 相关片段(已按相关性重排序)。
其中标注 ★【最佳】的片段是大模型判定最适合回答用户问题的来源。

知识片段:
{context}

用户问题: {question}

回答要求:
1. 优先基于 ★【最佳】片段进行回答
2. 如果最佳片段不足以完整回答,可以综合其他片段补充
3. 如果所有片段都不足以回答,请明确说明
4. 引用来源时请标注 [来源: 文件名]
5. 请用中文给出准确、简洁的回答
"""
        else:
            prompt = f"""You are a knowledge base Q&A assistant. Below are the Top-3 relevant snippets retrieved from the knowledge base (re-ranked by relevance).
The snippet marked with ★【BEST】has been judged by the model as the most suitable source for answering the user's question.

Knowledge Snippets:
{context}

User Question: {question}

Instructions:
1. Prioritize the ★【BEST】snippet for your answer
2. If the best snippet is insufficient, you may supplement with other snippets
3. If none of the snippets can answer the question, state so clearly
4. Cite sources as [Source: filename]
5. Give an accurate and concise answer
"""

        from fr_cli.core.stream import stream_cnt
        messages = [{"role": "user", "content": prompt}]
        result, _, _ = stream_cnt(client, model, messages, lang, custom_prefix="", max_tokens=4096)
        return result, None

    # ---------- Background watcher ----------

    def start_watcher(self, kb_dir=None):
        """Start a daemon thread that re-syncs the directory every 30 seconds.

        A watcher that is still running is signalled to stop first, so at
        most one watcher is active per manager.

        Returns:
            ``(ok, message)``; ``ok`` is ``False`` when no directory is set.
        """
        target = Path(kb_dir) if kb_dir else self.kb_dir
        if not target:
            return False, "知识库目录未设置"

        self.stop_watcher()
        self.kb_dir = target
        # Each watcher owns its stop event. Re-using and clearing a shared
        # event would revive a previous thread that has not noticed its stop
        # request yet.
        stop_event = threading.Event()
        self._stop_watcher = stop_event

        def _watch():
            while not stop_event.is_set():
                self.sync_directory()
                # wait() instead of sleep(): returns as soon as stop is set.
                stop_event.wait(30)

        self._watcher_thread = threading.Thread(target=_watch, daemon=True)
        self._watcher_thread.start()
        return True, f"后台监控已启动: {target}(每30秒扫描)"

    def stop_watcher(self):
        """Signal the background watcher to stop; does not wait for it."""
        self._stop_watcher.set()
        self._watcher_thread = None
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
# ---------- Process-wide singleton ----------
_rag_manager = None


def get_rag_manager(kb_dir=None):
    """Return the shared ``RAGManager``, creating it on first call.

    When ``kb_dir`` is given and differs from the directory the shared
    manager currently points at, the manager is re-pointed to it.
    """
    global _rag_manager
    if _rag_manager is None:
        _rag_manager = RAGManager(kb_dir=kb_dir)
    if kb_dir:
        requested = Path(kb_dir)
        if _rag_manager.kb_dir != requested:
            _rag_manager.kb_dir = requested
    return _rag_manager
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
def handle_rag(user_input, state):
    """Handle a REPL request that starts with the ``@RAG`` prefix.

    Flow: make sure a knowledge-base directory is configured (prompting for
    and persisting one if needed), sync it, make sure some watcher keeps it
    fresh, then run the retrieval-augmented query and print the answer.

    Args:
        user_input: Raw input line beginning with ``@RAG``.
        state: Session state; this function reads ``cfg``, ``client``,
            ``model_name`` and ``lang`` from it.
    """
    from fr_cli.ui.ui import CYAN, GREEN, RED, YELLOW, DIM, RESET

    question = user_input[len("@RAG"):].strip()
    if not question:
        print(f"{RED}用法: @RAG <问题>{RESET}")
        return

    # Make sure a knowledge-base directory is configured.
    kb_dir = state.cfg.get("rag_dir", "")
    if not kb_dir:
        print(f"{YELLOW}未设置知识库目录。{RESET}")
        entered = input(f"{DIM}请输入知识库目录路径: {RESET}").strip()
        # expanduser() makes "~/notes" work; is_dir() rejects a regular file,
        # which exists() would accept and which could never be synced.
        candidate = Path(entered).expanduser() if entered else None
        if candidate is None or not candidate.is_dir():
            print(f"{RED}目录不存在。{RESET}")
            return
        kb_dir = str(candidate)
        state.cfg["rag_dir"] = kb_dir
        from fr_cli.conf.config import save_config
        save_config(state.cfg)

    mgr = get_rag_manager(kb_dir)

    # Initial sync before answering.
    print(f"{CYAN}📚 正在同步知识库...{RESET}")
    ok, msg = mgr.sync_directory()
    if ok:
        print(f"{GREEN}{msg}{RESET}")
    else:
        print(f"{YELLOW}{msg}{RESET}")

    # A standalone daemon already keeps the store fresh; only fall back to
    # the in-process watcher thread when no daemon is running.
    watcher = RAGWatcherManager()
    if watcher.is_running():
        print(f"{DIM}ℹ️ 独立守护进程正在后台运行,知识库将自动同步。{RESET}")
    else:
        if mgr._watcher_thread is None or not mgr._watcher_thread.is_alive():
            mgr.start_watcher()

    print(f"{CYAN}🔍 正在检索知识库并生成回答...{RESET}")
    result, err = mgr.query(question, state.client, state.model_name, state.lang)
    if err:
        print(f"{RED}{err}{RESET}")
    else:
        print(f"\n{GREEN}{result}{RESET}")
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
# ---------- 独立守护进程管理器 ----------
|
|
476
|
+
|
|
477
|
+
import subprocess
|
|
478
|
+
|
|
479
|
+
# Files shared between this manager and the watcher daemon.
RAG_WATCHER_PID_FILE = Path.home() / ".fr_cli_rag_watcher.pid"
RAG_WATCHER_STOP_FILE = Path.home() / ".fr_cli_rag_watcher.stop"
RAG_WATCHER_LOG_FILE = Path.home() / ".fr_cli_rag_watcher.log"


class RAGWatcherManager:
    """Start, stop and inspect the standalone knowledge-base watcher daemon.

    The daemon is a separate Python process running ``rag_watcher_daemon.py``
    (located next to this module). It is tracked through a PID file and is
    asked to stop through a stop-marker file, both in the user's home
    directory.
    """

    @staticmethod
    def _daemon_script_path():
        """Return the path of the daemon script that sits next to this file."""
        return Path(__file__).with_name("rag_watcher_daemon.py")

    @staticmethod
    def _read_pid():
        """Return the PID recorded in the PID file, or ``None``.

        ``None`` is returned when the file is missing, unreadable, not an
        integer, or holds a non-positive number. Non-positive values must
        never reach ``os.kill``: on POSIX ``0`` and negative PIDs address
        whole process groups or every process of the user.
        """
        try:
            pid = int(RAG_WATCHER_PID_FILE.read_text(encoding="utf-8").strip())
        except (OSError, ValueError):
            return None
        return pid if pid > 0 else None

    @staticmethod
    def _is_pid_alive(pid):
        """Return ``True`` when a process with ``pid`` can be signalled/opened.

        Non-positive or missing PIDs are reported as not alive. A process
        owned by another user (``PermissionError``) is also reported as not
        alive, since it cannot be this user's daemon.
        """
        if not pid or pid <= 0:
            return False
        try:
            if sys.platform == "win32":
                import ctypes
                kernel32 = ctypes.windll.kernel32
                # NOTE(review): access mask 1 is PROCESS_TERMINATE; a handle
                # can still be opened for an exited process that others keep
                # a handle to - confirm on Windows.
                handle = kernel32.OpenProcess(1, False, pid)
                if handle:
                    kernel32.CloseHandle(handle)
                    return True
                return False
            # Signal 0 performs the existence/permission check only.
            os.kill(pid, 0)
            return True
        except OSError:
            return False

    @staticmethod
    def _cleanup_files():
        """Remove the PID and stop-marker files, ignoring any failure."""
        for stale in (RAG_WATCHER_PID_FILE, RAG_WATCHER_STOP_FILE):
            try:
                stale.unlink()
            except OSError:
                # Missing file or no permission: cleanup is best-effort.
                pass

    def _wait_for_exit(self, pid, attempts, delay=0.5):
        """Poll up to ``attempts`` times; return ``True`` once ``pid`` is gone."""
        for _ in range(attempts):
            if not self._is_pid_alive(pid):
                return True
            time.sleep(delay)
        return False

    @staticmethod
    def _send_signal(pid, sig):
        """Send ``sig`` to ``pid``; a vanished or protected process is ignored."""
        try:
            os.kill(pid, sig)
        except (OSError, ValueError):
            pass

    def is_running(self):
        """Return whether the daemon is alive, removing stale state files."""
        pid = self._read_pid()
        if pid and self._is_pid_alive(pid):
            return True
        if RAG_WATCHER_PID_FILE.exists():
            self._cleanup_files()
        return False

    def start(self, kb_dir, db_path=None, interval=30):
        """Launch the standalone watcher daemon.

        Args:
            kb_dir: Knowledge-base directory to watch; must exist.
            db_path: Optional vector-store path forwarded as ``--db_path``.
            interval: Scan interval in seconds; values below 5 are raised
                to 5.

        Returns:
            ``(ok, message)`` with a user-facing message.
        """
        if self.is_running():
            pid = self._read_pid()
            return False, f"RAG 守护进程已在运行 (PID: {pid})"

        self._cleanup_files()
        daemon_script = self._daemon_script_path()
        if not daemon_script.exists():
            return False, f"守护进程脚本不存在: {daemon_script}"

        target = Path(kb_dir)
        if not target.exists():
            return False, f"知识库目录不存在: {kb_dir}"

        try:
            kwargs = {}
            if sys.platform == "win32":
                kwargs["creationflags"] = subprocess.CREATE_NEW_PROCESS_GROUP
            else:
                # Run in its own session so Ctrl+C / terminal hang-up aimed
                # at the CLI's process group does not take the daemon down.
                kwargs["start_new_session"] = True

            cmd = [
                sys.executable, str(daemon_script),
                "--kb_dir", str(target.resolve()),
                "--interval", str(max(5, interval)),
            ]
            if db_path:
                cmd.extend(["--db_path", str(db_path)])

            proc = subprocess.Popen(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                stdin=subprocess.DEVNULL,
                close_fds=True,
                **kwargs
            )

            # Wait for the PID file to be written (presumably by the daemon
            # script itself - it is not written here).
            for _ in range(10):
                time.sleep(0.3)
                pid = self._read_pid()
                if pid and self._is_pid_alive(pid):
                    return True, f"RAG 守护进程已启动 (PID: {pid})"
                if proc.poll() is not None:
                    return False, "守护进程启动后立即退出,请检查日志: ~/.fr_cli_rag_watcher.log"

            return True, f"RAG 守护进程已启动 (PID: {proc.pid})"
        except Exception as e:
            return False, f"启动失败: {e}"

    def stop(self):
        """Stop the daemon: stop-marker first, then signals as a fallback.

        Escalation: write the stop marker and wait, then SIGTERM (or
        CTRL_BREAK on Windows) and wait, then SIGKILL on POSIX.

        Returns:
            ``(ok, message)``; ``ok`` is ``False`` when nothing was running
            or the stop marker could not be written.
        """
        pid = self._read_pid()
        if not pid:
            self._cleanup_files()
            return False, "RAG 守护进程未运行。"

        if not self._is_pid_alive(pid):
            self._cleanup_files()
            return False, "RAG 守护进程未运行(已清理残留状态)。"

        # Cooperative shutdown request.
        try:
            RAG_WATCHER_STOP_FILE.write_text("1", encoding="utf-8")
        except OSError as e:
            return False, f"发送停止信号失败: {e}"

        if self._wait_for_exit(pid, attempts=15):
            self._cleanup_files()
            return True, "RAG 守护进程已停止。"

        # The daemon did not react to the marker: terminate it.
        if sys.platform == "win32":
            self._send_signal(pid, signal.CTRL_BREAK_EVENT)
        else:
            self._send_signal(pid, signal.SIGTERM)

        if self._wait_for_exit(pid, attempts=5):
            self._cleanup_files()
            return True, "RAG 守护进程已停止。"

        if sys.platform != "win32":
            # Last resort so that "force stopped" is actually true.
            self._send_signal(pid, signal.SIGKILL)
            self._wait_for_exit(pid, attempts=2)

        self._cleanup_files()
        return True, "RAG 守护进程已强制停止。"

    def status(self):
        """Return a user-facing status line, removing stale state files."""
        pid = self._read_pid()
        if not pid:
            return "RAG 守护进程未运行。"
        if self._is_pid_alive(pid):
            return f"RAG 守护进程运行中 (PID: {pid})"
        self._cleanup_files()
        return "RAG 守护进程未运行(已清理残留状态)。"

    @staticmethod
    def get_log(lines=50):
        """Return the last ``lines`` lines of the daemon log.

        A non-positive ``lines`` yields an empty string. Only the requested
        tail is kept in memory while the file is read.
        """
        from collections import deque

        if not RAG_WATCHER_LOG_FILE.exists():
            return "暂无日志。"
        if lines <= 0:
            # A plain [-0:] slice would return the entire log instead.
            return ""
        try:
            with open(RAG_WATCHER_LOG_FILE, "r", encoding="utf-8") as f:
                return "".join(deque(f, maxlen=lines))
        except (OSError, UnicodeError) as e:
            return f"读取日志失败: {e}"
|