jarvis-ai-assistant 0.1.91__py3-none-any.whl → 0.1.92__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of jarvis-ai-assistant might be problematic.
- jarvis/__init__.py +1 -1
- jarvis/agent.py +2 -0
- jarvis/jarvis_codebase/main.py +268 -176
- jarvis/jarvis_platform/main.py +13 -2
- jarvis/jarvis_rag/main.py +185 -49
- jarvis/jarvis_smart_shell/main.py +16 -9
- jarvis/main.py +9 -0
- jarvis/models/ollama.py +3 -3
- jarvis/tools/thinker.py +25 -79
- jarvis/utils.py +5 -1
- {jarvis_ai_assistant-0.1.91.dist-info → jarvis_ai_assistant-0.1.92.dist-info}/METADATA +3 -1
- {jarvis_ai_assistant-0.1.91.dist-info → jarvis_ai_assistant-0.1.92.dist-info}/RECORD +16 -16
- {jarvis_ai_assistant-0.1.91.dist-info → jarvis_ai_assistant-0.1.92.dist-info}/LICENSE +0 -0
- {jarvis_ai_assistant-0.1.91.dist-info → jarvis_ai_assistant-0.1.92.dist-info}/WHEEL +0 -0
- {jarvis_ai_assistant-0.1.91.dist-info → jarvis_ai_assistant-0.1.92.dist-info}/entry_points.txt +0 -0
- {jarvis_ai_assistant-0.1.91.dist-info → jarvis_ai_assistant-0.1.92.dist-info}/top_level.txt +0 -0
jarvis/jarvis_rag/main.py
CHANGED
@@ -1,13 +1,10 @@
 import os
-import hashlib
 import numpy as np
 import faiss
 from typing import List, Tuple, Optional, Dict
-from sentence_transformers import SentenceTransformer
 import pickle
-from jarvis.utils import OutputType, PrettyOutput,
+from jarvis.utils import OutputType, PrettyOutput, get_file_md5, get_max_context_length, load_embedding_model, load_rerank_model
 from jarvis.utils import load_env_from_file
-import tiktoken
 from dataclasses import dataclass
 from tqdm import tqdm
 import fitz  # PyMuPDF for PDF files
@@ -16,12 +13,16 @@ from pathlib import Path
 from jarvis.models.registry import PlatformRegistry
 import shutil
 from datetime import datetime
+import lzma  # add lzma import
+from concurrent.futures import ThreadPoolExecutor
+from threading import Lock
 
 @dataclass
 class Document:
     """Document class for storing document content and metadata"""
     content: str  # document content
     metadata: Dict  # metadata (file path, position, etc.)
+    md5: str = ""  # file MD5, used to detect incremental updates
 
 class FileProcessor:
     """Base class for file processors"""
@@ -163,7 +164,9 @@ class RAGTool:
         # Initialize cache and index
         self.cache_path = os.path.join(self.data_dir, "cache.pkl")
         self.documents: List[Document] = []
-        self.index = None
+        self.index = None  # IVF index used for searching
+        self.flat_index = None  # stores the raw vectors
+        self.file_md5_cache = {}  # stores per-file MD5 values
 
         # Load the cache
         self._load_cache()
@@ -175,17 +178,23 @@ class RAGTool:
             DocxProcessor()
         ]
 
+        # Thread-related configuration
+        self.thread_count = int(os.environ.get("JARVIS_THREAD_COUNT", os.cpu_count() or 4))
+        self.vector_lock = Lock()  # protects concurrent access to the vector list
+
     def _load_cache(self):
         """Load cached data"""
         if os.path.exists(self.cache_path):
            try:
-                with open(self.cache_path, 'rb') as f:
+                with lzma.open(self.cache_path, 'rb') as f:
                    cache_data = pickle.load(f)
                    self.documents = cache_data["documents"]
                    vectors = cache_data["vectors"]
+                    self.file_md5_cache = cache_data.get("file_md5_cache", {})  # load the MD5 cache
 
                # Rebuild the index
-
+                if vectors is not None:
+                    self._build_index(vectors)
                PrettyOutput.print(f"Loaded {len(self.documents)} document chunks",
                                   output_type=OutputType.INFO)
            except Exception as e:
@@ -193,16 +202,18 @@ class RAGTool:
                                   output_type=OutputType.WARNING)
                self.documents = []
                self.index = None
+                self.flat_index = None
+                self.file_md5_cache = {}
 
     def _save_cache(self, vectors: np.ndarray):
         """Optimized cache saving"""
         try:
-            # Add a version number and timestamp
             cache_data = {
                 "version": "1.0",
                 "timestamp": datetime.now().isoformat(),
                 "documents": self.documents,
-                "vectors": vectors,
+                "vectors": vectors.copy() if vectors is not None else None,  # copy the array
+                "file_md5_cache": dict(self.file_md5_cache),  # copy the dict
                 "metadata": {
                     "vector_dim": self.vector_dim,
                     "total_docs": len(self.documents),
@@ -210,9 +221,12 @@ class RAGTool:
                 }
             }
 
-            #
-
-
+            # Serialize the data to a byte stream first
+            data = pickle.dumps(cache_data, protocol=pickle.HIGHEST_PROTOCOL)
+
+            # Then compress the byte stream with LZMA
+            with lzma.open(self.cache_path, 'wb') as f:
+                f.write(data)
 
             # Create a backup
             backup_path = f"{self.cache_path}.backup"
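The cache is now written by pickling the whole payload to bytes and running the result through LZMA, and _load_cache reads it back through lzma.open, which decompresses transparently. A minimal sketch of the same round trip using only the standard library (the payload dict here is illustrative):

import lzma
import pickle

def save_compressed(path: str, payload: dict) -> None:
    # Serialize first, then compress the complete byte stream
    data = pickle.dumps(payload, protocol=pickle.HIGHEST_PROTOCOL)
    with lzma.open(path, "wb") as f:
        f.write(data)

def load_compressed(path: str) -> dict:
    # lzma.open decompresses transparently on read
    with lzma.open(path, "rb") as f:
        return pickle.load(f)

cache = {"version": "1.0", "documents": [], "vectors": None}
save_compressed("cache.pkl", cache)
assert load_compressed("cache.pkl")["version"] == "1.0"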
@@ -223,22 +237,29 @@ class RAGTool:
         except Exception as e:
             PrettyOutput.print(f"Failed to save cache: {str(e)}",
                                output_type=OutputType.ERROR)
+            raise
 
     def _build_index(self, vectors: np.ndarray):
         """Build the FAISS index"""
-
+        if vectors.shape[0] == 0:
+            self.index = None
+            self.flat_index = None
+            return
+
+        # Create a flat index that stores the raw vectors, used for reconstruction
+        self.flat_index = faiss.IndexFlatIP(self.vector_dim)
+        self.flat_index.add(vectors)
+
+        # Create an IVF index for fast search
         nlist = max(4, int(vectors.shape[0] / 1000))  # one cluster centroid per 1000 vectors
         quantizer = faiss.IndexFlatIP(self.vector_dim)
         self.index = faiss.IndexIVFFlat(quantizer, self.vector_dim, nlist, faiss.METRIC_INNER_PRODUCT)
 
-
-
-
-
-
-            self.index.nprobe = min(nlist, 10)
-        else:
-            self.index = None
+        # Train the index and add the vectors
+        self.index.train(vectors)
+        self.index.add(vectors)
+        # Number of clusters probed at search time
+        self.index.nprobe = min(nlist, 10)
 
     def _split_text(self, text: str) -> List[str]:
         """Use a smarter chunking strategy"""
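The rewritten _build_index keeps two FAISS indexes side by side: an exact IndexFlatIP that retains the raw vectors (so individual rows can be reconstructed later when merging incremental updates) and a trained IndexIVFFlat for fast approximate search. A minimal sketch of the same pattern, with invented dimensions and random data:

import faiss
import numpy as np

dim = 384
vectors = np.random.rand(5000, dim).astype(np.float32)
faiss.normalize_L2(vectors)  # unit vectors make inner product equal cosine similarity

# Exact flat index: keeps raw vectors and supports reconstruct()
flat = faiss.IndexFlatIP(dim)
flat.add(vectors)

# IVF index: clusters vectors into nlist cells for faster search
nlist = max(4, vectors.shape[0] // 1000)
quantizer = faiss.IndexFlatIP(dim)
ivf = faiss.IndexIVFFlat(quantizer, dim, nlist, faiss.METRIC_INNER_PRODUCT)
ivf.train(vectors)           # IVF must be trained before add()
ivf.add(vectors)
ivf.nprobe = min(nlist, 10)  # cells probed per query

# Recover a stored vector from the flat index by its insertion position
row = flat.reconstruct(42)   # returns a (dim,) float32 array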
@@ -302,16 +323,58 @@ class RAGTool:
                                                show_progress_bar=False)
         return np.array(embedding, dtype=np.float32)
 
-    def
-        """
+    def _get_embedding_batch(self, texts: List[str]) -> np.ndarray:
+        """Get vector representations for a batch of texts
 
        Args:
-
+            texts: list of texts
 
        Returns:
-
+            np.ndarray: array of vector representations
        """
        try:
+            embeddings = self.embedding_model.encode(texts,
+                                                     normalize_embeddings=True,
+                                                     show_progress_bar=False,
+                                                     batch_size=32)  # batching improves efficiency
+            return np.array(embeddings, dtype=np.float32)
+        except Exception as e:
+            PrettyOutput.print(f"Failed to get embeddings: {str(e)}",
+                               output_type=OutputType.ERROR)
+            return np.zeros((len(texts), self.vector_dim), dtype=np.float32)
+
+    def _process_document_batch(self, documents: List[Document]) -> List[np.ndarray]:
+        """Vectorize a batch of documents
+
+        Args:
+            documents: list of documents
+
+        Returns:
+            List[np.ndarray]: list of vectors
+        """
+        texts = []
+        for doc in documents:
+            # Combine the document information
+            combined_text = f"""
+File: {doc.metadata['file_path']}
+Content: {doc.content}
+"""
+            texts.append(combined_text)
+
+        return self._get_embedding_batch(texts)
+
+    def _process_file(self, file_path: str) -> List[Document]:
+        """Process a single file"""
+        try:
+            # Compute the file's MD5
+            current_md5 = get_file_md5(file_path)
+            if not current_md5:
+                return []
+
+            # Check whether the file needs to be re-processed
+            if file_path in self.file_md5_cache and self.file_md5_cache[file_path] == current_md5:
+                return []
+
            # Find a suitable processor
            processor = None
            for p in self.file_processors:
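_get_embedding_batch replaces per-text encoding with a single encode() call over a list, letting sentence-transformers handle the batching. A standalone sketch, assuming a sentence-transformers model is available (the model name below is an arbitrary example, not necessarily what load_embedding_model returns):

import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")  # example model, an assumption

texts = [f"File: doc_{i}.txt\nContent: example text {i}" for i in range(100)]

# One call embeds the whole list; batch_size bounds each forward pass
embeddings = model.encode(texts,
                          normalize_embeddings=True,
                          show_progress_bar=False,
                          batch_size=32)
vectors = np.array(embeddings, dtype=np.float32)
print(vectors.shape)  # (100, embedding_dim)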
@@ -320,18 +383,14 @@ class RAGTool:
                    break
 
            if not processor:
-
-                                   output_type=OutputType.WARNING)
+                # If no suitable processor is found, return an empty document list
                return []
 
            # Extract the text content
            content = processor.extract_text(file_path)
            if not content.strip():
-                PrettyOutput.print(f"File content is empty: {file_path}",
-                                   output_type=OutputType.WARNING)
                return []
 
-
            # Split the text
            chunks = self._split_text(content)
 
@@ -345,10 +404,13 @@ class RAGTool:
                        "file_type": Path(file_path).suffix.lower(),
                        "chunk_index": i,
                        "total_chunks": len(chunks)
-                    }
+                    },
+                    md5=current_md5
                )
                documents.append(doc)
-
+
+            # Update the MD5 cache
+            self.file_md5_cache[file_path] = current_md5
            return documents
 
        except Exception as e:
@@ -361,43 +423,117 @@ class RAGTool:
        # Get all files
        all_files = []
        for root, _, files in os.walk(dir):
-            # Ignore specific directories
            if any(ignored in root for ignored in ['.git', '__pycache__', 'node_modules']) or \
               any(part.startswith('.jarvis-') for part in root.split(os.sep)):
                continue
            for file in files:
-                # Ignore files starting with .jarvis-
                if file.startswith('.jarvis-'):
                    continue
 
                file_path = os.path.join(root, file)
-                # Skip large files
                if os.path.getsize(file_path) > 100 * 1024 * 1024:  # 100MB
                    PrettyOutput.print(f"Skipping large file: {file_path}",
                                       output_type=OutputType.WARNING)
                    continue
                all_files.append(file_path)
 
-        #
-        self.
-        for file_path in
-
-
+        # Clean cache entries for deleted files
+        deleted_files = set(self.file_md5_cache.keys()) - set(all_files)
+        for file_path in deleted_files:
+            del self.file_md5_cache[file_path]
+            # Remove the related documents
+            self.documents = [doc for doc in self.documents if doc.metadata['file_path'] != file_path]
 
-        #
-
-
-
-
+        # Check for file changes
+        files_to_process = []
+        unchanged_files = []
+
+        with tqdm(total=len(all_files), desc="Checking file status") as pbar:
+            for file_path in all_files:
+                current_md5 = get_file_md5(file_path)
+                if current_md5:  # only handle files whose MD5 could be computed
+                    if file_path in self.file_md5_cache and self.file_md5_cache[file_path] == current_md5:
+                        # File unchanged: record it but do not re-process it
+                        unchanged_files.append(file_path)
+                    else:
+                        # New or modified file
+                        files_to_process.append(file_path)
+                pbar.update(1)
+
+        # Keep the documents of unchanged files
+        unchanged_documents = [doc for doc in self.documents
+                               if doc.metadata['file_path'] in unchanged_files]
+
+        # Process new and modified files
+        new_documents = []
+        if files_to_process:
+            with tqdm(total=len(files_to_process), desc="Processing files") as pbar:
+                for file_path in files_to_process:
+                    try:
+                        docs = self._process_file(file_path)
+                        if len(docs) > 0:
+                            new_documents.extend(docs)
+                    except Exception as e:
+                        PrettyOutput.print(f"Failed to process file {file_path}: {str(e)}",
+                                           output_type=OutputType.ERROR)
+                    pbar.update(1)
+
+        # Update the document list
+        self.documents = unchanged_documents + new_documents
+
+        if not self.documents:
+            PrettyOutput.print("No documents to process", output_type=OutputType.WARNING)
+            return
+
+        # Vectorize only the new documents
+        if new_documents:
+            PrettyOutput.print(f"Processing {len(new_documents)} new documents",
+                               output_type=OutputType.INFO)
+
+            # Vectorize concurrently with a thread pool
+            batch_size = 32
+            new_vectors = []
+
+            with tqdm(total=len(new_documents), desc="Generating vectors") as pbar:
+                with ThreadPoolExecutor(max_workers=self.thread_count) as executor:
+                    for i in range(0, len(new_documents), batch_size):
+                        batch = new_documents[i:i + batch_size]
+                        future = executor.submit(self._process_document_batch, batch)
+                        batch_vectors = future.result()
+
+                        with self.vector_lock:
+                            new_vectors.extend(batch_vectors)
+
+                        pbar.update(len(batch))
+
+            # Merge old and new vectors
+            if self.flat_index is not None:
+                # Get the vectors of unchanged documents
+                unchanged_vectors = []
+                for doc in unchanged_documents:
+                    # Extract the vector from the existing index
+                    doc_idx = next((i for i, d in enumerate(self.documents)
+                                    if d.metadata['file_path'] == doc.metadata['file_path']), None)
+                    if doc_idx is not None:
+                        # Reconstruct the vector from the flat index
+                        vector = np.zeros((1, self.vector_dim), dtype=np.float32)
+                        self.flat_index.reconstruct(doc_idx, vector.ravel())
+                        unchanged_vectors.append(vector)
+
+                if unchanged_vectors:
+                    unchanged_vectors = np.vstack(unchanged_vectors)
+                    vectors = np.vstack([unchanged_vectors, np.vstack(new_vectors)])
+                else:
+                    vectors = np.vstack(new_vectors)
+            else:
+                vectors = np.vstack(new_vectors)
 
-        if vectors:
-            vectors = np.vstack(vectors)
            # Build the index
            self._build_index(vectors)
            # Save the cache
            self._save_cache(vectors)
-
-            PrettyOutput.print(f"Successfully indexed {len(self.documents)} document chunks",
+
+            PrettyOutput.print(f"Successfully indexed {len(self.documents)} document chunks (new/modified: {len(new_documents)}, unchanged: {len(unchanged_documents)})",
                               output_type=OutputType.SUCCESS)
 
    def search(self, query: str, top_k: int = 30) -> List[Tuple[Document, float]]:
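Taken together, the rebuilt indexing flow diffs the file list against the MD5 cache so unchanged files are never re-embedded, then vectorizes only the changed files in fixed-size batches on a thread pool. A condensed sketch of that shape; get_file_md5 and embed_batch stand in for the package's helpers, and unlike the release code, which waits on each future before submitting the next, this variant uses executor.map so batches can overlap:

from concurrent.futures import ThreadPoolExecutor

def index_incrementally(all_files, md5_cache, get_file_md5, embed_batch,
                        thread_count=4, batch_size=32):
    # Re-embed only files whose MD5 no longer matches the cache
    changed = [p for p in all_files if md5_cache.get(p) != get_file_md5(p)]

    # Fixed-size batches bound the size of each embedding call
    batches = [changed[i:i + batch_size]
               for i in range(0, len(changed), batch_size)]

    vectors = []
    with ThreadPoolExecutor(max_workers=thread_count) as executor:
        # map() keeps several batches in flight at once
        for batch_vectors in executor.map(embed_batch, batches):
            vectors.extend(batch_vectors)
    return vectors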
jarvis/jarvis_smart_shell/main.py
CHANGED

@@ -4,6 +4,8 @@ import os
 import sys
 import readline
 from typing import Optional
+from yaspin import yaspin
+from yaspin.spinners import Spinners
 
 from jarvis.models.registry import PlatformRegistry
 from jarvis.utils import PrettyOutput, OutputType, load_env_from_file
@@ -11,7 +13,7 @@ from jarvis.utils import PrettyOutput, OutputType, load_env_from_file
 def execute_command(command: str) -> None:
     """Show the command and let the user edit it; Enter executes, Ctrl+C cancels"""
     try:
-        print("Generated command (editable; Enter to execute, Ctrl+C to cancel):")
+        print("\nGenerated command (editable; Enter to execute, Ctrl+C to cancel):")
         # Pre-fill the input line
         readline.set_startup_hook(lambda: readline.insert_text(command))
         try:
@@ -68,14 +70,19 @@ find . -name "*.py"
        prefix = f"Current path: {current_path}\n"
        prefix += f"Current shell: {shell}\n"
 
-        #
-
-
-
-
-
-
+        # Show a "Thinking" spinner with yaspin
+        with yaspin(Spinners.dots, text="Thinking", color="yellow") as spinner:
+            # Handle the request
+            result = model.chat(prefix + request)
+
+            # Extract the command
+            if result and isinstance(result, str):
+                command = result.strip()
+                spinner.ok("✓")
+                return command
+
+            spinner.fail("✗")
+            return None
 
    except Exception as e:
        PrettyOutput.print(f"Error while handling the request: {str(e)}", OutputType.ERROR)
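The smart shell now shows a spinner while the model generates a command instead of printing static progress lines. The basic yaspin pattern looks like this (do_work is a hypothetical stand-in for the model call):

from yaspin import yaspin
from yaspin.spinners import Spinners

def do_work():
    return "ls -la"  # hypothetical placeholder for model.chat(...)

with yaspin(Spinners.dots, text="Thinking", color="yellow") as spinner:
    result = do_work()
    if result and isinstance(result, str):
        spinner.ok("✓")    # stop the spinner with a success mark
    else:
        spinner.fail("✗")  # stop the spinner with a failure mark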
jarvis/main.py
CHANGED
@@ -54,6 +54,15 @@ def load_tasks() -> dict:
             PrettyOutput.print("Warning: .jarvis file should contain a dictionary of task_name: task_description", OutputType.ERROR)
     except Exception as e:
         PrettyOutput.print(f"Error loading .jarvis file: {str(e)}", OutputType.ERROR)
+
+    # Load methodologies
+    method_path = os.path.expanduser("~/.jarvis_methodology")
+    if os.path.exists(method_path):
+        with open(method_path, "r", encoding="utf-8") as f:
+            methodology = yaml.safe_load(f)
+            if isinstance(methodology, dict):
+                for name, desc in methodology.items():
+                    tasks[f"Run methodology: {str(name)}"] = str(desc)
 
     return tasks
 
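load_tasks now also reads named methodologies from ~/.jarvis_methodology, a YAML mapping of methodology name to description, and exposes each one as a runnable task. A self-contained sketch of the same loading logic (the file contents in the comment are an invented example):

import os
import yaml

# Hypothetical ~/.jarvis_methodology contents:
#   code_review: "Read the diff first, then check tests, then ..."
#   bug_triage: "Reproduce, bisect, then file a minimal report."
method_path = os.path.expanduser("~/.jarvis_methodology")
tasks = {}
if os.path.exists(method_path):
    with open(method_path, "r", encoding="utf-8") as f:
        methodology = yaml.safe_load(f)
    if isinstance(methodology, dict):
        for name, desc in methodology.items():
            tasks[f"Run methodology: {name}"] = str(desc)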
jarvis/models/ollama.py
CHANGED
@@ -29,15 +29,15 @@ class OllamaPlatform(BasePlatform):
                PrettyOutput.print("1. Install Ollama: https://ollama.ai", OutputType.INFO)
                PrettyOutput.print("2. Download a model:", OutputType.INFO)
                PrettyOutput.print(f"   ollama pull {self.model_name}", OutputType.INFO)
-
+                PrettyOutput.print("Ollama has no available models", OutputType.WARNING)
 
        except requests.exceptions.ConnectionError:
-            PrettyOutput.print("\nOllama service is not running or unreachable", OutputType.
+            PrettyOutput.print("\nOllama service is not running or unreachable", OutputType.WARNING)
            PrettyOutput.print("Please make sure you have:", OutputType.INFO)
            PrettyOutput.print("1. Installed Ollama: https://ollama.ai", OutputType.INFO)
            PrettyOutput.print("2. Started the Ollama service", OutputType.INFO)
            PrettyOutput.print("3. Configured the service address correctly (default: http://localhost:11434)", OutputType.INFO)
-
+
 
        self.messages = []
        self.system_message = ""
jarvis/tools/thinker.py
CHANGED
@@ -17,11 +17,10 @@ class ThinkerTool:
                    "description": "Context or background knowledge related to the question",
                    "default": ""
                },
-                "
+                "goal": {
                    "type": "string",
-                    "
-                    "
-                    "default": "chain_of_thought"
+                    "description": "The specific goal or outcome to achieve",
+                    "default": ""
                }
            },
            "required": ["question"]
@@ -31,89 +30,36 @@ class ThinkerTool:
        """Initialize the thinking tool"""
        self.model = PlatformRegistry.get_global_platform_registry().get_thinking_platform()
 
-    def _generate_prompt(self, question: str, context: str,
+    def _generate_prompt(self, question: str, context: str, goal: str) -> str:
        """Generate the prompt
 
        Args:
            question: the question
            context: the context
-
+            goal: the desired goal
 
        Returns:
            str: the complete prompt
        """
        # Base prompt
-
-
-        # Add specific guidance for each thinking approach
-        approach_prompts = {
-            "chain_of_thought": """Analyze the question using a chain of thought:
-1. Read the question and context carefully
-2. Reason step by step, justifying each step
-3. Consider several possible angles
-4. Reach a final conclusion
-
-Output in the following format:
-Thinking process:
-1. [first reasoning step]
-2. [second reasoning step]
-...
-
-Conclusion:
-[final conclusion]""",
-
-            "tree_of_thought": """Analyze the question using a tree of thoughts:
-1. Break the question into several sub-questions
-2. Explore branches for each sub-question
-3. Evaluate the feasibility of each branch
-4. Integrate the best path
-
-Output in the following format:
-Question breakdown:
-- Sub-question 1
-  - Branch 1.1
-  - Branch 1.2
-- Sub-question 2
-  - Branch 2.1
-  - Branch 2.2
-
-Analysis:
-[analyze each branch in detail]
-
-Best path:
-[explain the choice]
-
-Conclusion:
-[final conclusion]""",
+        prompt = f"""You are an assistant skilled at deep thinking and logical reasoning. Please help analyze the question and give a solution.
 
-
-1.
-2.
-3.
-4.
-
-Output in the following format:
-Step breakdown:
-Step 1: [details]
-Step 2: [details]
-...
-
-Execution analysis:
-[analyze each step in detail]
-
-Solution:
-[complete solution]"""
-        }
-
-        # Build the full prompt
-        prompt = f"""{base_prompt}
-
-{approach_prompts[approach]}
+Think in the following way:
+1. Carefully understand the question and the goal
+2. Analyze and reason systematically
+3. Consider several possible solutions
+4. Give the best recommendation and concrete action steps
 
Question:
{question}
-
"""
+        # If a goal is given, add it to the prompt
+        if goal:
+            prompt += f"""
+Desired goal:
+{goal}
+"""
+
        # If context is given, add it to the prompt
        if context:
            prompt += f"""
@@ -131,7 +77,7 @@ class ThinkerTool:
            args: dictionary of parameters
                - question: the question
                - context: the context (optional)
-                -
+                - goal: the desired goal (optional)
 
        Returns:
            Dict[str, Any]: the execution result
@@ -140,16 +86,17 @@ class ThinkerTool:
            # Get the parameters
            question = args["question"]
            context = args.get("context", "")
-
+            goal = args.get("goal", "")
 
            # Generate the prompt
-            prompt = self._generate_prompt(question, context,
+            prompt = self._generate_prompt(question, context, goal)
 
            # Log the start of the analysis
            PrettyOutput.print(f"Starting to analyze the question: {question}", OutputType.INFO)
            if context:
                PrettyOutput.print("Context information included", OutputType.INFO)
-
+            if goal:
+                PrettyOutput.print(f"Goal: {goal}", OutputType.INFO)
 
            # Call the model for analysis
            response = self.model.chat(prompt)
@@ -182,15 +129,14 @@ def main():
    parser = argparse.ArgumentParser(description='Deep-thinking analysis tool')
    parser.add_argument('--question', required=True, help='The question to analyze')
    parser.add_argument('--context', help='Context related to the question')
-    parser.add_argument('--
-                        default='chain_of_thought', help='Thinking approach')
+    parser.add_argument('--goal', help='The specific goal or outcome to achieve')
    args = parser.parse_args()
 
    tool = ThinkerTool()
    result = tool.execute({
        "question": args.question,
        "context": args.context,
-        "
+        "goal": args.goal
    })
 
    if result["success"]:
jarvis/utils.py
CHANGED
@@ -1,3 +1,4 @@
+import hashlib
 from pathlib import Path
 import sys
 import time
@@ -276,4 +277,7 @@ def get_max_context_length():
    return int(os.getenv('JARVIS_MAX_CONTEXT_LENGTH', '131072'))  # default 128k
 
 def get_thread_count():
-    return int(os.getenv('JARVIS_THREAD_COUNT', '1'))
+    return int(os.getenv('JARVIS_THREAD_COUNT', '1'))
+
+def get_file_md5(filepath: str)->str:
+    return hashlib.md5(open(filepath, "rb").read(100*1024*1024)).hexdigest()
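The new get_file_md5 hashes at most the first 100 MB of a file in one read() call, matching the indexer's 100 MB size cutoff, and leaves closing the handle to garbage collection. A chunked variant (a sketch, not what the package ships) keeps memory bounded and closes the file deterministically:

import hashlib

def md5_of_prefix(path: str, limit: int = 100 * 1024 * 1024,
                  chunk: int = 1024 * 1024) -> str:
    # Hash at most the first `limit` bytes, one chunk at a time
    h = hashlib.md5()
    remaining = limit
    with open(path, "rb") as f:
        while remaining > 0:
            data = f.read(min(chunk, remaining))
            if not data:
                break
            h.update(data)
            remaining -= len(data)
    return h.hexdigest()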
{jarvis_ai_assistant-0.1.91.dist-info → jarvis_ai_assistant-0.1.92.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: jarvis-ai-assistant
-Version: 0.1.91
+Version: 0.1.92
 Summary: Jarvis: An AI assistant that uses tools to interact with the system
 Home-page: https://github.com/skyfireitdiy/Jarvis
 Author: skyfire
|
@@ -53,6 +53,8 @@ Requires-Dist: PyMuPDF>=1.21.0
|
|
|
53
53
|
Requires-Dist: python-docx>=0.8.11
|
|
54
54
|
Requires-Dist: tiktoken>=0.3.0
|
|
55
55
|
Requires-Dist: tqdm>=4.65.0
|
|
56
|
+
Requires-Dist: docx>=0.2.4
|
|
57
|
+
Requires-Dist: yaspin>=2.5.0
|
|
56
58
|
Provides-Extra: dev
|
|
57
59
|
Requires-Dist: pytest; extra == "dev"
|
|
58
60
|
Requires-Dist: black; extra == "dev"
|