jarvis-ai-assistant 0.1.90__py3-none-any.whl → 0.1.92__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of jarvis-ai-assistant might be problematic. Click here for more details.

jarvis/jarvis_rag/main.py CHANGED
@@ -1,13 +1,10 @@
1
1
  import os
2
- import hashlib
3
2
  import numpy as np
4
3
  import faiss
5
4
  from typing import List, Tuple, Optional, Dict
6
- from sentence_transformers import SentenceTransformer
7
5
  import pickle
8
- from jarvis.utils import OutputType, PrettyOutput, find_git_root, get_max_context_length, load_embedding_model, load_rerank_model
6
+ from jarvis.utils import OutputType, PrettyOutput, get_file_md5, get_max_context_length, load_embedding_model, load_rerank_model
9
7
  from jarvis.utils import load_env_from_file
10
- import tiktoken
11
8
  from dataclasses import dataclass
12
9
  from tqdm import tqdm
13
10
  import fitz # PyMuPDF for PDF files
@@ -16,12 +13,16 @@ from pathlib import Path
16
13
  from jarvis.models.registry import PlatformRegistry
17
14
  import shutil
18
15
  from datetime import datetime
16
+ import lzma # 添加 lzma 导入
17
+ from concurrent.futures import ThreadPoolExecutor
18
+ from threading import Lock
19
19
 
20
20
  @dataclass
21
21
  class Document:
22
22
  """文档类,用于存储文档内容和元数据"""
23
23
  content: str # 文档内容
24
24
  metadata: Dict # 元数据(文件路径、位置等)
25
+ md5: str = "" # 文件MD5值,用于增量更新检测
25
26
 
26
27
  class FileProcessor:
27
28
  """文件处理器基类"""
@@ -163,7 +164,9 @@ class RAGTool:
163
164
  # 初始化缓存和索引
164
165
  self.cache_path = os.path.join(self.data_dir, "cache.pkl")
165
166
  self.documents: List[Document] = []
166
- self.index = None
167
+ self.index = None # 用于搜索的IVF索引
168
+ self.flat_index = None # 用于存储原始向量
169
+ self.file_md5_cache = {} # 用于存储文件的MD5值
167
170
 
168
171
  # 加载缓存
169
172
  self._load_cache()
@@ -175,17 +178,23 @@ class RAGTool:
175
178
  DocxProcessor()
176
179
  ]
177
180
 
181
+ # 添加线程相关配置
182
+ self.thread_count = int(os.environ.get("JARVIS_THREAD_COUNT", os.cpu_count() or 4))
183
+ self.vector_lock = Lock() # 用于保护向量列表的并发访问
184
+
178
185
  def _load_cache(self):
179
186
  """加载缓存数据"""
180
187
  if os.path.exists(self.cache_path):
181
188
  try:
182
- with open(self.cache_path, 'rb') as f:
189
+ with lzma.open(self.cache_path, 'rb') as f:
183
190
  cache_data = pickle.load(f)
184
191
  self.documents = cache_data["documents"]
185
192
  vectors = cache_data["vectors"]
193
+ self.file_md5_cache = cache_data.get("file_md5_cache", {}) # 加载MD5缓存
186
194
 
187
195
  # 重建索引
188
- self._build_index(vectors)
196
+ if vectors is not None:
197
+ self._build_index(vectors)
189
198
  PrettyOutput.print(f"加载了 {len(self.documents)} 个文档片段",
190
199
  output_type=OutputType.INFO)
191
200
  except Exception as e:
@@ -193,16 +202,18 @@ class RAGTool:
193
202
  output_type=OutputType.WARNING)
194
203
  self.documents = []
195
204
  self.index = None
205
+ self.flat_index = None
206
+ self.file_md5_cache = {}
196
207
 
197
208
  def _save_cache(self, vectors: np.ndarray):
198
209
  """优化缓存保存"""
199
210
  try:
200
- # 添加版本号和时间戳
201
211
  cache_data = {
202
212
  "version": "1.0",
203
213
  "timestamp": datetime.now().isoformat(),
204
214
  "documents": self.documents,
205
- "vectors": vectors,
215
+ "vectors": vectors.copy() if vectors is not None else None, # 创建数组的副本
216
+ "file_md5_cache": dict(self.file_md5_cache), # 创建字典的副本
206
217
  "metadata": {
207
218
  "vector_dim": self.vector_dim,
208
219
  "total_docs": len(self.documents),
@@ -210,9 +221,12 @@ class RAGTool:
210
221
  }
211
222
  }
212
223
 
213
- # 使用压缩存储
214
- with open(self.cache_path, 'wb') as f:
215
- pickle.dump(cache_data, f, protocol=pickle.HIGHEST_PROTOCOL)
224
+ # 先将数据序列化为字节流
225
+ data = pickle.dumps(cache_data, protocol=pickle.HIGHEST_PROTOCOL)
226
+
227
+ # 然后使用 LZMA 压缩字节流
228
+ with lzma.open(self.cache_path, 'wb') as f:
229
+ f.write(data)
216
230
 
217
231
  # 创建备份
218
232
  backup_path = f"{self.cache_path}.backup"
@@ -223,22 +237,29 @@ class RAGTool:
223
237
  except Exception as e:
224
238
  PrettyOutput.print(f"保存缓存失败: {str(e)}",
225
239
  output_type=OutputType.ERROR)
240
+ raise
226
241
 
227
242
  def _build_index(self, vectors: np.ndarray):
228
243
  """构建FAISS索引"""
229
- # 添加IVF索引以提高大规模检索性能
244
+ if vectors.shape[0] == 0:
245
+ self.index = None
246
+ self.flat_index = None
247
+ return
248
+
249
+ # 创建扁平索引存储原始向量,用于重建
250
+ self.flat_index = faiss.IndexFlatIP(self.vector_dim)
251
+ self.flat_index.add(vectors)
252
+
253
+ # 创建IVF索引用于快速搜索
230
254
  nlist = max(4, int(vectors.shape[0] / 1000)) # 每1000个向量一个聚类中心
231
255
  quantizer = faiss.IndexFlatIP(self.vector_dim)
232
256
  self.index = faiss.IndexIVFFlat(quantizer, self.vector_dim, nlist, faiss.METRIC_INNER_PRODUCT)
233
257
 
234
- if vectors.shape[0] > 0:
235
- # 训练IVF索引
236
- self.index.train(vectors)
237
- self.index.add(vectors)
238
- # 设置搜索时探测的聚类数
239
- self.index.nprobe = min(nlist, 10)
240
- else:
241
- self.index = None
258
+ # 训练并添加向量
259
+ self.index.train(vectors)
260
+ self.index.add(vectors)
261
+ # 设置搜索时探测的聚类数
262
+ self.index.nprobe = min(nlist, 10)
242
263
 
243
264
  def _split_text(self, text: str) -> List[str]:
244
265
  """使用更智能的分块策略"""
@@ -302,16 +323,58 @@ class RAGTool:
302
323
  show_progress_bar=False)
303
324
  return np.array(embedding, dtype=np.float32)
304
325
 
305
- def _process_file(self, file_path: str) -> List[Document]:
306
- """处理单个文件
326
+ def _get_embedding_batch(self, texts: List[str]) -> np.ndarray:
327
+ """批量获取文本的向量表示
307
328
 
308
329
  Args:
309
- file_path: 文件路径
330
+ texts: 文本列表
310
331
 
311
332
  Returns:
312
- 文档对象列表
333
+ np.ndarray: 向量表示数组
313
334
  """
314
335
  try:
336
+ embeddings = self.embedding_model.encode(texts,
337
+ normalize_embeddings=True,
338
+ show_progress_bar=False,
339
+ batch_size=32) # 使用批处理提高效率
340
+ return np.array(embeddings, dtype=np.float32)
341
+ except Exception as e:
342
+ PrettyOutput.print(f"获取向量表示失败: {str(e)}",
343
+ output_type=OutputType.ERROR)
344
+ return np.zeros((len(texts), self.vector_dim), dtype=np.float32)
345
+
346
+ def _process_document_batch(self, documents: List[Document]) -> List[np.ndarray]:
347
+ """处理一批文档的向量化
348
+
349
+ Args:
350
+ documents: 文档列表
351
+
352
+ Returns:
353
+ List[np.ndarray]: 向量列表
354
+ """
355
+ texts = []
356
+ for doc in documents:
357
+ # 组合文档信息
358
+ combined_text = f"""
359
+ 文件: {doc.metadata['file_path']}
360
+ 内容: {doc.content}
361
+ """
362
+ texts.append(combined_text)
363
+
364
+ return self._get_embedding_batch(texts)
365
+
366
+ def _process_file(self, file_path: str) -> List[Document]:
367
+ """处理单个文件"""
368
+ try:
369
+ # 计算文件MD5
370
+ current_md5 = get_file_md5(file_path)
371
+ if not current_md5:
372
+ return []
373
+
374
+ # 检查文件是否需要重新处理
375
+ if file_path in self.file_md5_cache and self.file_md5_cache[file_path] == current_md5:
376
+ return []
377
+
315
378
  # 查找合适的处理器
316
379
  processor = None
317
380
  for p in self.file_processors:
@@ -320,18 +383,14 @@ class RAGTool:
320
383
  break
321
384
 
322
385
  if not processor:
323
- PrettyOutput.print(f"跳过不支持的文件: {file_path}",
324
- output_type=OutputType.WARNING)
386
+ # 如果找不到合适的处理器,则返回一个空的文档
325
387
  return []
326
388
 
327
389
  # 提取文本内容
328
390
  content = processor.extract_text(file_path)
329
391
  if not content.strip():
330
- PrettyOutput.print(f"文件内容为空: {file_path}",
331
- output_type=OutputType.WARNING)
332
392
  return []
333
393
 
334
-
335
394
  # 分割文本
336
395
  chunks = self._split_text(content)
337
396
 
@@ -345,10 +404,13 @@ class RAGTool:
345
404
  "file_type": Path(file_path).suffix.lower(),
346
405
  "chunk_index": i,
347
406
  "total_chunks": len(chunks)
348
- }
407
+ },
408
+ md5=current_md5
349
409
  )
350
410
  documents.append(doc)
351
-
411
+
412
+ # 更新MD5缓存
413
+ self.file_md5_cache[file_path] = current_md5
352
414
  return documents
353
415
 
354
416
  except Exception as e:
@@ -361,43 +423,117 @@ class RAGTool:
361
423
  # 获取所有文件
362
424
  all_files = []
363
425
  for root, _, files in os.walk(dir):
364
- # 忽略特定目录
365
426
  if any(ignored in root for ignored in ['.git', '__pycache__', 'node_modules']) or \
366
427
  any(part.startswith('.jarvis-') for part in root.split(os.sep)):
367
428
  continue
368
429
  for file in files:
369
- # 忽略 .jarvis- 开头的文件
370
430
  if file.startswith('.jarvis-'):
371
431
  continue
372
432
 
373
433
  file_path = os.path.join(root, file)
374
- # 跳过大文件
375
434
  if os.path.getsize(file_path) > 100 * 1024 * 1024: # 100MB
376
435
  PrettyOutput.print(f"跳过大文件: {file_path}",
377
436
  output_type=OutputType.WARNING)
378
437
  continue
379
438
  all_files.append(file_path)
380
439
 
381
- # 处理所有文件
382
- self.documents = []
383
- for file_path in tqdm(all_files, desc="处理文件"):
384
- docs = self._process_file(file_path)
385
- self.documents.extend(docs)
440
+ # 清理已删除文件的缓存
441
+ deleted_files = set(self.file_md5_cache.keys()) - set(all_files)
442
+ for file_path in deleted_files:
443
+ del self.file_md5_cache[file_path]
444
+ # 移除相关的文档
445
+ self.documents = [doc for doc in self.documents if doc.metadata['file_path'] != file_path]
386
446
 
387
- # 获取所有文档的向量表示
388
- vectors = []
389
- for doc in tqdm(self.documents, desc="生成向量"):
390
- vector = self._get_embedding(doc.content)
391
- vectors.append(vector)
447
+ # 检查文件变化
448
+ files_to_process = []
449
+ unchanged_files = []
450
+
451
+ with tqdm(total=len(all_files), desc="检查文件状态") as pbar:
452
+ for file_path in all_files:
453
+ current_md5 = get_file_md5(file_path)
454
+ if current_md5: # 只处理能成功计算MD5的文件
455
+ if file_path in self.file_md5_cache and self.file_md5_cache[file_path] == current_md5:
456
+ # 文件未变化,记录但不重新处理
457
+ unchanged_files.append(file_path)
458
+ else:
459
+ # 新文件或已修改的文件
460
+ files_to_process.append(file_path)
461
+ pbar.update(1)
462
+
463
+ # 保留未变化文件的文档
464
+ unchanged_documents = [doc for doc in self.documents
465
+ if doc.metadata['file_path'] in unchanged_files]
466
+
467
+ # 处理新文件和修改的文件
468
+ new_documents = []
469
+ if files_to_process:
470
+ with tqdm(total=len(files_to_process), desc="处理文件") as pbar:
471
+ for file_path in files_to_process:
472
+ try:
473
+ docs = self._process_file(file_path)
474
+ if len(docs) > 0:
475
+ new_documents.extend(docs)
476
+ except Exception as e:
477
+ PrettyOutput.print(f"处理文件失败 {file_path}: {str(e)}",
478
+ output_type=OutputType.ERROR)
479
+ pbar.update(1)
480
+
481
+ # 更新文档列表
482
+ self.documents = unchanged_documents + new_documents
483
+
484
+ if not self.documents:
485
+ PrettyOutput.print("没有需要处理的文档", output_type=OutputType.WARNING)
486
+ return
487
+
488
+ # 只对新文档进行向量化
489
+ if new_documents:
490
+ PrettyOutput.print(f"开始处理 {len(new_documents)} 个新文档",
491
+ output_type=OutputType.INFO)
492
+
493
+ # 使用线程池并发处理向量化
494
+ batch_size = 32
495
+ new_vectors = []
496
+
497
+ with tqdm(total=len(new_documents), desc="生成向量") as pbar:
498
+ with ThreadPoolExecutor(max_workers=self.thread_count) as executor:
499
+ for i in range(0, len(new_documents), batch_size):
500
+ batch = new_documents[i:i + batch_size]
501
+ future = executor.submit(self._process_document_batch, batch)
502
+ batch_vectors = future.result()
503
+
504
+ with self.vector_lock:
505
+ new_vectors.extend(batch_vectors)
506
+
507
+ pbar.update(len(batch))
508
+
509
+ # 合并新旧向量
510
+ if self.flat_index is not None:
511
+ # 获取未变化文档的向量
512
+ unchanged_vectors = []
513
+ for doc in unchanged_documents:
514
+ # 从现有索引中提取向量
515
+ doc_idx = next((i for i, d in enumerate(self.documents)
516
+ if d.metadata['file_path'] == doc.metadata['file_path']), None)
517
+ if doc_idx is not None:
518
+ # 从扁平索引中重建向量
519
+ vector = np.zeros((1, self.vector_dim), dtype=np.float32)
520
+ self.flat_index.reconstruct(doc_idx, vector.ravel())
521
+ unchanged_vectors.append(vector)
522
+
523
+ if unchanged_vectors:
524
+ unchanged_vectors = np.vstack(unchanged_vectors)
525
+ vectors = np.vstack([unchanged_vectors, np.vstack(new_vectors)])
526
+ else:
527
+ vectors = np.vstack(new_vectors)
528
+ else:
529
+ vectors = np.vstack(new_vectors)
392
530
 
393
- if vectors:
394
- vectors = np.vstack(vectors)
395
531
  # 构建索引
396
532
  self._build_index(vectors)
397
533
  # 保存缓存
398
534
  self._save_cache(vectors)
399
-
400
- PrettyOutput.print(f"成功索引了 {len(self.documents)} 个文档片段",
535
+
536
+ PrettyOutput.print(f"成功索引了 {len(self.documents)} 个文档片段 (新增/修改: {len(new_documents)}, 未变化: {len(unchanged_documents)})",
401
537
  output_type=OutputType.SUCCESS)
402
538
 
403
539
  def search(self, query: str, top_k: int = 30) -> List[Tuple[Document, float]]:
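[File header missing from the extracted diff: the hunks from here up to the "jarvis/main.py CHANGED" header belong to a different file than jarvis/jarvis_rag/main.py — the one that defines execute_command and process_request.]
@@ -4,6 +4,8 @@ import os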
4
4
  import sys
5
5
  import readline
6
6
  from typing import Optional
7
+ from yaspin import yaspin
8
+ from yaspin.spinners import Spinners
7
9
 
8
10
  from jarvis.models.registry import PlatformRegistry
9
11
  from jarvis.utils import PrettyOutput, OutputType, load_env_from_file
@@ -11,7 +13,7 @@ from jarvis.utils import PrettyOutput, OutputType, load_env_from_file
11
13
  def execute_command(command: str) -> None:
12
14
  """显示命令并允许用户编辑,回车执行,Ctrl+C取消"""
13
15
  try:
14
- print("生成的命令 (可以编辑,回车执行,Ctrl+C取消):")
16
+ print("\n生成的命令 (可以编辑,回车执行,Ctrl+C取消):")
15
17
  # 预填充输入行
16
18
  readline.set_startup_hook(lambda: readline.insert_text(command))
17
19
  try:
@@ -55,12 +57,6 @@ def process_request(request: str) -> Optional[str]:
55
57
  4. 不要添加任何换行或额外空格
56
58
  5. 如果需要多个命令,使用 && 连接
57
59
 
58
- 安全要求:
59
- - 生成的命令必须是安全的,不能包含危险操作
60
- - 如果需要sudo权限,要明确提示用户
61
- - 对于复杂操作,优先使用管道而不是临时文件
62
- - 确保命令的可移植性,优先使用通用的POSIX命令
63
-
64
60
  示例输入:
65
61
  "查找当前目录下的所有Python文件"
66
62
 
@@ -74,14 +70,19 @@ find . -name "*.py"
74
70
  prefix = f"当前路径: {current_path}\n"
75
71
  prefix += f"当前shell: {shell}\n"
76
72
 
77
- # 处理请求
78
- result = model.chat(prefix + request)
79
-
80
- # 提取命令 - 简化处理逻辑,因为现在应该只返回纯命令
81
- if result and isinstance(result, str):
82
- return result.strip()
83
-
84
- return None
73
+ # 使用yaspin显示Thinking状态
74
+ with yaspin(Spinners.dots, text="Thinking", color="yellow") as spinner:
75
+ # 处理请求
76
+ result = model.chat(prefix + request)
77
+
78
+ # 提取命令
79
+ if result and isinstance(result, str):
80
+ command = result.strip()
81
+ spinner.ok("✓")
82
+ return command
83
+
84
+ spinner.fail("✗")
85
+ return None
85
86
 
86
87
  except Exception as e:
87
88
  PrettyOutput.print(f"处理请求时发生错误: {str(e)}", OutputType.ERROR)
jarvis/main.py CHANGED
@@ -54,6 +54,15 @@ def load_tasks() -> dict:
54
54
  PrettyOutput.print("Warning: .jarvis file should contain a dictionary of task_name: task_description", OutputType.ERROR)
55
55
  except Exception as e:
56
56
  PrettyOutput.print(f"Error loading .jarvis file: {str(e)}", OutputType.ERROR)
57
+
58
+ # 读取方法论
59
+ method_path = os.path.expanduser("~/.jarvis_methodology")
60
+ if os.path.exists(method_path):
61
+ with open(method_path, "r", encoding="utf-8") as f:
62
+ methodology = yaml.safe_load(f)
63
+ if isinstance(methodology, dict):
64
+ for name, desc in methodology.items():
65
+ tasks[f"执行方法论:{str(name)}"] = str(desc)
57
66
 
58
67
  return tasks
59
68
 
jarvis/models/ollama.py CHANGED
@@ -29,15 +29,15 @@ class OllamaPlatform(BasePlatform):
29
29
  PrettyOutput.print("1. 安装 Ollama: https://ollama.ai", OutputType.INFO)
30
30
  PrettyOutput.print("2. 下载模型:", OutputType.INFO)
31
31
  PrettyOutput.print(f" ollama pull {self.model_name}", OutputType.INFO)
32
- raise Exception("No available models found")
32
+ PrettyOutput.print("Ollama没有可用的模型", OutputType.WARNING)
33
33
 
34
34
  except requests.exceptions.ConnectionError:
35
- PrettyOutput.print("\nOllama 服务未启动或无法连接", OutputType.ERROR)
35
+ PrettyOutput.print("\nOllama 服务未启动或无法连接", OutputType.WARNING)
36
36
  PrettyOutput.print("请确保已经:", OutputType.INFO)
37
37
  PrettyOutput.print("1. 安装了 Ollama: https://ollama.ai", OutputType.INFO)
38
38
  PrettyOutput.print("2. 启动了 Ollama 服务", OutputType.INFO)
39
39
  PrettyOutput.print("3. 服务地址配置正确 (默认: http://localhost:11434)", OutputType.INFO)
40
- raise Exception("Ollama service is not available")
40
+
41
41
 
42
42
  self.messages = []
43
43
  self.system_message = ""
jarvis/models/openai.py CHANGED
@@ -8,6 +8,10 @@ class OpenAIModel(BasePlatform):
8
8
  """DeepSeek模型实现"""
9
9
 
10
10
  platform_name = "openai"
11
+
12
+ def upload_files(self, file_list: List[str]):
13
+ """上传文件"""
14
+ PrettyOutput.print("OpenAI 不支持上传文件", OutputType.WARNING)
11
15
 
12
16
  def __init__(self):
13
17
  """
@@ -19,7 +23,7 @@ class OpenAIModel(BasePlatform):
19
23
  if not self.api_key:
20
24
  PrettyOutput.print("\n需要设置以下环境变量才能使用 OpenAI 模型:", OutputType.INFO)
21
25
  PrettyOutput.print(" • OPENAI_API_KEY: API 密钥", OutputType.INFO)
22
- PrettyOutput.print(" • OPENAI_API_BASE: (可选) API 基础地址,默认使用 https://api.openai.com", OutputType.INFO)
26
+ PrettyOutput.print(" • OPENAI_API_BASE: (可选) API 基础地址,默认使用 https://api.openai.com/v1", OutputType.INFO)
23
27
  PrettyOutput.print("\n可以通过以下方式设置:", OutputType.INFO)
24
28
  PrettyOutput.print("1. 创建或编辑 ~/.jarvis_env 文件:", OutputType.INFO)
25
29
  PrettyOutput.print(" OPENAI_API_KEY=your_api_key", OutputType.INFO)
@@ -31,7 +35,7 @@ class OpenAIModel(BasePlatform):
31
35
  PrettyOutput.print(" export OPENAI_MODEL_NAME=your_model_name", OutputType.INFO)
32
36
  PrettyOutput.print("OPENAI_API_KEY未设置", OutputType.WARNING)
33
37
 
34
- self.base_url = os.getenv("OPENAI_API_BASE", "https://api.openai.com")
38
+ self.base_url = os.getenv("OPENAI_API_BASE", "https://api.openai.com/v1")
35
39
  self.model_name = os.getenv("JARVIS_MODEL") or "gpt-4o"
36
40
 
37
41
 
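[File header missing from the extracted diff: a newly added file (149 lines, defining ThinkerTool) starts here; it is not part of jarvis/models/openai.py.]
@@ -0,0 +1,149 @@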
1
+ from typing import Dict, Any
2
+ from jarvis.utils import OutputType, PrettyOutput, load_env_from_file
3
+ from jarvis.models.registry import PlatformRegistry
4
+
5
+ class ThinkerTool:
6
+ name = "thinker"
7
+ description = "使用思维链推理方式分析复杂问题,适用于需要多步推理、逻辑分析或创造性思考的场景"
8
+ parameters = {
9
+ "type": "object",
10
+ "properties": {
11
+ "question": {
12
+ "type": "string",
13
+ "description": "需要分析的问题或任务"
14
+ },
15
+ "context": {
16
+ "type": "string",
17
+ "description": "问题相关的上下文信息或背景知识",
18
+ "default": ""
19
+ },
20
+ "goal": {
21
+ "type": "string",
22
+ "description": "期望达成的具体目标或结果",
23
+ "default": ""
24
+ }
25
+ },
26
+ "required": ["question"]
27
+ }
28
+
29
+ def __init__(self):
30
+ """初始化思考工具"""
31
+ self.model = PlatformRegistry.get_global_platform_registry().get_thinking_platform()
32
+
33
+ def _generate_prompt(self, question: str, context: str, goal: str) -> str:
34
+ """生成提示词
35
+
36
+ Args:
37
+ question: 问题
38
+ context: 上下文
39
+ goal: 期望目标
40
+
41
+ Returns:
42
+ str: 完整的提示词
43
+ """
44
+ # 基础提示词
45
+ prompt = f"""你是一个擅长深度思考和逻辑推理的助手。请帮助分析问题并给出解决方案。
46
+
47
+ 请按以下方式思考:
48
+ 1. 仔细理解问题和目标
49
+ 2. 进行系统性分析和推理
50
+ 3. 考虑多个可能的解决方案
51
+ 4. 给出最佳建议和具体行动步骤
52
+
53
+ 问题:
54
+ {question}
55
+ """
56
+ # 如果有目标,添加到提示词中
57
+ if goal:
58
+ prompt += f"""
59
+ 期望目标:
60
+ {goal}
61
+ """
62
+
63
+ # 如果有上下文,添加到提示词中
64
+ if context:
65
+ prompt += f"""
66
+ 相关上下文:
67
+ {context}
68
+ """
69
+
70
+ prompt += "\n请开始分析:"
71
+ return prompt
72
+
73
+ def execute(self, args: Dict[str, Any]) -> Dict[str, Any]:
74
+ """执行思考分析
75
+
76
+ Args:
77
+ args: 包含参数的字典
78
+ - question: 问题
79
+ - context: 上下文(可选)
80
+ - goal: 期望目标(可选)
81
+
82
+ Returns:
83
+ Dict[str, Any]: 执行结果
84
+ """
85
+ try:
86
+ # 获取参数
87
+ question = args["question"]
88
+ context = args.get("context", "")
89
+ goal = args.get("goal", "")
90
+
91
+ # 生成提示词
92
+ prompt = self._generate_prompt(question, context, goal)
93
+
94
+ # 记录开始分析
95
+ PrettyOutput.print(f"开始分析问题: {question}", OutputType.INFO)
96
+ if context:
97
+ PrettyOutput.print("包含上下文信息", OutputType.INFO)
98
+ if goal:
99
+ PrettyOutput.print(f"目标: {goal}", OutputType.INFO)
100
+
101
+ # 调用模型进行分析
102
+ response = self.model.chat(prompt)
103
+
104
+ if not response:
105
+ return {
106
+ "success": False,
107
+ "error": "未能获得有效的分析结果"
108
+ }
109
+
110
+ return {
111
+ "success": True,
112
+ "stdout": response,
113
+ "stderr": ""
114
+ }
115
+
116
+ except Exception as e:
117
+ PrettyOutput.print(f"思考分析失败: {str(e)}", OutputType.ERROR)
118
+ return {
119
+ "success": False,
120
+ "error": f"执行失败: {str(e)}"
121
+ }
122
+
123
+ def main():
124
+ """命令行直接运行工具"""
125
+ import argparse
126
+
127
+ load_env_from_file()
128
+
129
+ parser = argparse.ArgumentParser(description='深度思考分析工具')
130
+ parser.add_argument('--question', required=True, help='需要分析的问题')
131
+ parser.add_argument('--context', help='问题相关的上下文信息')
132
+ parser.add_argument('--goal', help='期望达成的具体目标或结果')
133
+ args = parser.parse_args()
134
+
135
+ tool = ThinkerTool()
136
+ result = tool.execute({
137
+ "question": args.question,
138
+ "context": args.context,
139
+ "goal": args.goal
140
+ })
141
+
142
+ if result["success"]:
143
+ PrettyOutput.print("\n分析结果:", OutputType.INFO)
144
+ PrettyOutput.print(result["stdout"], OutputType.INFO)
145
+ else:
146
+ PrettyOutput.print(result["error"], OutputType.ERROR)
147
+
148
+ if __name__ == "__main__":
149
+ main()
jarvis/utils.py CHANGED
@@ -1,3 +1,4 @@
1
+ import hashlib
1
2
  from pathlib import Path
2
3
  import sys
3
4
  import time
@@ -276,4 +277,7 @@ def get_max_context_length():
276
277
  return int(os.getenv('JARVIS_MAX_CONTEXT_LENGTH', '131072')) # 默认128k
277
278
 
278
279
  def get_thread_count():
279
- return int(os.getenv('JARVIS_THREAD_COUNT', '1'))
280
+ return int(os.getenv('JARVIS_THREAD_COUNT', '1'))
281
+
282
+ def get_file_md5(filepath: str)->str:
283
+ return hashlib.md5(open(filepath, "rb").read(100*1024*1024)).hexdigest()
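[File header missing from the extracted diff: the package metadata file (Metadata-Version / Requires-Dist fields) starts here; it is not part of jarvis/utils.py.]
@@ -1,6 +1,6 @@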
1
1
  Metadata-Version: 2.2
2
2
  Name: jarvis-ai-assistant
3
- Version: 0.1.90
3
+ Version: 0.1.92
4
4
  Summary: Jarvis: An AI assistant that uses tools to interact with the system
5
5
  Home-page: https://github.com/skyfireitdiy/Jarvis
6
6
  Author: skyfire
@@ -53,6 +53,8 @@ Requires-Dist: PyMuPDF>=1.21.0
53
53
  Requires-Dist: python-docx>=0.8.11
54
54
  Requires-Dist: tiktoken>=0.3.0
55
55
  Requires-Dist: tqdm>=4.65.0
56
+ Requires-Dist: docx>=0.2.4
57
+ Requires-Dist: yaspin>=2.5.0
56
58
  Provides-Extra: dev
57
59
  Requires-Dist: pytest; extra == "dev"
58
60
  Requires-Dist: black; extra == "dev"