jarvis-ai-assistant 0.1.76__py3-none-any.whl → 0.1.77__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of jarvis-ai-assistant might be problematic.

jarvis/__init__.py CHANGED
@@ -1,3 +1,3 @@
  """Jarvis AI Assistant"""

- __version__ = "0.1.76"
+ __version__ = "0.1.77"
jarvis/agent.py CHANGED
@@ -8,7 +8,7 @@ import json
  from .models.registry import PlatformRegistry
  from .tools import ToolRegistry
- from .utils import PrettyOutput, OutputType, get_multiline_input, while_success
+ from .utils import PrettyOutput, OutputType, get_multiline_input, load_embedding_model, while_success
  import os
  from datetime import datetime
  from prompt_toolkit import prompt
@@ -37,15 +37,15 @@ class Agent:
 
          # 初始化嵌入模型
          try:
-             os.environ["TOKENIZERS_PARALLELISM"] = "false"
-             PrettyOutput.print(f"正在加载嵌入模型: {self.embedding_model_name}...", OutputType.INFO)
-             self.embedding_model = SentenceTransformer(self.embedding_model_name)
+             self.embedding_model = load_embedding_model(self.embedding_model_name)
 
              # 预热模型并获取正确的维度
              test_text = "这是一段测试文本,用于确保模型完全加载。"
-             test_embedding = self.embedding_model.encode(test_text,
-                                                          convert_to_tensor=True,
-                                                          normalize_embeddings=True)
+             test_embedding = self.embedding_model.encode(
+                 test_text,
+                 convert_to_tensor=True,
+                 normalize_embeddings=True
+             )
              self.embedding_dimension = len(test_embedding)
              PrettyOutput.print("嵌入模型加载完成", OutputType.SUCCESS)
 
@@ -291,37 +291,6 @@ class Agent:
          self.prompt = summary_prompt
          return self._call_model(self.prompt)
 
-     def choose_tools(self, user_input: str) -> List[Dict]:
-         """根据用户输入选择工具"""
-         PrettyOutput.print("选择工具...", OutputType.PLANNING)
-         tools = self.tool_registry.get_all_tools()
-         prompt = f"""你是一个工具选择专家,请根据用户输入选择合适的工具,返回可能使用到的工具的名称。以下是可用工具:
- """
-         for tool in tools:
-             prompt += f"- {tool['name']}: {tool['description']}\n"
-         prompt += f"用户输入: {user_input}\n"
-         prompt += f"请返回可能使用到的工具的名称,如果无法确定,请返回空列表。"
-         prompt += f"返回的格式为:\n"
-         prompt += f"<TOOL_CHOICE_START>\n"
-         prompt += f"tool_name1\n"
-         prompt += f"tool_name2\n"
-         prompt += f"<TOOL_CHOICE_END>\n"
-         model = PlatformRegistry.get_global_platform()
-         model.set_suppress_output(True)
-         try:
-             response = model.chat(prompt)
-             response = response.replace("<TOOL_CHOICE_START>", "").replace("<TOOL_CHOICE_END>", "")
-             tools_name = response.split("\n")
-             choosed_tools = []
-             for tool_name in tools_name:
-                 for tool in tools:
-                     if tool['name'] == tool_name:
-                         choosed_tools.append(tool)
-                         break
-             return choosed_tools
-         except Exception as e:
-             PrettyOutput.print(f"工具选择失败: {str(e)}", OutputType.ERROR)
-             return []
 
      def run(self, user_input: str, file_list: Optional[List[str]] = None, keep_history: bool = False) -> str:
          """处理用户输入并返回响应,返回任务总结报告
@@ -350,7 +319,7 @@ class Agent:
          tools_prompt = ""
 
          # 选择工具
-         tools = self.choose_tools(user_input)
+         tools = self.tool_registry.get_all_tools()
          if tools:
              tools_prompt += "可用工具:\n"
              for tool in tools:
jarvis/jarvis_codebase/main.py CHANGED
@@ -7,7 +7,7 @@ from jarvis.models.registry import PlatformRegistry
  import concurrent.futures
  from threading import Lock
  from concurrent.futures import ThreadPoolExecutor
- from jarvis.utils import OutputType, PrettyOutput, find_git_root
+ from jarvis.utils import OutputType, PrettyOutput, find_git_root, load_embedding_model
  from jarvis.utils import load_env_from_file
  import argparse
  from sentence_transformers import SentenceTransformer
@@ -43,9 +43,7 @@ class CodeBase:
 
          # 初始化嵌入模型,使用系统默认缓存目录
          try:
-             os.environ["TOKENIZERS_PARALLELISM"] = "false"
-             PrettyOutput.print("正在加载/下载模型,请稍候...", output_type=OutputType.INFO)
-             self.embedding_model = SentenceTransformer(self.embedding_model_name)
+             self.embedding_model = load_embedding_model(self.embedding_model_name)
 
              # 强制完全加载所有模型组件
              test_text = """
jarvis/rag/__init__.py ADDED
File without changes
jarvis/rag/main.py ADDED
@@ -0,0 +1,483 @@
+ import os
+ import hashlib
+ import numpy as np
+ import faiss
+ from typing import List, Tuple, Optional, Dict
+ from sentence_transformers import SentenceTransformer
+ import pickle
+ from jarvis.utils import OutputType, PrettyOutput, find_git_root, load_embedding_model
+ from jarvis.utils import load_env_from_file
+ import tiktoken
+ from dataclasses import dataclass
+ from tqdm import tqdm
+ import fitz  # PyMuPDF for PDF files
+ from docx import Document as DocxDocument  # python-docx for DOCX files
+ from pathlib import Path
+
+ @dataclass
+ class Document:
+     """文档类,用于存储文档内容和元数据"""
+     content: str  # 文档内容
+     metadata: Dict  # 元数据(文件路径、位置等)
+
+ class FileProcessor:
+     """文件处理器基类"""
+     @staticmethod
+     def can_handle(file_path: str) -> bool:
+         """判断是否可以处理该文件"""
+         raise NotImplementedError
+
+     @staticmethod
+     def extract_text(file_path: str) -> str:
+         """提取文件文本内容"""
+         raise NotImplementedError
+
+ class TextFileProcessor(FileProcessor):
+     """文本文件处理器"""
+     ENCODINGS = ['utf-8', 'gbk', 'gb2312', 'latin1']
+     SAMPLE_SIZE = 8192  # 读取前8KB来检测编码
+
+     @staticmethod
+     def can_handle(file_path: str) -> bool:
+         """判断文件是否为文本文件,通过尝试解码来判断"""
+         try:
+             # 读取文件开头的一小部分来检测
+             with open(file_path, 'rb') as f:
+                 sample = f.read(TextFileProcessor.SAMPLE_SIZE)
+
+             # 检查是否包含空字节(通常表示二进制文件)
+             if b'\x00' in sample:
+                 return False
+
+             # 检查是否包含过多的非打印字符(通常表示二进制文件)
+             non_printable = sum(1 for byte in sample if byte < 32 and byte not in (9, 10, 13))  # tab, newline, carriage return
+             if non_printable / len(sample) > 0.3:  # 如果非打印字符超过30%,认为是二进制文件
+                 return False
+
+             # 尝试用不同编码解码
+             for encoding in TextFileProcessor.ENCODINGS:
+                 try:
+                     sample.decode(encoding)
+                     return True
+                 except UnicodeDecodeError:
+                     continue
+
+             return False
+
+         except Exception:
+             return False
+
+     @staticmethod
+     def extract_text(file_path: str) -> str:
+         """提取文本内容,使用检测到的正确编码"""
+         detected_encoding = None
+         try:
+             # 首先尝试检测编码
+             with open(file_path, 'rb') as f:
+                 raw_data = f.read()
+
+             # 尝试不同的编码
+             for encoding in TextFileProcessor.ENCODINGS:
+                 try:
+                     raw_data.decode(encoding)
+                     detected_encoding = encoding
+                     break
+                 except UnicodeDecodeError:
+                     continue
+
+             if not detected_encoding:
+                 raise UnicodeDecodeError(f"无法用支持的编码解码文件: {file_path}")
+
+             # 使用检测到的编码读取文件
+             with open(file_path, 'r', encoding=detected_encoding, errors='replace') as f:
+                 content = f.read()
+
+             # 规范化Unicode字符
+             import unicodedata
+             content = unicodedata.normalize('NFKC', content)
+
+             return content
+
+         except Exception as e:
+             raise Exception(f"读取文件失败: {str(e)}")
+
+ class PDFProcessor(FileProcessor):
+     """PDF文件处理器"""
+     @staticmethod
+     def can_handle(file_path: str) -> bool:
+         return Path(file_path).suffix.lower() == '.pdf'
+
+     @staticmethod
+     def extract_text(file_path: str) -> str:
+         text_parts = []
+         with fitz.open(file_path) as doc:
+             for page in doc:
+                 text_parts.append(page.get_text())
+         return "\n".join(text_parts)
+
+ class DocxProcessor(FileProcessor):
+     """DOCX文件处理器"""
+     @staticmethod
+     def can_handle(file_path: str) -> bool:
+         return Path(file_path).suffix.lower() == '.docx'
+
+     @staticmethod
+     def extract_text(file_path: str) -> str:
+         doc = DocxDocument(file_path)
+         return "\n".join([paragraph.text for paragraph in doc.paragraphs])
+
+ class RAGTool:
+     def __init__(self, root_dir: str):
+         """初始化RAG工具
+
+         Args:
+             root_dir: 项目根目录
+         """
+         load_env_from_file()
+         self.root_dir = root_dir
+         os.chdir(self.root_dir)
+
+         # 初始化配置
+         self.min_paragraph_length = int(os.environ.get("JARVIS_MIN_PARAGRAPH_LENGTH", "50"))  # 最小段落长度
+         self.max_paragraph_length = int(os.environ.get("JARVIS_MAX_PARAGRAPH_LENGTH", "1000"))  # 最大段落长度
+         self.embedding_model_name = os.environ.get("JARVIS_EMBEDDING_MODEL", "BAAI/bge-large-zh-v1.5")
+
+         # 初始化数据目录
+         self.data_dir = os.path.join(self.root_dir, ".jarvis-rag")
+         if not os.path.exists(self.data_dir):
+             os.makedirs(self.data_dir)
+
+         # 初始化嵌入模型
+         try:
+             self.embedding_model = load_embedding_model(self.embedding_model_name)
+             self.vector_dim = self.embedding_model.get_sentence_embedding_dimension()
+             PrettyOutput.print("模型加载完成", output_type=OutputType.SUCCESS)
+         except Exception as e:
+             PrettyOutput.print(f"加载模型失败: {str(e)}", output_type=OutputType.ERROR)
+             raise
+
+         # 初始化缓存和索引
+         self.cache_path = os.path.join(self.data_dir, "cache.pkl")
+         self.documents: List[Document] = []
+         self.index = None
+
+         # 加载缓存
+         self._load_cache()
+
+         # 注册文件处理器
+         self.file_processors = [
+             TextFileProcessor(),
+             PDFProcessor(),
+             DocxProcessor()
+         ]
+
+     def _load_cache(self):
+         """加载缓存数据"""
+         if os.path.exists(self.cache_path):
+             try:
+                 with open(self.cache_path, 'rb') as f:
+                     cache_data = pickle.load(f)
+                     self.documents = cache_data["documents"]
+                     vectors = cache_data["vectors"]
+
+                 # 重建索引
+                 self._build_index(vectors)
+                 PrettyOutput.print(f"加载了 {len(self.documents)} 个文档片段",
+                                    output_type=OutputType.INFO)
+             except Exception as e:
+                 PrettyOutput.print(f"加载缓存失败: {str(e)}",
+                                    output_type=OutputType.WARNING)
+                 self.documents = []
+                 self.index = None
+
+     def _save_cache(self, vectors: np.ndarray):
+         """保存缓存数据"""
+         try:
+             cache_data = {
+                 "documents": self.documents,
+                 "vectors": vectors
+             }
+             with open(self.cache_path, 'wb') as f:
+                 pickle.dump(cache_data, f)
+             PrettyOutput.print(f"保存了 {len(self.documents)} 个文档片段",
+                                output_type=OutputType.INFO)
+         except Exception as e:
+             PrettyOutput.print(f"保存缓存失败: {str(e)}",
+                                output_type=OutputType.ERROR)
+
+     def _build_index(self, vectors: np.ndarray):
+         """构建FAISS索引"""
+         # 创建HNSW索引
+         hnsw_index = faiss.IndexHNSWFlat(self.vector_dim, 16)
+         hnsw_index.hnsw.efConstruction = 40
+         hnsw_index.hnsw.efSearch = 16
+
+         # 用IndexIDMap包装HNSW索引
+         self.index = faiss.IndexIDMap(hnsw_index)
+
+         # 添加向量到索引
+         if vectors.shape[0] > 0:
+             self.index.add_with_ids(vectors, np.arange(vectors.shape[0]))
+         else:
+             self.index = None
+
+     def _split_text(self, text: str) -> List[str]:
+         """将文本分割成段落
+
+         Args:
+             text: 要分割的文本
+
+         Returns:
+             分割后的段落列表
+         """
+         # 首先按空行分割
+         paragraphs = []
+         current_paragraph = []
+
+         for line in text.split('\n'):
+             line = line.strip()
+             if not line:  # 空行表示段落结束
+                 if current_paragraph:
+                     paragraph_text = ' '.join(current_paragraph)
+                     if len(paragraph_text) >= self.min_paragraph_length:
+                         paragraphs.append(paragraph_text)
+                     current_paragraph = []
+             else:
+                 current_paragraph.append(line)
+
+         # 处理最后一个段落
+         if current_paragraph:
+             paragraph_text = ' '.join(current_paragraph)
+             if len(paragraph_text) >= self.min_paragraph_length:
+                 paragraphs.append(paragraph_text)
+
+         # 处理过长的段落
+         final_paragraphs = []
+         for paragraph in paragraphs:
+             if len(paragraph) <= self.max_paragraph_length:
+                 final_paragraphs.append(paragraph)
+             else:
+                 # 按句子分割过长的段落
+                 sentences = []
+                 current_sentence = []
+
+                 # 中文句子结束标记
+                 sentence_ends = {'。', '!', '?', '…', '.', '!', '?'}
+
+                 for char in paragraph:
+                     current_sentence.append(char)
+                     if char in sentence_ends:
+                         sentence = ''.join(current_sentence)
+                         if sentence.strip():
+                             sentences.append(sentence)
+                         current_sentence = []
+
+                 # 处理最后一个句子
+                 if current_sentence:
+                     sentence = ''.join(current_sentence)
+                     if sentence.strip():
+                         sentences.append(sentence)
+
+                 # 组合句子成适当长度的段落
+                 current_chunk = []
+                 current_length = 0
+
+                 for sentence in sentences:
+                     sentence_length = len(sentence)
+                     if current_length + sentence_length > self.max_paragraph_length:
+                         if current_chunk:
+                             final_paragraphs.append(''.join(current_chunk))
+                         current_chunk = [sentence]
+                         current_length = sentence_length
+                     else:
+                         current_chunk.append(sentence)
+                         current_length += sentence_length
+
+                 # 处理最后一个chunk
+                 if current_chunk:
+                     final_paragraphs.append(''.join(current_chunk))
+
+         # 过滤掉太短的段落
+         final_paragraphs = [p for p in final_paragraphs if len(p) >= self.min_paragraph_length]
+
+         return final_paragraphs
+
+     def _get_embedding(self, text: str) -> np.ndarray:
+         """获取文本的向量表示"""
+         embedding = self.embedding_model.encode(text,
+                                                 normalize_embeddings=True,
+                                                 show_progress_bar=False)
+         return np.array(embedding, dtype=np.float32)
+
+     def _process_file(self, file_path: str) -> List[Document]:
+         """处理单个文件
+
+         Args:
+             file_path: 文件路径
+
+         Returns:
+             文档对象列表
+         """
+         try:
+             # 查找合适的处理器
+             processor = None
+             for p in self.file_processors:
+                 if p.can_handle(file_path):
+                     processor = p
+                     break
+
+             if not processor:
+                 PrettyOutput.print(f"跳过不支持的文件: {file_path}",
+                                    output_type=OutputType.WARNING)
+                 return []
+
+             # 提取文本内容
+             content = processor.extract_text(file_path)
+             if not content.strip():
+                 PrettyOutput.print(f"文件内容为空: {file_path}",
+                                    output_type=OutputType.WARNING)
+                 return []
+
+
+             # 分割文本
+             chunks = self._split_text(content)
+
+             # 创建文档对象
+             documents = []
+             for i, chunk in enumerate(chunks):
+                 doc = Document(
+                     content=chunk,
+                     metadata={
+                         "file_path": file_path,
+                         "file_type": Path(file_path).suffix.lower(),
+                         "chunk_index": i,
+                         "total_chunks": len(chunks)
+                     }
+                 )
+                 documents.append(doc)
+
+             return documents
+
+         except Exception as e:
+             PrettyOutput.print(f"处理文件失败 {file_path}: {str(e)}",
+                                output_type=OutputType.ERROR)
+             return []
+
+     def build_index(self):
+         """构建文档索引"""
+         # 获取所有文件
+         all_files = []
+         for root, _, files in os.walk(self.root_dir):
+             if any(ignored in root for ignored in ['.jarvis-rag', '.git', '__pycache__', 'node_modules']):
+                 continue
+             for file in files:
+                 file_path = os.path.join(root, file)
+                 # 跳过大文件
+                 if os.path.getsize(file_path) > 10 * 1024 * 1024:  # 10MB
+                     PrettyOutput.print(f"跳过大文件: {file_path}",
+                                        output_type=OutputType.WARNING)
+                     continue
+                 all_files.append(file_path)
+
+         # 处理所有文件
+         self.documents = []
+         for file_path in tqdm(all_files, desc="处理文件"):
+             docs = self._process_file(file_path)
+             self.documents.extend(docs)
+
+         # 获取所有文档的向量表示
+         vectors = []
+         for doc in tqdm(self.documents, desc="生成向量"):
+             vector = self._get_embedding(doc.content)
+             vectors.append(vector)
+
+         if vectors:
+             vectors = np.vstack(vectors)
+             # 构建索引
+             self._build_index(vectors)
+             # 保存缓存
+             self._save_cache(vectors)
+
+         PrettyOutput.print(f"成功索引了 {len(self.documents)} 个文档片段",
+                            output_type=OutputType.SUCCESS)
+
+     def search(self, query: str, top_k: int = 5) -> List[Tuple[Document, float]]:
+         """搜索相关文档
+
+         Args:
+             query: 查询文本
+             top_k: 返回结果数量
+
+         Returns:
+             文档和相似度得分的列表
+         """
+         if not self.index:
+             raise ValueError("索引未构建,请先调用build_index()")
+
+         # 获取查询的向量表示
+         query_vector = self._get_embedding(query)
+         query_vector = query_vector.reshape(1, -1)
+
+         # 搜索最相似的向量
+         distances, indices = self.index.search(query_vector, top_k)
+
+         # 返回结果
+         results = []
+         for idx, distance in zip(indices[0], distances[0]):
+             if idx == -1:  # FAISS返回-1表示无效结果
+                 continue
+             similarity = 1.0 / (1.0 + float(distance))
+             results.append((self.documents[idx], similarity))
+
+         return results
+
+ def main():
+     """命令行入口"""
+     import argparse
+     import sys
+
+     # 设置标准输出编码为UTF-8
+     if sys.stdout.encoding != 'utf-8':
+         import codecs
+         sys.stdout = codecs.getwriter('utf-8')(sys.stdout.buffer, 'strict')
+         sys.stderr = codecs.getwriter('utf-8')(sys.stderr.buffer, 'strict')
+
+     parser = argparse.ArgumentParser(description='RAG工具')
+     parser.add_argument('--dir', type=str, default=os.getcwd(), help='项目根目录')
+     parser.add_argument('--build', action='store_true', help='构建索引')
+     parser.add_argument('--query', type=str, help='搜索查询')
+     parser.add_argument('--top-k', type=int, default=5, help='返回结果数量')
+
+     args = parser.parse_args()
+
+     try:
+         rag = RAGTool(args.dir)
+
+         if args.build:
+             rag.build_index()
+
+         if args.query:
+             results = rag.search(args.query, args.top_k)
+
+             if not results:
+                 PrettyOutput.print("未找到相关内容", output_type=OutputType.WARNING)
+                 return
+
+             PrettyOutput.print("\n搜索结果:", output_type=OutputType.INFO)
+             for doc, score in results:
+                 PrettyOutput.print("\n" + "="*50, output_type=OutputType.INFO)
+                 PrettyOutput.print(f"文件: {doc.metadata['file_path']}", output_type=OutputType.INFO)
+                 PrettyOutput.print(f"相似度: {score:.3f}", output_type=OutputType.INFO)
+                 PrettyOutput.print(f"片段 {doc.metadata['chunk_index'] + 1}/{doc.metadata['total_chunks']}",
+                                    output_type=OutputType.INFO)
+                 PrettyOutput.print("\n内容:", output_type=OutputType.INFO)
+                 # 确保内容是UTF-8编码
+                 content = doc.content.encode('utf-8', errors='replace').decode('utf-8')
+                 PrettyOutput.print(content, output_type=OutputType.INFO)
+
+     except Exception as e:
+         PrettyOutput.print(f"执行失败: {str(e)}", output_type=OutputType.ERROR)
+         return 1
+
+ if __name__ == "__main__":
+     main()
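As a quick orientation for the new module above, here is a minimal sketch of driving RAGTool from Python instead of the `jarvis-rag` command; it only uses the constructor, `build_index()` and `search()` shown in the added file, and the document directory and query strings are placeholders.

```python
from jarvis.rag.main import RAGTool

rag = RAGTool("/path/to/documents")   # hypothetical directory of text/PDF/DOCX files
rag.build_index()                     # walk the tree, split files into chunks, embed and cache them

for doc, score in rag.search("how is the vector index built", top_k=3):
    # Each hit is a (Document, similarity) pair; metadata records the source file and chunk position.
    print(f"{score:.3f} {doc.metadata['file_path']} "
          f"[{doc.metadata['chunk_index'] + 1}/{doc.metadata['total_chunks']}]")
```

This is the same flow that the module's `main()` wraps behind the `--build` and `--query` options.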
jarvis/tools/coder.py CHANGED
@@ -7,21 +7,21 @@ class CoderTool:
      """代码修改工具"""
 
      name = "coder"
-     description = "用于自动修改和生成代码的工具"
+     description = "分析并修改现有代码,用于实现新功能、修复bug、重构代码等。能理解代码上下文并进行精确的代码编辑。"
      parameters = {
          "feature": {
              "type": "string",
-             "description": "要实现的功能描述",
+             "description": "要实现的功能描述或需要修改的内容,例如:'添加日志功能'、'修复内存泄漏'、'优化性能'等",
              "required": True
          },
          "dir": {
              "type": "string",
-             "description": "项目根目录",
+             "description": "项目根目录,默认为当前目录",
              "required": False
          },
          "language": {
              "type": "string",
-             "description": "编程语言",
+             "description": "项目的主要编程语言,默认为python",
              "required": False
          }
      }
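For context, the updated schema above implies tool calls whose arguments look roughly like the following; the values are purely illustrative, and how the tool registry actually dispatches them is not part of this diff.

```python
# Hypothetical arguments matching the CoderTool.parameters schema above.
arguments = {
    "feature": "Add logging support",  # required: what to implement or change
    "dir": ".",                        # optional: project root, defaults to the current directory
    "language": "python",              # optional: main project language, defaults to python
}
```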
jarvis/tools/search.py CHANGED
@@ -8,33 +8,58 @@ from urllib.parse import quote
  def bing_search(query):
      try:
          with sync_playwright() as p:
-             browser = p.chromium.launch()
-             page = browser.new_page()
-             page.goto(
-                 f"https://www.bing.com/search?form=QBRE&q={quote(query)}&cc=US"
+             # 启动浏览器时设置参数
+             browser = p.chromium.launch(
+                 headless=True,  # 无头模式
+                 args=['--disable-gpu', '--no-sandbox', '--disable-dev-shm-usage']
              )
- 
-             page.wait_for_selector("#b_results", timeout=10000)
 
+             # 创建新页面并设置超时
+             page = browser.new_page(
+                 user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
+                 viewport={'width': 1920, 'height': 1080}
+             )
+
+             # 设置页面超时
+             page.set_default_timeout(60000)
+
+             # 访问搜索页面
+             url = f"https://www.bing.com/search?q={quote(query)}&form=QBLH&sp=-1"
+             page.goto(url, wait_until="networkidle")
+
+             # 等待搜索结果加载
+             page.wait_for_selector("#b_results", state="visible", timeout=30000)
+
+             # 等待一下以确保结果完全加载
+             page.wait_for_timeout(1000)
+
+             # 提取搜索结果
              summaries = page.evaluate("""() => {
-                 const liElements = Array.from(
-                     document.querySelectorAll("#b_results > .b_algo")
-                 );
-                 return liElements.map((li) => {
-                     const abstractElement = li.querySelector(".b_caption > p");
-                     const linkElement = li.querySelector("a");
-                     const href = linkElement.getAttribute("href");
-                     const title = linkElement.textContent;
-                     const abstract = abstractElement ? abstractElement.textContent : "";
-                     return { href, title, abstract };
-                 });
+                 const results = [];
+                 const elements = document.querySelectorAll("#b_results > .b_algo");
+
+                 for (const el of elements) {
+                     const titleEl = el.querySelector("h2");
+                     const linkEl = titleEl ? titleEl.querySelector("a") : null;
+                     const abstractEl = el.querySelector(".b_caption p");
+
+                     if (linkEl) {
+                         results.push({
+                             title: titleEl.innerText.trim(),
+                             href: linkEl.href,
+                             abstract: abstractEl ? abstractEl.innerText.trim() : ""
+                         });
+                     }
+                 }
+                 return results;
              }""")
 
              browser.close()
-             print(summaries)
              return summaries
+
      except Exception as error:
-         print("An error occurred:", error)
+         PrettyOutput.print(f"搜索出错: {str(error)}", OutputType.ERROR)
+         return None
 
 
  class SearchTool:
      name = "search"
@@ -158,4 +183,46 @@ class SearchTool:
              return {
                  "success": False,
                  "error": f"搜索失败: {str(e)}"
-             }
+             }
+
+ def main():
+     """命令行直接运行搜索工具"""
+     import argparse
+     import sys
+
+     parser = argparse.ArgumentParser(description='Bing搜索工具')
+     parser.add_argument('query', help='搜索关键词')
+     parser.add_argument('--max', type=int, default=5, help='最大结果数量(默认5)')
+     parser.add_argument('--url-only', action='store_true', help='只显示URL')
+     args = parser.parse_args()
+
+     try:
+         PrettyOutput.print(f"正在搜索: {args.query}", OutputType.INFO)
+
+         results = bing_search(args.query)
+
+         if not results:
+             PrettyOutput.print("未找到搜索结果", OutputType.WARNING)
+             sys.exit(1)
+
+         PrettyOutput.print(f"\n找到 {len(results)} 条结果:", OutputType.INFO)
+
+         for i, result in enumerate(results[:args.max], 1):
+             PrettyOutput.print(f"\n{'-'*50}", OutputType.INFO)
+             if args.url_only:
+                 PrettyOutput.print(f"{i}. {result['href']}", OutputType.INFO)
+             else:
+                 PrettyOutput.print(f"{i}. {result['title']}", OutputType.INFO)
+                 PrettyOutput.print(f"链接: {result['href']}", OutputType.INFO)
+                 if result['abstract']:
+                     PrettyOutput.print(f"摘要: {result['abstract']}", OutputType.INFO)
+
+     except KeyboardInterrupt:
+         PrettyOutput.print("\n搜索已取消", OutputType.WARNING)
+         sys.exit(1)
+     except Exception as e:
+         PrettyOutput.print(f"执行出错: {str(e)}", OutputType.ERROR)
+         sys.exit(1)
+
+ if __name__ == "__main__":
+     main()
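A small sketch of calling the reworked `bing_search()` directly (the new `main()` above wraps the same call for the command line); it assumes Playwright and its Chromium browser are installed, and the query is a placeholder.

```python
from jarvis.tools.search import bing_search

# Returns a list of {"title", "href", "abstract"} dicts, or None if the search failed.
results = bing_search("faiss HNSW index example")
for r in (results or [])[:5]:
    print(r["title"], r["href"])
```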
jarvis/utils.py CHANGED
@@ -9,6 +9,7 @@ from colorama import Fore, Style as ColoramaStyle
  from prompt_toolkit import PromptSession
  from prompt_toolkit.styles import Style as PromptStyle
  from prompt_toolkit.formatted_text import FormattedText
+ from sentence_transformers import SentenceTransformer
 
  # 初始化colorama
  colorama.init()
@@ -206,4 +207,27 @@ def find_git_root(dir="."):
      os.chdir(dir)
      ret = os.popen("git rev-parse --show-toplevel").read().strip()
      os.chdir(curr_dir)
-     return ret
+     return ret
+
+ def load_embedding_model(model_name: str):
+     os.environ["TOKENIZERS_PARALLELISM"] = "false"
+     PrettyOutput.print(f"正在加载嵌入模型: {model_name}...", OutputType.INFO)
+     try:
+         # 首先尝试离线加载
+         embedding_model = SentenceTransformer(
+             model_name,
+             device="cpu",
+             cache_folder=os.path.expanduser("~/.cache/huggingface/hub"),
+             local_files_only=True
+         )
+         PrettyOutput.print("使用本地缓存加载模型成功", OutputType.SUCCESS)
+     except Exception as local_error:
+         PrettyOutput.print(f"本地加载失败,尝试在线下载: {str(local_error)}", OutputType.WARNING)
+         # 如果离线加载失败,尝试在线下载
+         embedding_model = SentenceTransformer(
+             model_name,
+             device="cpu",
+             cache_folder=os.path.expanduser("~/.cache/huggingface/hub")
+         )
+         PrettyOutput.print("模型下载并加载成功", OutputType.SUCCESS)
+     return embedding_model
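A minimal sketch of how the new helper is meant to be used (this is how agent.py, the codebase tool and the RAG tool above call it): it loads from the local Hugging Face cache first and only downloads when that fails. The model name is simply the default used elsewhere in this release.

```python
from jarvis.utils import load_embedding_model

# Offline-first load; falls back to downloading into ~/.cache/huggingface/hub.
model = load_embedding_model("BAAI/bge-large-zh-v1.5")
embedding = model.encode("warm-up text", normalize_embeddings=True)
print(len(embedding))
```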
jarvis_ai_assistant-0.1.76.dist-info/METADATA → jarvis_ai_assistant-0.1.77.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: jarvis-ai-assistant
- Version: 0.1.76
+ Version: 0.1.77
  Summary: Jarvis: An AI assistant that uses tools to interact with the system
  Home-page: https://github.com/skyfireitdiy/Jarvis
  Author: skyfire
@@ -35,6 +35,7 @@ Classifier: Programming Language :: Python :: 3.8
  Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
+ Classifier: Operating System :: POSIX :: Linux
  Requires-Python: >=3.8
  Description-Content-Type: text/markdown
  License-File: LICENSE
@@ -48,6 +49,10 @@ Requires-Dist: numpy>=1.24.0
  Requires-Dist: faiss-cpu>=1.8.0
  Requires-Dist: sentence-transformers>=2.2.2
  Requires-Dist: bs4>=0.0.1
+ Requires-Dist: PyMuPDF>=1.21.0
+ Requires-Dist: python-docx>=0.8.11
+ Requires-Dist: tiktoken>=0.3.0
+ Requires-Dist: tqdm>=4.65.0
  Provides-Extra: dev
  Requires-Dist: pytest; extra == "dev"
  Requires-Dist: black; extra == "dev"
@@ -124,6 +129,7 @@ Jarvis supports configuration through environment variables that can be set in t
  |---------|------|--------|------|
  | JARVIS_PLATFORM | AI platform to use, supports kimi/openai/ai8 etc | kimi | Yes |
  | JARVIS_MODEL | Model name to use | - | No |
+ | JARVIS_THREAD_COUNT | Number of threads for parallel processing | 10 | No |
  | JARVIS_CODEGEN_PLATFORM | AI platform for code generation | Same as JARVIS_PLATFORM | No |
  | JARVIS_CODEGEN_MODEL | Model name for code generation | Same as JARVIS_MODEL | No |
  | JARVIS_CHEAP_PLATFORM | AI platform for cheap operations | Same as JARVIS_PLATFORM | No |
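As an illustration of the configuration table above (including the newly documented JARVIS_THREAD_COUNT), a hedged sketch of setting these variables in-process; the names come from the table, the values are placeholders, and the usual path is still the env file read by `load_env_from_file()`.

```python
import os

# Illustrative overrides; variable names are taken from the configuration table above.
os.environ.setdefault("JARVIS_PLATFORM", "kimi")      # default platform per the table
os.environ.setdefault("JARVIS_THREAD_COUNT", "10")    # newly documented, default 10
```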
@@ -141,36 +147,47 @@ Jarvis supports configuration through environment variables that can be set in t
 
  ## 🎯 Usage
 
- ### Basic Usage
+ ### Main Assistant
  ```bash
  jarvis
  ```
 
- 
- ### With Specific Model
+ ### Code Generation
  ```bash
- jarvis -p kimi # Use Kimi platform
- jarvis -p openai # Use OpenAI platform
- ```
- 
- ### Code Modification
- ```bash
- jarvis-coder --feature "Add new feature" # Modify code to add new feature
+ jarvis-coder
  ```
 
  ### Codebase Search
  ```bash
- jarvis-codebase --search "database connection" # Search codebase
+ # Generate codebase index
+ jarvis-codebase --generate
+ 
+ # Search similar code
+ jarvis-codebase --search "your search query"
+ 
+ # Ask questions about codebase
+ jarvis-codebase --ask "your question"
  ```
 
- ### Codebase Question
+ ### Document Analysis (RAG)
  ```bash
- jarvis-codebase --ask "How to use the database?" # Ask about codebase
+ # Build document index
+ jarvis-rag --dir /path/to/documents --build
+ 
+ # Search documents
+ jarvis-rag --query "your search query"
  ```
 
- ### Keep Chat History
+ ### Search Tool
  ```bash
- jarvis --keep-history # Don't delete chat session after completion
+ # Basic search
+ jarvis-search "your query"
+ 
+ # Show only URLs
+ jarvis-search "your query" --url-only
+ 
+ # Limit results
+ jarvis-search "your query" --max 3
  ```
 
  ## 🛠️ Tools
jarvis_ai_assistant-0.1.76.dist-info/RECORD → jarvis_ai_assistant-0.1.77.dist-info/RECORD CHANGED
@@ -1,9 +1,9 @@
- jarvis/__init__.py,sha256=mqMOFwEAV4m463m3kW2Lz8KGIkWz1vJScSEYFQi4jsc,50
- jarvis/agent.py,sha256=10YxntRu9CWp-xZt7PmUYbzj_knwGM0QqyRbM4FaKSk,21473
+ jarvis/__init__.py,sha256=WZAfXLVIclhkO-zAnoGnZ9Gc0F0C_LUd0MwIpGBFrdw,50
+ jarvis/agent.py,sha256=lUYH9wijs4wwbXMxqZBtOkEfL0jHLv9rw82r9EjPpXs,19743
  jarvis/main.py,sha256=7EcSlxa5JFFXBujzKDWdNtwX6axLhFFdJMc2GxTjfdk,6295
- jarvis/utils.py,sha256=vZV8sHj0ggZy4Rb8RxIujQhRWgeNEomhqVl4WXmpq7c,7498
+ jarvis/utils.py,sha256=TKKO7vbWLJ80n3JqL30HpbW-GuFfJTgR0GqBrsPPiwk,8566
  jarvis/jarvis_codebase/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- jarvis/jarvis_codebase/main.py,sha256=G8ADdTgjZTxEjvP11IBbjHiNWCvuTkFiLkDKq_8UpI0,26947
+ jarvis/jarvis_codebase/main.py,sha256=aGhfH9JTW-CX97OJErzxztJGhL_CH6lXeabfwZxHHxQ,26807
  jarvis/jarvis_coder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  jarvis/jarvis_coder/main.py,sha256=4YhQ20MD8ntdD5hjmjJbBHYEDPDZ2aPIgAviJCvtOXE,24458
  jarvis/models/__init__.py,sha256=mrOt67nselz_H1gX9wdAO4y2DY5WPXzABqJbr5Des8k,63
@@ -13,21 +13,23 @@ jarvis/models/kimi.py,sha256=1iTB0Z_WOmCML3Ufsge6jmeKOYvccr7I5lS3JUXymU4,17611
  jarvis/models/openai.py,sha256=ayaBWAN5VexMcKVrjEPDNB-Q9wx0sCV9Z4BCrvwYJ9w,4315
  jarvis/models/oyi.py,sha256=X2c5SWDIuQDCCFBcEKbzIWEz3I34eOAi0d1XAFgxlpw,15001
  jarvis/models/registry.py,sha256=YpooKSpk5pSWfb5cBDz5wRfPK-abb9uuUZr4WBejqwI,7762
+ jarvis/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ jarvis/rag/main.py,sha256=sgtOzmqi9cO9DcCenSXSad5Xr7MI0U7Ji2GtQL-YaO4,18161
  jarvis/tools/__init__.py,sha256=7Rqyj5hBAv5cWDVr5T9ZTZASO7ssBHeQNm2_4ZARdkA,72
  jarvis/tools/base.py,sha256=EGRGbdfbLXDLwtyoWdvp9rlxNX7bzc20t0Vc2VkwIEY,652
  jarvis/tools/codebase_qa.py,sha256=LsowsgL7HBmdBwa7zXcYi_OkwOok4qbnzYWYsuZxHtU,2413
- jarvis/tools/coder.py,sha256=ZJfPInKms4Hj3-eQlBwamVsvZ-2nlZ-4jsqJ-tJc6mg,2040
+ jarvis/tools/coder.py,sha256=kmotT2Klsug44S51QoSW9DzkxLzcF-XonyYAEoWZV6c,2295
  jarvis/tools/file_ops.py,sha256=h8g0eT9UvlJf4kt0DLXvdSsjcPj7x19lxWdDApeDfpg,3842
  jarvis/tools/generator.py,sha256=vVP3eN5cCDpRXf_fn0skETkPXAW1XZFWx9pt2_ahK48,5999
  jarvis/tools/methodology.py,sha256=UG6s5VYRcd9wrKX4cg6f7zJhet5AIcthFGMOAdevBiw,5175
  jarvis/tools/registry.py,sha256=MeTYNdZNRdhlgABviVxzbDPSgLpwDp2Nx2dGzedRu8U,7212
- jarvis/tools/search.py,sha256=1EqOVvLhg2Csh-i03-XeCrusbyfmH69FZ8khwZt8Tow,6131
+ jarvis/tools/search.py,sha256=3FX5oAkPiKD4Bvu8gBpKLQveOgl_K5jkvxRvkRPoH3M,8787
  jarvis/tools/shell.py,sha256=UPKshPyOaUwTngresUw-ot1jHjQIb4wCY5nkJqa38lU,2520
  jarvis/tools/sub_agent.py,sha256=rEtAmSVY2ZjFOZEKr5m5wpACOQIiM9Zr_3dT92FhXYU,2621
  jarvis/tools/webpage.py,sha256=d3w3Jcjcu1ESciezTkz3n3Zf-rp_l91PrVoDEZnckOo,2391
- jarvis_ai_assistant-0.1.76.dist-info/LICENSE,sha256=AGgVgQmTqFvaztRtCAXsAMryUymB18gZif7_l2e1XOg,1063
- jarvis_ai_assistant-0.1.76.dist-info/METADATA,sha256=X1sBPb59GZyR4UjtnFntEQY4SzIe0q_KOgmYt62hMcU,12399
- jarvis_ai_assistant-0.1.76.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- jarvis_ai_assistant-0.1.76.dist-info/entry_points.txt,sha256=QNUeqmUJd7nHufel2FO7cRttS1uKFfnbIyObv8eVyOY,140
- jarvis_ai_assistant-0.1.76.dist-info/top_level.txt,sha256=1BOxyWfzOP_ZXj8rVTDnNCJ92bBGB0rwq8N1PCpoMIs,7
- jarvis_ai_assistant-0.1.76.dist-info/RECORD,,
+ jarvis_ai_assistant-0.1.77.dist-info/LICENSE,sha256=AGgVgQmTqFvaztRtCAXsAMryUymB18gZif7_l2e1XOg,1063
+ jarvis_ai_assistant-0.1.77.dist-info/METADATA,sha256=Fo61TgkIMKSfekG4kV5RR_kfMDAgEpl5D2_2jl8geqU,12736
+ jarvis_ai_assistant-0.1.77.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ jarvis_ai_assistant-0.1.77.dist-info/entry_points.txt,sha256=iEvZ0rn-muMxWCNH5QEvw_mTTy_EVhyQyxDxWXqQQVo,174
+ jarvis_ai_assistant-0.1.77.dist-info/top_level.txt,sha256=1BOxyWfzOP_ZXj8rVTDnNCJ92bBGB0rwq8N1PCpoMIs,7
+ jarvis_ai_assistant-0.1.77.dist-info/RECORD,,
jarvis_ai_assistant-0.1.76.dist-info/entry_points.txt → jarvis_ai_assistant-0.1.77.dist-info/entry_points.txt CHANGED
@@ -2,3 +2,4 @@
  jarvis = jarvis.main:main
  jarvis-codebase = jarvis.jarvis_codebase.main:main
  jarvis-coder = jarvis.jarvis_coder.main:main
+ jarvis-rag = jarvis.rag.main:main