jarvis-ai-assistant 0.1.76__py3-none-any.whl → 0.1.78__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of jarvis-ai-assistant might be problematic. Click here for more details.

jarvis/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """Jarvis AI Assistant"""
2
2
 
3
- __version__ = "0.1.76"
3
+ __version__ = "0.1.78"
jarvis/agent.py CHANGED
@@ -8,7 +8,7 @@ import json
8
8
 
9
9
  from .models.registry import PlatformRegistry
10
10
  from .tools import ToolRegistry
11
- from .utils import PrettyOutput, OutputType, get_multiline_input, while_success
11
+ from .utils import PrettyOutput, OutputType, get_multiline_input, load_embedding_model, while_success
12
12
  import os
13
13
  from datetime import datetime
14
14
  from prompt_toolkit import prompt
@@ -24,7 +24,7 @@ class Agent:
24
24
  name: Agent名称,默认为"Jarvis"
25
25
  is_sub_agent: 是否为子Agent,默认为False
26
26
  """
27
- self.model = PlatformRegistry.get_global_platform()
27
+ self.model = PlatformRegistry.get_global_platform_registry().get_normal_platform()
28
28
  self.tool_registry = ToolRegistry.get_global_tool_registry()
29
29
  self.name = name
30
30
  self.is_sub_agent = is_sub_agent
@@ -32,20 +32,19 @@ class Agent:
32
32
  self.conversation_turns = 0
33
33
 
34
34
  # 从环境变量加载嵌入模型配置
35
- self.embedding_model_name = os.environ.get("JARVIS_EMBEDDING_MODEL", "BAAI/bge-large-zh-v1.5")
36
35
  self.embedding_dimension = 1536 # Default for many embedding models
37
36
 
38
37
  # 初始化嵌入模型
39
38
  try:
40
- os.environ["TOKENIZERS_PARALLELISM"] = "false"
41
- PrettyOutput.print(f"正在加载嵌入模型: {self.embedding_model_name}...", OutputType.INFO)
42
- self.embedding_model = SentenceTransformer(self.embedding_model_name)
39
+ self.embedding_model = load_embedding_model()
43
40
 
44
41
  # 预热模型并获取正确的维度
45
42
  test_text = "这是一段测试文本,用于确保模型完全加载。"
46
- test_embedding = self.embedding_model.encode(test_text,
47
- convert_to_tensor=True,
48
- normalize_embeddings=True)
43
+ test_embedding = self.embedding_model.encode(
44
+ test_text,
45
+ convert_to_tensor=True,
46
+ normalize_embeddings=True
47
+ )
49
48
  self.embedding_dimension = len(test_embedding)
50
49
  PrettyOutput.print("嵌入模型加载完成", OutputType.SUCCESS)
51
50
 
@@ -291,37 +290,6 @@ class Agent:
291
290
  self.prompt = summary_prompt
292
291
  return self._call_model(self.prompt)
293
292
 
294
- def choose_tools(self, user_input: str) -> List[Dict]:
295
- """根据用户输入选择工具"""
296
- PrettyOutput.print("选择工具...", OutputType.PLANNING)
297
- tools = self.tool_registry.get_all_tools()
298
- prompt = f"""你是一个工具选择专家,请根据用户输入选择合适的工具,返回可能使用到的工具的名称。以下是可用工具:
299
- """
300
- for tool in tools:
301
- prompt += f"- {tool['name']}: {tool['description']}\n"
302
- prompt += f"用户输入: {user_input}\n"
303
- prompt += f"请返回可能使用到的工具的名称,如果无法确定,请返回空列表。"
304
- prompt += f"返回的格式为:\n"
305
- prompt += f"<TOOL_CHOICE_START>\n"
306
- prompt += f"tool_name1\n"
307
- prompt += f"tool_name2\n"
308
- prompt += f"<TOOL_CHOICE_END>\n"
309
- model = PlatformRegistry.get_global_platform()
310
- model.set_suppress_output(True)
311
- try:
312
- response = model.chat(prompt)
313
- response = response.replace("<TOOL_CHOICE_START>", "").replace("<TOOL_CHOICE_END>", "")
314
- tools_name = response.split("\n")
315
- choosed_tools = []
316
- for tool_name in tools_name:
317
- for tool in tools:
318
- if tool['name'] == tool_name:
319
- choosed_tools.append(tool)
320
- break
321
- return choosed_tools
322
- except Exception as e:
323
- PrettyOutput.print(f"工具选择失败: {str(e)}", OutputType.ERROR)
324
- return []
325
293
 
326
294
  def run(self, user_input: str, file_list: Optional[List[str]] = None, keep_history: bool = False) -> str:
327
295
  """处理用户输入并返回响应,返回任务总结报告
@@ -350,7 +318,7 @@ class Agent:
350
318
  tools_prompt = ""
351
319
 
352
320
  # 选择工具
353
- tools = self.choose_tools(user_input)
321
+ tools = self.tool_registry.get_all_tools()
354
322
  if tools:
355
323
  tools_prompt += "可用工具:\n"
356
324
  for tool in tools:
@@ -7,7 +7,7 @@ from jarvis.models.registry import PlatformRegistry
7
7
  import concurrent.futures
8
8
  from threading import Lock
9
9
  from concurrent.futures import ThreadPoolExecutor
10
- from jarvis.utils import OutputType, PrettyOutput, find_git_root
10
+ from jarvis.utils import OutputType, PrettyOutput, find_git_root, load_embedding_model
11
11
  from jarvis.utils import load_env_from_file
12
12
  import argparse
13
13
  from sentence_transformers import SentenceTransformer
@@ -19,22 +19,6 @@ class CodeBase:
19
19
  self.root_dir = root_dir
20
20
  os.chdir(self.root_dir)
21
21
  self.thread_count = int(os.environ.get("JARVIS_THREAD_COUNT") or 10)
22
- self.cheap_platform = os.environ.get("JARVIS_CHEAP_PLATFORM") or os.environ.get("JARVIS_PLATFORM") or "kimi"
23
- self.cheap_model = os.environ.get("JARVIS_CHEAP_MODEL") or os.environ.get("JARVIS_MODEL") or "kimi"
24
- self.normal_platform = os.environ.get("JARVIS_PLATFORM") or "kimi"
25
- self.codegen_platform = os.environ.get("JARVIS_CODEGEN_PLATFORM") or os.environ.get("JARVIS_PLATFORM") or "kimi"
26
- self.codegen_model = os.environ.get("JARVIS_CODEGEN_MODEL") or os.environ.get("JARVIS_MODEL") or "kimi"
27
- self.normal_model = os.environ.get("JARVIS_MODEL") or "kimi"
28
- self.embedding_model_name = os.environ.get("JARVIS_EMBEDDING_MODEL") or "BAAI/bge-large-zh-v1.5"
29
- if not self.cheap_platform or not self.cheap_model or not self.codegen_platform or not self.codegen_model or not self.embedding_model_name or not self.normal_platform or not self.normal_model:
30
- raise ValueError("JARVIS_CHEAP_PLATFORM or JARVIS_CHEAP_MODEL or JARVIS_CODEGEN_PLATFORM or JARVIS_CODEGEN_MODEL or JARVIS_EMBEDDING_MODEL or JARVIS_PLATFORM or JARVIS_MODEL is not set")
31
-
32
- PrettyOutput.print(f"廉价模型使用平台: {self.cheap_platform} 模型: {self.cheap_model}", output_type=OutputType.INFO)
33
- PrettyOutput.print(f"代码生成模型使用平台: {self.codegen_platform} 模型: {self.codegen_model}", output_type=OutputType.INFO)
34
- PrettyOutput.print(f"分析模型使用平台: {self.normal_platform} 模型: {self.normal_model}", output_type=OutputType.INFO)
35
- PrettyOutput.print(f"嵌入模型: {self.embedding_model_name}", output_type=OutputType.INFO)
36
- PrettyOutput.print(f"索引建立线程数: {self.thread_count}", output_type=OutputType.INFO)
37
- PrettyOutput.print(f"检索算法:分层导航小世界算法", output_type=OutputType.INFO)
38
22
 
39
23
  # 初始化数据目录
40
24
  self.data_dir = os.path.join(self.root_dir, ".jarvis-codebase")
@@ -43,9 +27,7 @@ class CodeBase:
43
27
 
44
28
  # 初始化嵌入模型,使用系统默认缓存目录
45
29
  try:
46
- os.environ["TOKENIZERS_PARALLELISM"] = "false"
47
- PrettyOutput.print("正在加载/下载模型,请稍候...", output_type=OutputType.INFO)
48
- self.embedding_model = SentenceTransformer(self.embedding_model_name)
30
+ self.embedding_model = load_embedding_model()
49
31
 
50
32
  # 强制完全加载所有模型组件
51
33
  test_text = """
@@ -64,7 +46,7 @@ class CodeBase:
64
46
  self.vector_dim = self.embedding_model.get_sentence_embedding_dimension()
65
47
 
66
48
  self.git_file_list = self.get_git_file_list()
67
- self.platform_registry = PlatformRegistry().get_global_platform_registry()
49
+ self.platform_registry = PlatformRegistry.get_global_platform_registry()
68
50
 
69
51
  # 初始化缓存和索引
70
52
  self.cache_path = os.path.join(self.data_dir, "cache.pkl")
@@ -104,8 +86,7 @@ class CodeBase:
104
86
  return False
105
87
 
106
88
  def make_description(self, file_path: str) -> str:
107
- model = self.platform_registry.create_platform(self.cheap_platform)
108
- model.set_model_name(self.cheap_model)
89
+ model = PlatformRegistry.get_global_platform_registry().get_codegen_platform()
109
90
  model.set_suppress_output(True)
110
91
  content = open(file_path, "r", encoding="utf-8").read()
111
92
  prompt = f"""请分析以下代码文件,并生成一个详细的描述。描述应该包含以下要点:
@@ -413,8 +394,7 @@ class CodeBase:
413
394
  if not initial_results:
414
395
  return []
415
396
 
416
- model = self.platform_registry.create_platform(self.normal_platform)
417
- model.set_model_name(self.normal_model)
397
+ model = PlatformRegistry.get_global_platform_registry().get_normal_platform()
418
398
  model.set_suppress_output(True)
419
399
 
420
400
  try:
@@ -472,8 +452,7 @@ class CodeBase:
472
452
 
473
453
  def search_similar(self, query: str, top_k: int = 20) -> List[Tuple[str, float, str]]:
474
454
  """搜索相似文件"""
475
- model = self.platform_registry.create_platform(self.normal_platform)
476
- model.set_model_name(self.normal_model)
455
+ model = PlatformRegistry.get_global_platform_registry().get_normal_platform()
477
456
  model.set_suppress_output(True)
478
457
 
479
458
  try:
@@ -556,8 +535,7 @@ class CodeBase:
556
535
 
557
536
  请用专业的语言回答用户的问题,如果给出的文件内容不足以回答用户的问题,请告诉用户,绝对不要胡编乱造。
558
537
  """
559
- model = self.platform_registry.create_platform(self.codegen_platform)
560
- model.set_model_name(self.codegen_model)
538
+ model = PlatformRegistry.get_global_platform_registry().get_codegen_platform()
561
539
  try:
562
540
  response = model.chat(prompt)
563
541
  return response
@@ -21,13 +21,7 @@ index_lock = threading.Lock()
21
21
  class JarvisCoder:
22
22
  def __init__(self, root_dir: str, language: str):
23
23
  """初始化代码修改工具"""
24
-
25
- self.platform = os.environ.get("JARVIS_CODEGEN_PLATFORM") or os.environ.get("JARVIS_PLATFORM")
26
- self.model = os.environ.get("JARVIS_CODEGEN_MODEL") or os.environ.get("JARVIS_MODEL")
27
-
28
24
 
29
- if not self.platform or not self.model:
30
- raise ValueError("JARVIS_CODEGEN_PLATFORM or JARVIS_CODEGEN_MODEL is not set")
31
25
 
32
26
  self.root_dir = find_git_root(root_dir)
33
27
  if not self.root_dir:
@@ -486,8 +480,6 @@ def main():
486
480
  load_env_from_file()
487
481
 
488
482
  parser = argparse.ArgumentParser(description='代码修改工具')
489
- parser.add_argument('-p', '--platform', help='AI平台名称', default=os.environ.get('JARVIS_CODEGEN_PLATFORM'))
490
- parser.add_argument('-m', '--model', help='模型名称', default=os.environ.get('JARVIS_CODEGEN_MODEL'))
491
483
  parser.add_argument('-d', '--dir', help='项目根目录', default=os.getcwd())
492
484
  parser.add_argument('-l', '--language', help='编程语言', default="python")
493
485
  args = parser.parse_args()
@@ -495,9 +487,6 @@ def main():
495
487
  # 设置平台
496
488
  if not args.platform:
497
489
  print("错误: 未指定AI平台,请使用 -p 参数")
498
- # 设置模型
499
- if args.model:
500
- os.environ['JARVIS_CODEGEN_MODEL'] = args.model
501
490
 
502
491
  tool = JarvisCoder(args.dir, args.language)
503
492
 
jarvis/main.py CHANGED
@@ -104,31 +104,16 @@ def main():
104
104
  parser = argparse.ArgumentParser(description='Jarvis AI 助手')
105
105
  parser.add_argument('-f', '--files', nargs='*', help='要处理的文件列表')
106
106
  parser.add_argument('--keep-history', action='store_true', help='保持聊天历史(不删除会话)')
107
- parser.add_argument('-p', '--platform', default=os.getenv('JARVIS_PLATFORM') or 'kimi', help='选择AI平台')
108
- parser.add_argument('-m', '--model', default=os.getenv('JARVIS_MODEL') or '', help='模型') # 用于指定使用的模型名称,默认使用环境变量或平台默认模型
109
107
  args = parser.parse_args()
110
108
 
111
- platform = args.platform if args.platform else os.getenv('JARVIS_PLATFORM')
112
-
113
- if not platform:
114
- PrettyOutput.print("未指定AI平台,请使用 -p 参数或者设置 JARVIS_PLATFORM 环境变量", OutputType.ERROR)
115
- return 1
116
-
117
- PlatformRegistry.get_global_platform_registry().set_global_platform_name(platform)
118
-
119
- if args.model:
120
- os.environ["JARVIS_MODEL"] = args.model
121
-
122
109
  try:
123
110
  # 获取全局模型实例
124
111
  agent = Agent()
125
112
 
126
113
  # 如果用户传入了模型参数,则更换当前模型为用户指定的模型
127
- if args.model:
128
- agent.model.set_model_name(args.model)
129
114
 
130
115
  # 欢迎信息
131
- PrettyOutput.print(f"Jarvis 已初始化 - With {platform} 平台,模型: {agent.model.name()}", OutputType.SYSTEM)
116
+ PrettyOutput.print(f"Jarvis 已初始化 - With {agent.model.name()}", OutputType.SYSTEM)
132
117
  if args.keep_history:
133
118
  PrettyOutput.print("已启用历史保留模式", OutputType.INFO)
134
119
 
jarvis/models/registry.py CHANGED
@@ -152,14 +152,26 @@ class PlatformRegistry:
152
152
  """
153
153
  self.platforms: Dict[str, Type[BasePlatform]] = {}
154
154
 
155
- @staticmethod
156
- def get_global_platform() -> BasePlatform:
157
- """获取全局平台实例"""
158
- platform = PlatformRegistry.get_global_platform_registry().create_platform(PlatformRegistry.global_platform_name)
159
- if not platform:
160
- raise Exception(f"Failed to create platform: {PlatformRegistry.global_platform_name}")
155
def get_normal_platform(self) -> "BasePlatform":
    """Create the general-purpose platform configured through the environment.

    The platform name is read from ``JARVIS_PLATFORM`` and the model name
    from ``JARVIS_MODEL``; each falls back to ``"kimi"`` when the variable
    is unset or empty.

    Returns:
        The object returned by ``self.create_platform`` with the model name
        applied through ``set_model_name``.

    Raises:
        Exception: if ``create_platform`` yields ``None`` for the name.
    """
    # `or` instead of a .get() default so an exported-but-empty variable
    # also falls back, matching how these variables were read before.
    platform_name = os.environ.get("JARVIS_PLATFORM") or "kimi"
    model_name = os.environ.get("JARVIS_MODEL") or "kimi"

    platform = self.create_platform(platform_name)
    # NOTE(review): assumes create_platform signals an unknown platform by
    # returning None (the accessor this method replaced guarded for that).
    if platform is None:
        raise Exception(f"Failed to create platform: {platform_name}")

    platform.set_model_name(model_name)
    return platform
161
+
162
def get_codegen_platform(self) -> "BasePlatform":
    """Create the code-generation platform configured through the environment.

    The platform name is taken from ``JARVIS_CODEGEN_PLATFORM``, then
    ``JARVIS_PLATFORM``, then ``"kimi"``. The model name is taken from
    ``JARVIS_CODEGEN_MODEL``, then ``JARVIS_MODEL``, then ``"kimi"``.
    Unset and empty variables are both skipped.

    Returns:
        The object returned by ``self.create_platform`` with the model name
        applied through ``set_model_name``.

    Raises:
        Exception: if ``create_platform`` yields ``None`` for the name.
    """
    # `or` chains skip empty strings as well as missing variables.
    platform_name = (
        os.environ.get("JARVIS_CODEGEN_PLATFORM")
        or os.environ.get("JARVIS_PLATFORM")
        or "kimi"
    )
    model_name = (
        os.environ.get("JARVIS_CODEGEN_MODEL")
        or os.environ.get("JARVIS_MODEL")
        or "kimi"
    )

    platform = self.create_platform(platform_name)
    # NOTE(review): assumes create_platform signals an unknown platform by
    # returning None (the accessor this method replaced guarded for that).
    if platform is None:
        raise Exception(f"Failed to create platform: {platform_name}")

    platform.set_model_name(model_name)
    return platform
168
+
169
def get_cheap_platform(self) -> "BasePlatform":
    """Create the low-cost platform configured through the environment.

    The platform name is taken from ``JARVIS_CHEAP_PLATFORM``, then
    ``JARVIS_PLATFORM``, then ``"kimi"``. The model name is taken from
    ``JARVIS_CHEAP_MODEL``, then ``JARVIS_MODEL``, then ``"kimi"``.
    Unset and empty variables are both skipped.

    Returns:
        The object returned by ``self.create_platform`` with the model name
        applied through ``set_model_name``.

    Raises:
        Exception: if ``create_platform`` yields ``None`` for the name.
    """
    # `or` chains skip empty strings as well as missing variables.
    platform_name = (
        os.environ.get("JARVIS_CHEAP_PLATFORM")
        or os.environ.get("JARVIS_PLATFORM")
        or "kimi"
    )
    model_name = (
        os.environ.get("JARVIS_CHEAP_MODEL")
        or os.environ.get("JARVIS_MODEL")
        or "kimi"
    )

    platform = self.create_platform(platform_name)
    # NOTE(review): assumes create_platform signals an unknown platform by
    # returning None (the accessor this method replaced guarded for that).
    if platform is None:
        raise Exception(f"Failed to create platform: {platform_name}")

    platform.set_model_name(model_name)
    return platform
162
-
163
175
 
164
176
  def register_platform(self, name: str, platform_class: Type[BasePlatform]):
165
177
  """注册平台类
@@ -195,10 +207,3 @@ class PlatformRegistry:
195
207
  """获取可用平台列表"""
196
208
  return list(self.platforms.keys())
197
209
 
198
- def set_global_platform_name(self, platform_name: str):
199
- """设置全局平台"""
200
- PlatformRegistry.global_platform_name = platform_name
201
-
202
- def get_global_platform_name(self) -> str:
203
- """获取全局平台名称"""
204
- return PlatformRegistry.global_platform_name
jarvis/rag/__init__.py ADDED
File without changes
jarvis/rag/main.py ADDED
@@ -0,0 +1,566 @@
1
+ import os
2
+ import hashlib
3
+ import numpy as np
4
+ import faiss
5
+ from typing import List, Tuple, Optional, Dict
6
+ from sentence_transformers import SentenceTransformer
7
+ import pickle
8
+ from jarvis.utils import OutputType, PrettyOutput, find_git_root, load_embedding_model
9
+ from jarvis.utils import load_env_from_file
10
+ import tiktoken
11
+ from dataclasses import dataclass
12
+ from tqdm import tqdm
13
+ import fitz # PyMuPDF for PDF files
14
+ from docx import Document as DocxDocument # python-docx for DOCX files
15
+ from pathlib import Path
16
+ from jarvis.models.registry import PlatformRegistry
17
+
18
@dataclass
class Document:
    """A single indexed text fragment together with its provenance."""

    # The text of the fragment.
    content: str
    # Free-form provenance information (file path, position, ...).
    metadata: Dict
23
+
24
class FileProcessor:
    """Interface shared by all file processors.

    Subclasses override both static methods; the versions here only raise.
    """

    @staticmethod
    def can_handle(file_path: str) -> bool:
        """Report whether this processor is able to read *file_path*."""
        raise NotImplementedError()

    @staticmethod
    def extract_text(file_path: str) -> str:
        """Return the textual content of *file_path*."""
        raise NotImplementedError()
35
+
36
class TextFileProcessor(FileProcessor):
    """Processor for plain-text files in one of a few known encodings."""

    # Candidate encodings, tried in order; the first that decodes wins.
    ENCODINGS = ['utf-8', 'gbk', 'gb2312', 'latin1']
    # Number of leading bytes inspected by can_handle().
    SAMPLE_SIZE = 8192

    @staticmethod
    def _detect_encoding(data: bytes) -> Optional[str]:
        """Return the first entry of ENCODINGS that decodes *data*, else None."""
        for encoding in TextFileProcessor.ENCODINGS:
            try:
                data.decode(encoding)
            except UnicodeDecodeError:
                continue
            return encoding
        return None

    @staticmethod
    def can_handle(file_path: str) -> bool:
        """Decide from the first SAMPLE_SIZE bytes whether the file is text.

        A file is rejected when the sample contains a NUL byte, when more
        than 30% of the sample consists of control bytes other than tab,
        LF and CR, or when no candidate encoding decodes it. A file that
        cannot be opened is rejected as well.

        Returns:
            True if the file should be treated as text, False otherwise.
        """
        try:
            with open(file_path, 'rb') as f:
                sample = f.read(TextFileProcessor.SAMPLE_SIZE)
        except (OSError, TypeError, ValueError):
            return False

        # An empty file is trivially text. Without this guard the ratio
        # below divides by zero.
        if not sample:
            return True

        # NUL bytes practically never occur in text files.
        if b'\x00' in sample:
            return False

        # 9, 10, 13 = tab, line feed, carriage return.
        non_printable = sum(
            1 for byte in sample if byte < 32 and byte not in (9, 10, 13)
        )
        if non_printable / len(sample) > 0.3:
            return False

        return TextFileProcessor._detect_encoding(sample) is not None

    @staticmethod
    def extract_text(file_path: str) -> str:
        """Read the file as text and return it NFKC-normalized.

        The file is read once in binary mode and decoded with the first
        candidate encoding that accepts the complete content. Line endings
        are converted to ``\\n``.

        Raises:
            Exception: wrapping any failure to read or decode the file; the
                original error is attached as ``__cause__``.
        """
        import unicodedata

        try:
            with open(file_path, 'rb') as f:
                raw_data = f.read()

            detected_encoding = TextFileProcessor._detect_encoding(raw_data)
            if detected_encoding is None:
                # ValueError rather than UnicodeDecodeError: the latter
                # requires five constructor arguments and cannot carry a
                # plain message.
                raise ValueError(f"无法用支持的编码解码文件: {file_path}")

            content = raw_data.decode(detected_encoding)
            # Same newline translation that text-mode open() performs.
            content = content.replace('\r\n', '\n').replace('\r', '\n')

            return unicodedata.normalize('NFKC', content)

        except Exception as e:
            raise Exception(f"读取文件失败: {str(e)}") from e
104
+
105
class PDFProcessor(FileProcessor):
    """Processor for PDF files, selected by the ``.pdf`` extension."""

    @staticmethod
    def can_handle(file_path: str) -> bool:
        """Return True when the path ends in ``.pdf`` (case-insensitive)."""
        suffix = Path(file_path).suffix
        return suffix.lower() == '.pdf'

    @staticmethod
    def extract_text(file_path: str) -> str:
        """Return the text of every page, joined with newlines."""
        with fitz.open(file_path) as pdf:
            page_texts = [page.get_text() for page in pdf]
        return "\n".join(page_texts)
118
+
119
class DocxProcessor(FileProcessor):
    """Processor for DOCX files, selected by the ``.docx`` extension."""

    @staticmethod
    def can_handle(file_path: str) -> bool:
        """Return True when the path ends in ``.docx`` (case-insensitive)."""
        suffix = Path(file_path).suffix
        return suffix.lower() == '.docx'

    @staticmethod
    def extract_text(file_path: str) -> str:
        """Return the text of every paragraph, joined with newlines."""
        docx = DocxDocument(file_path)
        return "\n".join(paragraph.text for paragraph in docx.paragraphs)
129
+
130
class RAGTool:
    """Index a directory of documents and answer questions about them.

    Files are split into paragraph-sized fragments, each fragment is
    embedded, and the vectors are stored in a FAISS HNSW index. Fragments
    and vectors are cached in ``<root_dir>/.jarvis-rag/cache.pkl``.
    """

    # Directory names that are never descended into while indexing.
    IGNORED_DIRS = ('.jarvis-rag', '.git', '__pycache__', 'node_modules')
    # Files larger than this many bytes are skipped.
    MAX_FILE_SIZE = 10 * 1024 * 1024
    # Characters that end a sentence when an over-long paragraph is split.
    # Fullwidth forms are included because PDF/DOCX text is not normalized.
    SENTENCE_ENDS = frozenset('。！？….!?')

    def __init__(self, root_dir: str):
        """Set up configuration, the embedding model and the cached index.

        Args:
            root_dir: Project root. The process changes its working
                directory to it and the cache lives underneath it.

        Raises:
            Exception: whatever loading the embedding model raises.
        """
        load_env_from_file()
        self.root_dir = root_dir
        os.chdir(self.root_dir)

        # Paragraph length limits, in characters.
        self.min_paragraph_length = int(os.environ.get("JARVIS_MIN_PARAGRAPH_LENGTH", "50"))
        self.max_paragraph_length = int(os.environ.get("JARVIS_MAX_PARAGRAPH_LENGTH", "1000"))

        self.data_dir = os.path.join(self.root_dir, ".jarvis-rag")
        os.makedirs(self.data_dir, exist_ok=True)

        try:
            self.embedding_model = load_embedding_model()
            self.vector_dim = self.embedding_model.get_sentence_embedding_dimension()
            PrettyOutput.print("模型加载完成", output_type=OutputType.SUCCESS)
        except Exception as e:
            PrettyOutput.print(f"加载模型失败: {str(e)}", output_type=OutputType.ERROR)
            raise

        self.cache_path = os.path.join(self.data_dir, "cache.pkl")
        self.documents: List["Document"] = []
        self.index = None

        self._load_cache()

        # Extension-based processors come first so that a PDF or DOCX whose
        # leading bytes happen to look like text is not read as raw text.
        self.file_processors = [
            PDFProcessor(),
            DocxProcessor(),
            TextFileProcessor()
        ]

    def _load_cache(self):
        """Restore documents and rebuild the index from the cache file, if any.

        Any failure is reported as a warning and leaves the tool empty.
        """
        if not os.path.exists(self.cache_path):
            return
        try:
            # SECURITY: pickle executes arbitrary code while loading. The
            # cache must only ever be a file this tool wrote itself.
            with open(self.cache_path, 'rb') as f:
                cache_data = pickle.load(f)
            documents = cache_data["documents"]
            vectors = cache_data["vectors"]

            # Index ids are positions in the document list; a mismatch
            # would make search() return the wrong fragments.
            if len(documents) != vectors.shape[0]:
                raise ValueError("缓存中的文档数量与向量数量不一致")

            self.documents = documents
            self._build_index(vectors)
            PrettyOutput.print(f"加载了 {len(self.documents)} 个文档片段",
                               output_type=OutputType.INFO)
        except Exception as e:
            PrettyOutput.print(f"加载缓存失败: {str(e)}",
                               output_type=OutputType.WARNING)
            self.documents = []
            self.index = None

    def _save_cache(self, vectors: np.ndarray):
        """Write the current documents and *vectors* to the cache file.

        Failures are reported but not raised.
        """
        try:
            cache_data = {
                "documents": self.documents,
                "vectors": vectors
            }
            with open(self.cache_path, 'wb') as f:
                pickle.dump(cache_data, f)
            PrettyOutput.print(f"保存了 {len(self.documents)} 个文档片段",
                               output_type=OutputType.INFO)
        except Exception as e:
            PrettyOutput.print(f"保存缓存失败: {str(e)}",
                               output_type=OutputType.ERROR)

    def _build_index(self, vectors: np.ndarray):
        """Build the FAISS index from *vectors*; ids are the row numbers.

        With zero rows the index is cleared instead.
        """
        if vectors.shape[0] == 0:
            self.index = None
            return

        hnsw_index = faiss.IndexHNSWFlat(self.vector_dim, 16)
        hnsw_index.hnsw.efConstruction = 40
        hnsw_index.hnsw.efSearch = 16

        # IndexIDMap attaches explicit ids to the vectors of the HNSW index.
        index = faiss.IndexIDMap(hnsw_index)

        # FAISS expects float32 vectors and int64 ids; np.arange's default
        # integer type is platform dependent, so the dtype is spelled out.
        data = np.ascontiguousarray(vectors, dtype=np.float32)
        ids = np.arange(data.shape[0], dtype=np.int64)
        index.add_with_ids(data, ids)
        self.index = index

    @classmethod
    def _split_sentences(cls, paragraph: str) -> List[str]:
        """Cut *paragraph* after every sentence terminator.

        Pieces consisting only of whitespace are dropped.
        """
        sentences = []
        start = 0
        for pos, char in enumerate(paragraph):
            if char in cls.SENTENCE_ENDS:
                sentence = paragraph[start:pos + 1]
                if sentence.strip():
                    sentences.append(sentence)
                start = pos + 1
        tail = paragraph[start:]
        if tail.strip():
            sentences.append(tail)
        return sentences

    def _pack_sentences(self, sentences: List[str]) -> List[str]:
        """Greedily concatenate sentences into chunks of at most max length.

        A single sentence longer than the limit becomes a chunk of its own.
        """
        chunks = []
        current_chunk = []
        current_length = 0
        for sentence in sentences:
            sentence_length = len(sentence)
            if current_length + sentence_length > self.max_paragraph_length:
                if current_chunk:
                    chunks.append(''.join(current_chunk))
                current_chunk = [sentence]
                current_length = sentence_length
            else:
                current_chunk.append(sentence)
                current_length += sentence_length
        if current_chunk:
            chunks.append(''.join(current_chunk))
        return chunks

    def _split_text(self, text: str) -> List[str]:
        """Split *text* into fragments suitable for embedding.

        Blank lines separate paragraphs; the stripped lines of a paragraph
        are joined with single spaces. Paragraphs longer than
        ``max_paragraph_length`` are re-split at sentence boundaries, and
        everything shorter than ``min_paragraph_length`` is discarded.

        Args:
            text: The text to split.

        Returns:
            The list of fragments, in document order.
        """
        paragraphs = []
        pending_lines = []
        # The trailing empty entry flushes the last paragraph.
        for raw_line in text.split('\n') + ['']:
            line = raw_line.strip()
            if line:
                pending_lines.append(line)
                continue
            if pending_lines:
                paragraph_text = ' '.join(pending_lines)
                if len(paragraph_text) >= self.min_paragraph_length:
                    paragraphs.append(paragraph_text)
                pending_lines = []

        fragments = []
        for paragraph in paragraphs:
            if len(paragraph) <= self.max_paragraph_length:
                fragments.append(paragraph)
            else:
                sentences = self._split_sentences(paragraph)
                fragments.extend(self._pack_sentences(sentences))

        return [p for p in fragments if len(p) >= self.min_paragraph_length]

    def _get_embedding(self, text: str) -> np.ndarray:
        """Return the normalized embedding of *text* as a float32 array."""
        embedding = self.embedding_model.encode(text,
                                                normalize_embeddings=True,
                                                show_progress_bar=False)
        return np.array(embedding, dtype=np.float32)

    def _process_file(self, file_path: str) -> List["Document"]:
        """Turn one file into a list of Document fragments.

        Args:
            file_path: Path of the file to process.

        Returns:
            One Document per fragment; an empty list when the file is
            unsupported, empty, or fails to process (a message is printed).
        """
        try:
            processor = next(
                (p for p in self.file_processors if p.can_handle(file_path)),
                None,
            )
            if processor is None:
                PrettyOutput.print(f"跳过不支持的文件: {file_path}",
                                   output_type=OutputType.WARNING)
                return []

            content = processor.extract_text(file_path)
            if not content.strip():
                PrettyOutput.print(f"文件内容为空: {file_path}",
                                   output_type=OutputType.WARNING)
                return []

            chunks = self._split_text(content)
            file_type = Path(file_path).suffix.lower()
            return [
                Document(
                    content=chunk,
                    metadata={
                        "file_path": file_path,
                        "file_type": file_type,
                        "chunk_index": i,
                        "total_chunks": len(chunks)
                    }
                )
                for i, chunk in enumerate(chunks)
            ]

        except Exception as e:
            PrettyOutput.print(f"处理文件失败 {file_path}: {str(e)}",
                               output_type=OutputType.ERROR)
            return []

    def _collect_files(self, directory: str) -> List[str]:
        """Walk *directory* and return the paths of all candidate files.

        Directories named in IGNORED_DIRS are not descended into. Files
        above MAX_FILE_SIZE, or whose size cannot be read, are skipped with
        a warning.
        """
        collected = []
        for root, dirs, files in os.walk(directory):
            # Prune by exact name, in place, so os.walk skips the subtree.
            # A substring test on the path would also drop e.g. ".github".
            dirs[:] = [d for d in dirs if d not in self.IGNORED_DIRS]
            for file in files:
                file_path = os.path.join(root, file)
                try:
                    size = os.path.getsize(file_path)
                except OSError:
                    # Broken symlink or a file removed during the walk.
                    PrettyOutput.print(f"无法访问文件,已跳过: {file_path}",
                                       output_type=OutputType.WARNING)
                    continue
                if size > self.MAX_FILE_SIZE:
                    PrettyOutput.print(f"跳过大文件: {file_path}",
                                       output_type=OutputType.WARNING)
                    continue
                collected.append(file_path)
        return collected

    def build_index(self, dir: str):
        """Index every supported file below *dir*, replacing the old index.

        Args:
            dir: Directory to scan recursively.
        """
        all_files = self._collect_files(dir)

        self.documents = []
        for file_path in tqdm(all_files, desc="处理文件"):
            self.documents.extend(self._process_file(file_path))

        vectors = [
            self._get_embedding(doc.content)
            for doc in tqdm(self.documents, desc="生成向量")
        ]

        if vectors:
            matrix = np.vstack(vectors)
        else:
            # Nothing indexable: still rebuild and save, so neither a stale
            # in-memory index nor a stale cache outlives this run.
            matrix = np.empty((0, self.vector_dim), dtype=np.float32)
        self._build_index(matrix)
        self._save_cache(matrix)

        PrettyOutput.print(f"成功索引了 {len(self.documents)} 个文档片段",
                           output_type=OutputType.SUCCESS)

    def search(self, query: str, top_k: int = 5) -> List[Tuple["Document", float]]:
        """Find the fragments closest to *query*.

        Args:
            query: Query text.
            top_k: Maximum number of results.

        Returns:
            ``(document, score)`` pairs in the order FAISS returns them,
            where ``score = 1 / (1 + distance)``.

        Raises:
            ValueError: if no index has been built.
        """
        if self.index is None:
            raise ValueError("索引未构建,请先调用build_index()")

        query_vector = self._get_embedding(query).reshape(1, -1)
        distances, indices = self.index.search(query_vector, top_k)

        results = []
        for idx, distance in zip(indices[0], distances[0]):
            # FAISS pads missing results with -1; also ignore any id that
            # does not map to a loaded document.
            if idx < 0 or idx >= len(self.documents):
                continue
            similarity = 1.0 / (1.0 + float(distance))
            results.append((self.documents[int(idx)], similarity))

        return results

    def is_index_built(self):
        """Return True when an index is available for searching."""
        return self.index is not None

    def query(self, query: str) -> List["Document"]:
        """Return the documents matching *query*, without their scores.

        Raises:
            ValueError: if no index has been built.
        """
        if not self.is_index_built():
            raise ValueError("索引未构建,请先调用build_index()")

        return [doc for doc, _ in self.search(query)]

    def ask(self, question: str) -> Optional[str]:
        """Answer *question* using the most relevant indexed fragments.

        Args:
            question: The user's question.

        Returns:
            The model's reply, or None when nothing relevant was found or
            any step failed (the error is printed).
        """
        try:
            results = self.query(question)
            if not results:
                return None

            context = []
            for doc in results:
                context.append(f"""
来源文件: {doc.metadata['file_path']}
片段位置: {doc.metadata['chunk_index'] + 1}/{doc.metadata['total_chunks']}
内容:
{doc.content}
---
""")

            prompt = f"""请基于以下文档片段回答用户的问题。如果文档片段中的信息不足以完整回答问题,请明确指出。

用户问题: {question}

相关文档片段:
{''.join(context)}

请提供准确、简洁的回答,并在适当时引用具体的文档来源。
"""
            model = PlatformRegistry.get_global_platform_registry().get_normal_platform()
            return model.chat(prompt)

        except Exception as e:
            # Deliberately best-effort: callers treat None as "no answer".
            PrettyOutput.print(f"问答失败: {str(e)}", output_type=OutputType.ERROR)
            return None
498
+
499
def main():
    """Command-line entry point for building and querying the document index.

    Supported invocations:
      * ``--dir DIR --build``: index the files below DIR.
      * ``--search TEXT``: print the fragments closest to TEXT.
      * ``--ask TEXT``: print a model answer based on matching fragments.

    Returns:
        0 on success, 1 on failure or when no action was requested.
    """
    import argparse
    import sys

    # Make sure Chinese output can be written. reconfigure() changes the
    # encoding of the existing text stream in place, so the stream keeps its
    # buffering and its `encoding` attribute.
    for stream in (sys.stdout, sys.stderr):
        encoding = (getattr(stream, "encoding", None) or "").lower().replace("_", "-")
        if encoding != "utf-8" and hasattr(stream, "reconfigure"):
            stream.reconfigure(encoding="utf-8")

    parser = argparse.ArgumentParser(description='文档检索和分析工具')
    parser.add_argument('--dir', type=str, help='要处理的文档目录')
    parser.add_argument('--build', action='store_true', help='构建文档索引')
    parser.add_argument('--search', type=str, help='搜索文档内容')
    parser.add_argument('--ask', type=str, help='询问关于文档的问题')
    args = parser.parse_args()

    try:
        current_dir = find_git_root()
        rag = RAGTool(current_dir)

        if args.dir and args.build:
            PrettyOutput.print(f"正在处理目录: {args.dir}", output_type=OutputType.INFO)
            rag.build_index(args.dir)
            return 0

        if (args.search or args.ask) and not rag.is_index_built():
            PrettyOutput.print("索引尚未构建,请先使用 --dir 和 --build 参数构建索引", output_type=OutputType.WARNING)
            return 1

        if args.search:
            results = rag.query(args.search)
            if not results:
                PrettyOutput.print("未找到相关内容", output_type=OutputType.WARNING)
                return 1

            for doc in results:
                PrettyOutput.print(f"\n文件: {doc.metadata['file_path']}", output_type=OutputType.INFO)
                PrettyOutput.print(f"片段 {doc.metadata['chunk_index'] + 1}/{doc.metadata['total_chunks']}",
                                   output_type=OutputType.INFO)
                PrettyOutput.print("\n内容:", output_type=OutputType.INFO)
                # Round-trip through UTF-8 so characters that cannot be
                # encoded (e.g. lone surrogates) are replaced, not fatal.
                content = doc.content.encode('utf-8', errors='replace').decode('utf-8')
                PrettyOutput.print(content, output_type=OutputType.INFO)
            return 0

        if args.ask:
            response = rag.ask(args.ask)
            if not response:
                PrettyOutput.print("未能获取答案", output_type=OutputType.WARNING)
                return 1

            PrettyOutput.print("\n回答:", output_type=OutputType.INFO)
            PrettyOutput.print(response, output_type=OutputType.INFO)
            return 0

        PrettyOutput.print("请指定操作参数。使用 -h 查看帮助。", output_type=OutputType.WARNING)
        return 1

    except Exception as e:
        # Top-level boundary: report the error and signal failure.
        PrettyOutput.print(f"执行失败: {str(e)}", output_type=OutputType.ERROR)
        return 1
564
+
565
if __name__ == "__main__":
    # Propagate main()'s return code as the process exit status; calling
    # main() bare would always exit with 0, even on failure.
    raise SystemExit(main())
jarvis/tools/coder.py CHANGED
@@ -7,21 +7,21 @@ class CoderTool:
7
7
  """代码修改工具"""
8
8
 
9
9
  name = "coder"
10
- description = "用于自动修改和生成代码的工具"
10
+ description = "分析并修改现有代码,用于实现新功能、修复bug、重构代码等。能理解代码上下文并进行精确的代码编辑。"
11
11
  parameters = {
12
12
  "feature": {
13
13
  "type": "string",
14
- "description": "要实现的功能描述",
14
+ "description": "要实现的功能描述或需要修改的内容,例如:'添加日志功能'、'修复内存泄漏'、'优化性能'等",
15
15
  "required": True
16
16
  },
17
17
  "dir": {
18
18
  "type": "string",
19
- "description": "项目根目录",
19
+ "description": "项目根目录,默认为当前目录",
20
20
  "required": False
21
21
  },
22
22
  "language": {
23
23
  "type": "string",
24
- "description": "编程语言",
24
+ "description": "项目的主要编程语言,默认为python",
25
25
  "required": False
26
26
  }
27
27
  }
jarvis/tools/generator.py CHANGED
@@ -42,11 +42,7 @@ class ToolGeneratorTool:
42
42
 
43
43
  def _generate_tool_code(self, tool_name: str, class_name: str, description: str, parameters: Dict) -> str:
44
44
  """使用大模型生成工具代码"""
45
- platform_name = os.getenv("JARVIS_CODEGEN_PLATFORM") or PlatformRegistry.get_global_platform_name()
46
- model = PlatformRegistry.create_platform(platform_name)
47
- model_name = os.getenv("JARVIS_CODEGEN_MODEL")
48
- if model_name:
49
- model.set_model_name(model_name)
45
+ model = PlatformRegistry.get_global_platform_registry().get_codegen_platform()
50
46
 
51
47
  prompt = f"""请生成一个Python工具类的代码,要求如下,除了代码,不要输出任何内容:
52
48
 
jarvis/tools/search.py CHANGED
@@ -8,33 +8,58 @@ from urllib.parse import quote
8
8
  def bing_search(query):
9
9
  try:
10
10
  with sync_playwright() as p:
11
- browser = p.chromium.launch()
12
- page = browser.new_page()
13
- page.goto(
14
- f"https://www.bing.com/search?form=QBRE&q={quote(query)}&cc=US"
11
+ # 启动浏览器时设置参数
12
+ browser = p.chromium.launch(
13
+ headless=True, # 无头模式
14
+ args=['--disable-gpu', '--no-sandbox', '--disable-dev-shm-usage']
15
15
  )
16
-
17
- page.wait_for_selector("#b_results", timeout=10000)
18
16
 
17
+ # 创建新页面并设置超时
18
+ page = browser.new_page(
19
+ user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
20
+ viewport={'width': 1920, 'height': 1080}
21
+ )
22
+
23
+ # 设置页面超时
24
+ page.set_default_timeout(60000)
25
+
26
+ # 访问搜索页面
27
+ url = f"https://www.bing.com/search?q={quote(query)}&form=QBLH&sp=-1"
28
+ page.goto(url, wait_until="networkidle")
29
+
30
+ # 等待搜索结果加载
31
+ page.wait_for_selector("#b_results", state="visible", timeout=30000)
32
+
33
+ # 等待一下以确保结果完全加载
34
+ page.wait_for_timeout(1000)
35
+
36
+ # 提取搜索结果
19
37
  summaries = page.evaluate("""() => {
20
- const liElements = Array.from(
21
- document.querySelectorAll("#b_results > .b_algo")
22
- );
23
- return liElements.map((li) => {
24
- const abstractElement = li.querySelector(".b_caption > p");
25
- const linkElement = li.querySelector("a");
26
- const href = linkElement.getAttribute("href");
27
- const title = linkElement.textContent;
28
- const abstract = abstractElement ? abstractElement.textContent : "";
29
- return { href, title, abstract };
30
- });
38
+ const results = [];
39
+ const elements = document.querySelectorAll("#b_results > .b_algo");
40
+
41
+ for (const el of elements) {
42
+ const titleEl = el.querySelector("h2");
43
+ const linkEl = titleEl ? titleEl.querySelector("a") : null;
44
+ const abstractEl = el.querySelector(".b_caption p");
45
+
46
+ if (linkEl) {
47
+ results.push({
48
+ title: titleEl.innerText.trim(),
49
+ href: linkEl.href,
50
+ abstract: abstractEl ? abstractEl.innerText.trim() : ""
51
+ });
52
+ }
53
+ }
54
+ return results;
31
55
  }""")
32
56
 
33
57
  browser.close()
34
- print(summaries)
35
58
  return summaries
59
+
36
60
  except Exception as error:
37
- print("An error occurred:", error)
61
+ PrettyOutput.print(f"搜索出错: {str(error)}", OutputType.ERROR)
62
+ return None
38
63
 
39
64
  class SearchTool:
40
65
  name = "search"
@@ -61,7 +86,7 @@ class SearchTool:
61
86
 
62
87
  def __init__(self):
63
88
  """初始化搜索工具,需要传入语言模型用于信息提取"""
64
- self.model = PlatformRegistry.get_global_platform_registry().get_global_platform()
89
+ self.model = PlatformRegistry.get_global_platform_registry().get_normal_platform()
65
90
  self.webpage_tool = WebpageTool()
66
91
 
67
92
  def _search(self, query: str, max_results: int) -> List[Dict]:
@@ -158,4 +183,46 @@ class SearchTool:
158
183
  return {
159
184
  "success": False,
160
185
  "error": f"搜索失败: {str(e)}"
161
- }
186
+ }
187
+
188
def main():
    """Run a Bing search from the command line and print the results.

    Command-line arguments:
        query:      the search keywords (positional, required).
        --max:      maximum number of results to display; must be an
                    integer >= 1 (default 5).
        --url-only: print only the result URLs.

    Exits with status 1 when the search yields no results, is interrupted
    with Ctrl-C, or fails with an unexpected error. Invalid arguments are
    rejected by argparse (exit status 2).
    """
    import argparse
    import sys

    def _positive_int(text):
        """argparse ``type`` callable that accepts only integers >= 1."""
        try:
            value = int(text)
        except ValueError:
            raise argparse.ArgumentTypeError(f"invalid int value: {text!r}") from None
        if value < 1:
            # A zero or negative limit would make ``results[:limit]`` print
            # nothing, or silently drop results from the end of the list.
            raise argparse.ArgumentTypeError(f"must be >= 1, got {value}")
        return value

    parser = argparse.ArgumentParser(description='Bing搜索工具')
    parser.add_argument('query', help='搜索关键词')
    parser.add_argument('--max', type=_positive_int, default=5, help='最大结果数量(默认5)')
    parser.add_argument('--url-only', action='store_true', help='只显示URL')
    args = parser.parse_args()

    try:
        PrettyOutput.print(f"正在搜索: {args.query}", OutputType.INFO)

        results = bing_search(args.query)

        # bing_search returns None on error and may return an empty list.
        if not results:
            PrettyOutput.print("未找到搜索结果", OutputType.WARNING)
            sys.exit(1)

        PrettyOutput.print(f"\n找到 {len(results)} 条结果:", OutputType.INFO)

        for i, result in enumerate(results[:args.max], 1):
            PrettyOutput.print(f"\n{'-'*50}", OutputType.INFO)
            if args.url_only:
                PrettyOutput.print(f"{i}. {result['href']}", OutputType.INFO)
            else:
                PrettyOutput.print(f"{i}. {result['title']}", OutputType.INFO)
                PrettyOutput.print(f"链接: {result['href']}", OutputType.INFO)
                if result['abstract']:
                    PrettyOutput.print(f"摘要: {result['abstract']}", OutputType.INFO)

    except KeyboardInterrupt:
        PrettyOutput.print("\n搜索已取消", OutputType.WARNING)
        sys.exit(1)
    except Exception as e:
        # sys.exit() raises SystemExit, which is not an Exception subclass,
        # so the exits above pass through this handler untouched.
        PrettyOutput.print(f"执行出错: {str(e)}", OutputType.ERROR)
        sys.exit(1)
226
+
227
+ if __name__ == "__main__":
228
+ main()
jarvis/utils.py CHANGED
@@ -9,6 +9,7 @@ from colorama import Fore, Style as ColoramaStyle
9
9
  from prompt_toolkit import PromptSession
10
10
  from prompt_toolkit.styles import Style as PromptStyle
11
11
  from prompt_toolkit.formatted_text import FormattedText
12
+ from sentence_transformers import SentenceTransformer
12
13
 
13
14
  # 初始化colorama
14
15
  colorama.init()
@@ -206,4 +207,29 @@ def find_git_root(dir="."):
206
207
  os.chdir(dir)
207
208
  ret = os.popen("git rev-parse --show-toplevel").read().strip()
208
209
  os.chdir(curr_dir)
209
- return ret
210
+ return ret
211
+
212
def load_embedding_model():
    """Load the sentence-embedding model, trying the local cache first.

    The model name is read from the ``JARVIS_EMBEDDING_MODEL`` environment
    variable and defaults to ``BAAI/bge-large-zh-v1.5``. As a side effect
    ``TOKENIZERS_PARALLELISM`` is set to ``"false"`` in the process
    environment before the model is created.

    Returns:
        The ``SentenceTransformer`` instance, created on the CPU device.

    Raises:
        Whatever the second (non-local-only) ``SentenceTransformer`` call
        raises; only a failure of the first, local-only attempt is caught.
    """
    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    model_name = os.environ.get("JARVIS_EMBEDDING_MODEL", "BAAI/bge-large-zh-v1.5")
    PrettyOutput.print(f"正在加载嵌入模型: {model_name}...", OutputType.INFO)

    # Constructor arguments shared by both load attempts.
    shared_kwargs = {
        "device": "cpu",
        "cache_folder": os.path.expanduser("~/.cache/huggingface/hub"),
    }

    try:
        # First attempt: restrict loading to files already on disk.
        model = SentenceTransformer(model_name, local_files_only=True, **shared_kwargs)
        PrettyOutput.print("使用本地缓存加载模型成功", OutputType.SUCCESS)
    except Exception as local_error:
        PrettyOutput.print(f"本地加载失败,尝试在线下载: {str(local_error)}", OutputType.WARNING)
        # Second attempt: same arguments without the local-only restriction,
        # so the library is free to fetch the model.
        model = SentenceTransformer(model_name, **shared_kwargs)
        PrettyOutput.print("模型下载并加载成功", OutputType.SUCCESS)

    return model
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: jarvis-ai-assistant
3
- Version: 0.1.76
3
+ Version: 0.1.78
4
4
  Summary: Jarvis: An AI assistant that uses tools to interact with the system
5
5
  Home-page: https://github.com/skyfireitdiy/Jarvis
6
6
  Author: skyfire
@@ -35,6 +35,7 @@ Classifier: Programming Language :: Python :: 3.8
35
35
  Classifier: Programming Language :: Python :: 3.9
36
36
  Classifier: Programming Language :: Python :: 3.10
37
37
  Classifier: Programming Language :: Python :: 3.11
38
+ Classifier: Operating System :: POSIX :: Linux
38
39
  Requires-Python: >=3.8
39
40
  Description-Content-Type: text/markdown
40
41
  License-File: LICENSE
@@ -48,6 +49,10 @@ Requires-Dist: numpy>=1.24.0
48
49
  Requires-Dist: faiss-cpu>=1.8.0
49
50
  Requires-Dist: sentence-transformers>=2.2.2
50
51
  Requires-Dist: bs4>=0.0.1
52
+ Requires-Dist: PyMuPDF>=1.21.0
53
+ Requires-Dist: python-docx>=0.8.11
54
+ Requires-Dist: tiktoken>=0.3.0
55
+ Requires-Dist: tqdm>=4.65.0
51
56
  Provides-Extra: dev
52
57
  Requires-Dist: pytest; extra == "dev"
53
58
  Requires-Dist: black; extra == "dev"
@@ -124,6 +129,7 @@ Jarvis supports configuration through environment variables that can be set in t
124
129
  |---------|------|--------|------|
125
130
  | JARVIS_PLATFORM | AI platform to use, supports kimi/openai/ai8 etc | kimi | Yes |
126
131
  | JARVIS_MODEL | Model name to use | - | No |
132
+ | JARVIS_THREAD_COUNT | Number of threads for parallel processing | 10 | No |
127
133
  | JARVIS_CODEGEN_PLATFORM | AI platform for code generation | Same as JARVIS_PLATFORM | No |
128
134
  | JARVIS_CODEGEN_MODEL | Model name for code generation | Same as JARVIS_MODEL | No |
129
135
  | JARVIS_CHEAP_PLATFORM | AI platform for cheap operations | Same as JARVIS_PLATFORM | No |
@@ -141,36 +147,47 @@ Jarvis supports configuration through environment variables that can be set in t
141
147
 
142
148
  ## 🎯 Usage
143
149
 
144
- ### Basic Usage
150
+ ### Main Assistant
145
151
  ```bash
146
152
  jarvis
147
153
  ```
148
154
 
149
-
150
- ### With Specific Model
155
+ ### Code Generation
151
156
  ```bash
152
- jarvis -p kimi # Use Kimi platform
153
- jarvis -p openai # Use OpenAI platform
154
- ```
155
-
156
- ### Code Modification
157
- ```bash
158
- jarvis-coder --feature "Add new feature" # Modify code to add new feature
157
+ jarvis-coder
159
158
  ```
160
159
 
161
160
  ### Codebase Search
162
161
  ```bash
163
- jarvis-codebase --search "database connection" # Search codebase
162
+ # Generate codebase index
163
+ jarvis-codebase --generate
164
+
165
+ # Search similar code
166
+ jarvis-codebase --search "your search query"
167
+
168
+ # Ask questions about codebase
169
+ jarvis-codebase --ask "your question"
164
170
  ```
165
171
 
166
- ### Codebase Question
172
+ ### Document Analysis (RAG)
167
173
  ```bash
168
- jarvis-codebase --ask "How to use the database?" # Ask about codebase
174
+ # Build document index
175
+ jarvis-rag --dir /path/to/documents --build
176
+
177
+ # Search documents
178
+ jarvis-rag --query "your search query"
169
179
  ```
170
180
 
171
- ### Keep Chat History
181
+ ### Search Tool
172
182
  ```bash
173
- jarvis --keep-history # Don't delete chat session after completion
183
+ # Basic search
184
+ jarvis-search "your query"
185
+
186
+ # Show only URLs
187
+ jarvis-search "your query" --url-only
188
+
189
+ # Limit results
190
+ jarvis-search "your query" --max 3
174
191
  ```
175
192
 
176
193
  ## 🛠️ Tools
@@ -1,33 +1,35 @@
1
- jarvis/__init__.py,sha256=mqMOFwEAV4m463m3kW2Lz8KGIkWz1vJScSEYFQi4jsc,50
2
- jarvis/agent.py,sha256=10YxntRu9CWp-xZt7PmUYbzj_knwGM0QqyRbM4FaKSk,21473
3
- jarvis/main.py,sha256=7EcSlxa5JFFXBujzKDWdNtwX6axLhFFdJMc2GxTjfdk,6295
4
- jarvis/utils.py,sha256=vZV8sHj0ggZy4Rb8RxIujQhRWgeNEomhqVl4WXmpq7c,7498
1
+ jarvis/__init__.py,sha256=621TTvMVhcxJHyaCKZr7RtlYLVSbouHVNyKql9eFQFY,50
2
+ jarvis/agent.py,sha256=afAGM2SHzuf3ahDMXsIx5kNtfFeAssZVup0GbZVf2iY,19646
3
+ jarvis/main.py,sha256=OJc7e5i988eQLByT7SzX7eoa0HKm4LMg814gZv9hBX8,5487
4
+ jarvis/utils.py,sha256=jvo6ylvrTaSmXWcYY0qTTf14TwCkAhPsCUuIl5WHEuw,8640
5
5
  jarvis/jarvis_codebase/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- jarvis/jarvis_codebase/main.py,sha256=G8ADdTgjZTxEjvP11IBbjHiNWCvuTkFiLkDKq_8UpI0,26947
6
+ jarvis/jarvis_codebase/main.py,sha256=DPMPw8qoKHooydELKwf_bXPa0a3459kdFcZOXdrM-N8,24785
7
7
  jarvis/jarvis_coder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- jarvis/jarvis_coder/main.py,sha256=4YhQ20MD8ntdD5hjmjJbBHYEDPDZ2aPIgAviJCvtOXE,24458
8
+ jarvis/jarvis_coder/main.py,sha256=Hr6T5LHC5n08s9tnOt-2_h8TmzothBUhqcd2-fzwuVI,23790
9
9
  jarvis/models/__init__.py,sha256=mrOt67nselz_H1gX9wdAO4y2DY5WPXzABqJbr5Des8k,63
10
10
  jarvis/models/ai8.py,sha256=vgy-r_3HHxGMAalZrA65VWHC1PuwBTYgtprSgHkCbrk,12557
11
11
  jarvis/models/base.py,sha256=ShV1H8Unee4RMaiFO4idROQA0Hc6wu4dyeRPX5fcszk,1433
12
12
  jarvis/models/kimi.py,sha256=1iTB0Z_WOmCML3Ufsge6jmeKOYvccr7I5lS3JUXymU4,17611
13
13
  jarvis/models/openai.py,sha256=ayaBWAN5VexMcKVrjEPDNB-Q9wx0sCV9Z4BCrvwYJ9w,4315
14
14
  jarvis/models/oyi.py,sha256=X2c5SWDIuQDCCFBcEKbzIWEz3I34eOAi0d1XAFgxlpw,15001
15
- jarvis/models/registry.py,sha256=YpooKSpk5pSWfb5cBDz5wRfPK-abb9uuUZr4WBejqwI,7762
15
+ jarvis/models/registry.py,sha256=Ez3MNCYIlvPxRS5rJrgV7b8BkXtaZ75bCkemPr3L9nw,8156
16
+ jarvis/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
+ jarvis/rag/main.py,sha256=D3JrRQd4dTyf4ZB0gN131a3D4R933BhlNypimo1ZX7w,20775
16
18
  jarvis/tools/__init__.py,sha256=7Rqyj5hBAv5cWDVr5T9ZTZASO7ssBHeQNm2_4ZARdkA,72
17
19
  jarvis/tools/base.py,sha256=EGRGbdfbLXDLwtyoWdvp9rlxNX7bzc20t0Vc2VkwIEY,652
18
20
  jarvis/tools/codebase_qa.py,sha256=LsowsgL7HBmdBwa7zXcYi_OkwOok4qbnzYWYsuZxHtU,2413
19
- jarvis/tools/coder.py,sha256=ZJfPInKms4Hj3-eQlBwamVsvZ-2nlZ-4jsqJ-tJc6mg,2040
21
+ jarvis/tools/coder.py,sha256=kmotT2Klsug44S51QoSW9DzkxLzcF-XonyYAEoWZV6c,2295
20
22
  jarvis/tools/file_ops.py,sha256=h8g0eT9UvlJf4kt0DLXvdSsjcPj7x19lxWdDApeDfpg,3842
21
- jarvis/tools/generator.py,sha256=vVP3eN5cCDpRXf_fn0skETkPXAW1XZFWx9pt2_ahK48,5999
23
+ jarvis/tools/generator.py,sha256=S1DhHBfhNvF6SrORnlk8Mz210yDiJPuxbfswbX_UACs,5791
22
24
  jarvis/tools/methodology.py,sha256=UG6s5VYRcd9wrKX4cg6f7zJhet5AIcthFGMOAdevBiw,5175
23
25
  jarvis/tools/registry.py,sha256=MeTYNdZNRdhlgABviVxzbDPSgLpwDp2Nx2dGzedRu8U,7212
24
- jarvis/tools/search.py,sha256=1EqOVvLhg2Csh-i03-XeCrusbyfmH69FZ8khwZt8Tow,6131
26
+ jarvis/tools/search.py,sha256=c9dXtyICdl8Lm8shNPNyIx9k67uY0rMF8xnIKu2RsnE,8787
25
27
  jarvis/tools/shell.py,sha256=UPKshPyOaUwTngresUw-ot1jHjQIb4wCY5nkJqa38lU,2520
26
28
  jarvis/tools/sub_agent.py,sha256=rEtAmSVY2ZjFOZEKr5m5wpACOQIiM9Zr_3dT92FhXYU,2621
27
29
  jarvis/tools/webpage.py,sha256=d3w3Jcjcu1ESciezTkz3n3Zf-rp_l91PrVoDEZnckOo,2391
28
- jarvis_ai_assistant-0.1.76.dist-info/LICENSE,sha256=AGgVgQmTqFvaztRtCAXsAMryUymB18gZif7_l2e1XOg,1063
29
- jarvis_ai_assistant-0.1.76.dist-info/METADATA,sha256=X1sBPb59GZyR4UjtnFntEQY4SzIe0q_KOgmYt62hMcU,12399
30
- jarvis_ai_assistant-0.1.76.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
31
- jarvis_ai_assistant-0.1.76.dist-info/entry_points.txt,sha256=QNUeqmUJd7nHufel2FO7cRttS1uKFfnbIyObv8eVyOY,140
32
- jarvis_ai_assistant-0.1.76.dist-info/top_level.txt,sha256=1BOxyWfzOP_ZXj8rVTDnNCJ92bBGB0rwq8N1PCpoMIs,7
33
- jarvis_ai_assistant-0.1.76.dist-info/RECORD,,
30
+ jarvis_ai_assistant-0.1.78.dist-info/LICENSE,sha256=AGgVgQmTqFvaztRtCAXsAMryUymB18gZif7_l2e1XOg,1063
31
+ jarvis_ai_assistant-0.1.78.dist-info/METADATA,sha256=h_0jRKVqkVFOatxdzvE4QkB_mkKxAHONa4dZr5CCXaI,12736
32
+ jarvis_ai_assistant-0.1.78.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
33
+ jarvis_ai_assistant-0.1.78.dist-info/entry_points.txt,sha256=iEvZ0rn-muMxWCNH5QEvw_mTTy_EVhyQyxDxWXqQQVo,174
34
+ jarvis_ai_assistant-0.1.78.dist-info/top_level.txt,sha256=1BOxyWfzOP_ZXj8rVTDnNCJ92bBGB0rwq8N1PCpoMIs,7
35
+ jarvis_ai_assistant-0.1.78.dist-info/RECORD,,
@@ -2,3 +2,4 @@
2
2
  jarvis = jarvis.main:main
3
3
  jarvis-codebase = jarvis.jarvis_codebase.main:main
4
4
  jarvis-coder = jarvis.jarvis_coder.main:main
5
+ jarvis-rag = jarvis.rag.main:main