jarvis-ai-assistant 0.1.66__py3-none-any.whl → 0.1.72__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
jarvis/__init__.py CHANGED
@@ -1,3 +1,3 @@
  """Jarvis AI Assistant"""
 
- __version__ = "0.1.66"
+ __version__ = "0.1.72"
jarvis/jarvis_codebase/main.py CHANGED
@@ -1,7 +1,5 @@
  import hashlib
  import os
- import sqlite3
- import time
  import numpy as np
  import faiss
  from typing import List, Tuple, Optional
@@ -13,24 +11,29 @@ from jarvis.utils import OutputType, PrettyOutput, find_git_root
  from jarvis.utils import load_env_from_file
  import argparse
  from sentence_transformers import SentenceTransformer
+ import pickle
 
  class CodeBase:
- def __init__(self, root_dir: str, thread_count: int = 10):
+ def __init__(self, root_dir: str):
  load_env_from_file()
  self.root_dir = root_dir
  os.chdir(self.root_dir)
- self.thread_count = thread_count
+ self.thread_count = int(os.environ.get("JARVIS_THREAD_COUNT") or 10)
  self.cheap_platform = os.environ.get("JARVIS_CHEAP_PLATFORM") or os.environ.get("JARVIS_PLATFORM") or "kimi"
  self.cheap_model = os.environ.get("JARVIS_CHEAP_MODEL") or os.environ.get("JARVIS_MODEL") or "kimi"
  self.normal_platform = os.environ.get("JARVIS_PLATFORM") or "kimi"
+ self.codegen_platform = os.environ.get("JARVIS_CODEGEN_PLATFORM") or os.environ.get("JARVIS_PLATFORM") or "kimi"
+ self.codegen_model = os.environ.get("JARVIS_CODEGEN_MODEL") or os.environ.get("JARVIS_MODEL") or "kimi"
  self.normal_model = os.environ.get("JARVIS_MODEL") or "kimi"
  self.embedding_model_name = os.environ.get("JARVIS_EMBEDDING_MODEL") or "BAAI/bge-large-zh-v1.5"
- if not self.cheap_platform or not self.cheap_model or not self.embedding_model_name or not self.normal_platform or not self.normal_model:
- raise ValueError("JARVIS_CHEAP_PLATFORM or JARVIS_CHEAP_MODEL or JARVIS_EMBEDDING_MODEL or JARVIS_PLATFORM or JARVIS_MODEL is not set")
+ if not self.cheap_platform or not self.cheap_model or not self.codegen_platform or not self.codegen_model or not self.embedding_model_name or not self.normal_platform or not self.normal_model:
+ raise ValueError("JARVIS_CHEAP_PLATFORM or JARVIS_CHEAP_MODEL or JARVIS_CODEGEN_PLATFORM or JARVIS_CODEGEN_MODEL or JARVIS_EMBEDDING_MODEL or JARVIS_PLATFORM or JARVIS_MODEL is not set")
 
  PrettyOutput.print(f"Cheap model platform: {self.cheap_platform} model: {self.cheap_model}", output_type=OutputType.INFO)
+ PrettyOutput.print(f"Code generation model platform: {self.codegen_platform} model: {self.codegen_model}", output_type=OutputType.INFO)
  PrettyOutput.print(f"Analysis model platform: {self.normal_platform} model: {self.normal_model}", output_type=OutputType.INFO)
  PrettyOutput.print(f"Embedding model: {self.embedding_model_name}", output_type=OutputType.INFO)
+ PrettyOutput.print(f"Index build threads: {self.thread_count}", output_type=OutputType.INFO)
  PrettyOutput.print(f"Retrieval algorithm: Hierarchical Navigable Small World (HNSW)", output_type=OutputType.INFO)
 
  # Initialize the data directory
@@ -40,6 +43,7 @@ class CodeBase:
 
  # Initialize the embedding model, using the system default cache directory
  try:
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
  PrettyOutput.print("Loading/downloading the model, please wait...", output_type=OutputType.INFO)
  self.embedding_model = SentenceTransformer(self.embedding_model_name)
 
@@ -59,59 +63,37 @@ class CodeBase:
 
  self.vector_dim = self.embedding_model.get_sentence_embedding_dimension()
 
-
- self.db_path = os.path.join(self.data_dir, "codebase.db")
- if not os.path.exists(self.db_path):
- self.create_db()
  self.git_file_list = self.get_git_file_list()
  self.platform_registry = PlatformRegistry().get_global_platform_registry()
- self.index_path = os.path.join(self.data_dir, "vectors.index")
- self.index = None
- if os.path.exists(self.index_path):
- PrettyOutput.print("Loading the vector database", output_type=OutputType.INFO)
- self.index = faiss.read_index(self.index_path)
+
+ # Initialize the cache and index
+ self.cache_path = os.path.join(self.data_dir, "cache.pkl")
+ self.vector_cache = {}
+ self.file_paths = []
+
+ # Load the cache
+ if os.path.exists(self.cache_path):
+ try:
+ with open(self.cache_path, 'rb') as f:
+ cache_data = pickle.load(f)
+ self.vector_cache = cache_data["vectors"]
+ self.file_paths = cache_data["file_paths"]
+ PrettyOutput.print(f"Loaded {len(self.vector_cache)} cached vectors",
+ output_type=OutputType.INFO)
+ # Rebuild the index from the cache
+ self.build_index()
+ except Exception as e:
+ PrettyOutput.print(f"Failed to load cache: {str(e)}",
+ output_type=OutputType.WARNING)
+ self.vector_cache = {}
+ self.file_paths = []
+ self.index = None
 
  def get_git_file_list(self):
- return os.popen("git ls-files").read().splitlines()
-
- def get_db_connection(self):
- """Create and return a new database connection"""
- return sqlite3.connect(self.db_path)
-
- def clean_db(self) -> bool:
- """Clean stale records from the database and the vector index"""
- db = self.get_db_connection()
- try:
- # Fetch all database records
- all_records = db.execute("SELECT path FROM codebase").fetchall()
- files_to_delete = []
-
- # Find the files that need to be deleted
- for row in all_records:
- if row[0] not in self.git_file_list:
- files_to_delete.append(row[0])
-
- if not files_to_delete:
- return False
-
- for file_path in files_to_delete:
- db.execute("DELETE FROM codebase WHERE path = ?", (file_path,))
-
- db.commit()
-
- PrettyOutput.print(f"Cleaned records for {len(files_to_delete)} files",
- output_type=OutputType.INFO)
- return True
- finally:
- db.close()
-
- def create_db(self):
- db = self.get_db_connection()
- try:
- db.execute("CREATE TABLE IF NOT EXISTS codebase (path TEXT, md5 TEXT ,description TEXT)")
- db.commit()
- finally:
- db.close()
+ """Get the file list of the git repository, excluding the .jarvis-codebase directory"""
+ files = os.popen("git ls-files").read().splitlines()
+ # Filter out files under the .jarvis-codebase directory
+ return [f for f in files if not f.startswith(".jarvis-codebase/")]
 
  def is_text_file(self, file_path: str):
  with open(file_path, "r", encoding="utf-8") as f:
@@ -144,6 +126,74 @@ class CodeBase:
  response = model.chat(prompt)
  return response
 
+ def save_cache(self):
+ """Save the cache data"""
+ try:
+ cache_data = {
+ "vectors": self.vector_cache,
+ "file_paths": self.file_paths
+ }
+ with open(self.cache_path, 'wb') as f:
+ pickle.dump(cache_data, f)
+ PrettyOutput.print(f"Saved {len(self.vector_cache)} cached vectors",
+ output_type=OutputType.INFO)
+ except Exception as e:
+ PrettyOutput.print(f"Failed to save cache: {str(e)}",
+ output_type=OutputType.ERROR)
+
+ def get_cached_vector(self, file_path: str, description: str) -> Optional[np.ndarray]:
+ """Get a file's vector representation from the cache"""
+ if file_path not in self.vector_cache:
+ return None
+
+ # Check whether the file has been modified
+ try:
+ with open(file_path, "rb") as f:
+ current_md5 = hashlib.md5(f.read()).hexdigest()
+ except Exception as e:
+ PrettyOutput.print(f"Failed to compute MD5 for {file_path}: {str(e)}",
+ output_type=OutputType.ERROR)
+ return None
+
+ cached_data = self.vector_cache[file_path]
+ if cached_data["md5"] != current_md5:
+ return None
+
+ # Check whether the description has changed
+ if cached_data["description"] != description:
+ return None
+
+ return cached_data["vector"]
+
+ def cache_vector(self, file_path: str, vector: np.ndarray, description: str):
+ """Cache a file's vector representation"""
+ try:
+ with open(file_path, "rb") as f:
+ file_md5 = hashlib.md5(f.read()).hexdigest()
+ except Exception as e:
+ PrettyOutput.print(f"Failed to compute MD5 for {file_path}: {str(e)}",
+ output_type=OutputType.ERROR)
+ file_md5 = ""
+
+ self.vector_cache[file_path] = {
+ "path": file_path, # file path
+ "md5": file_md5, # file MD5
+ "description": description, # file description
+ "vector": vector # vector
+ }
+
+ # Persist the cache to disk
+ try:
+ with open(self.cache_path, 'wb') as f:
+ cache_data = {
+ "vectors": self.vector_cache,
+ "file_paths": self.file_paths
+ }
+ pickle.dump(cache_data, f)
+ except Exception as e:
+ PrettyOutput.print(f"Failed to save vector cache: {str(e)}",
+ output_type=OutputType.ERROR)
+
  def get_embedding(self, text: str) -> np.ndarray:
  """Get a text's vector representation with the transformers model"""
  # Truncate long text
@@ -152,163 +202,291 @@ class CodeBase:
 
  # Get the embedding vector
  embedding = self.embedding_model.encode(text,
- normalize_embeddings=True, # L2 normalization
- show_progress_bar=False)
- return np.array(embedding, dtype=np.float32)
+ normalize_embeddings=True, # L2 normalization
+ show_progress_bar=False)
+ vector = np.array(embedding, dtype=np.float32)
+ return vector
 
  def vectorize_file(self, file_path: str, description: str) -> np.ndarray:
  """Vectorize the file content and description"""
- try:
+ try:
+ # Try the cache first
+ cached_vector = self.get_cached_vector(file_path, description)
+ if cached_vector is not None:
+ return cached_vector
+
  # Combine the file information
  combined_text = f"""
  File path: {file_path}
  File description: {description}
  """
- return self.get_embedding(combined_text)
+ vector = self.get_embedding(combined_text)
+
+ # Save to the cache
+ self.cache_vector(file_path, vector, description)
+ return vector
  except Exception as e:
  PrettyOutput.print(f"Error vectorizing file {file_path}: {str(e)}",
  output_type=OutputType.ERROR)
  return np.zeros(self.vector_dim, dtype=np.float32)
 
- def process_file(self, file):
- """Helper method for processing a single file"""
- db = self.get_db_connection()
+ def clean_cache(self) -> bool:
+ """Clean stale cache records; return whether any files were removed"""
+ files_to_delete = []
+ for file_path in list(self.vector_cache.keys()):
+ if file_path not in self.git_file_list:
+ del self.vector_cache[file_path]
+ files_to_delete.append(file_path)
+
+ if files_to_delete:
+ self.save_cache()
+ PrettyOutput.print(f"Cleaned the cache for {len(files_to_delete)} files",
+ output_type=OutputType.INFO)
+ return True
+ return False
+
+ def process_file(self, file_path: str):
+ """Process a single file"""
  try:
- if not self.is_text_file(file):
+ # Skip files that do not exist
+ if not os.path.exists(file_path):
  return None
- md5 = hashlib.md5(open(file, "rb").read()).hexdigest()
- if db.execute("SELECT path FROM codebase WHERE md5 = ?", (md5,)).fetchone():
+
+ if not self.is_text_file(file_path):
  return None
- description = self.make_description(file)
- return (file, md5, description)
- finally:
- db.close()
+
+ md5 = hashlib.md5(open(file_path, "rb").read()).hexdigest()
+
+ # Check whether the file has already been processed and is unchanged
+ if file_path in self.vector_cache:
+ if self.vector_cache[file_path].get("md5") == md5:
+ return None
+
+ description = self.make_description(file_path)
+ vector = self.vectorize_file(file_path, description)
+
+ # Save to the cache, keyed by the actual file path
+ self.vector_cache[file_path] = {
+ "vector": vector,
+ "description": description,
+ "md5": md5
+ }
+
+ return file_path
+
+ except Exception as e:
+ PrettyOutput.print(f"Failed to process file {file_path}: {str(e)}",
+ output_type=OutputType.ERROR,
+ traceback=True)
+ return None
 
- def gen_vector_db_from_sqlite(self):
- self.index = faiss.IndexHNSWFlat(self.vector_dim, 16)
- self.index.hnsw.efConstruction = 40
- self.index.hnsw.efSearch = 16
- db = self.get_db_connection()
- try:
- all_records = db.execute("SELECT path, description FROM codebase").fetchall()
- for row in all_records:
- file, description = row
- PrettyOutput.print(f"Vectorizing file: {file}", output_type=OutputType.INFO)
- vector = self.vectorize_file(file, description)
- vector = vector.reshape(1, -1)
- self.index.add(vector)
- faiss.write_index(self.index, self.index_path)
- finally:
- db.close()
+ def build_index(self):
+ """Build the faiss index from the vector cache"""
+ # Create the underlying HNSW index
+ hnsw_index = faiss.IndexHNSWFlat(self.vector_dim, 16)
+ hnsw_index.hnsw.efConstruction = 40
+ hnsw_index.hnsw.efSearch = 16
+
+ # Wrap the HNSW index with IndexIDMap
+ self.index = faiss.IndexIDMap(hnsw_index)
+
+ vectors = []
+ ids = []
+ self.file_paths = [] # Reset the file path list
+
+ for i, (file_path, data) in enumerate(self.vector_cache.items()):
+ vectors.append(data["vector"].reshape(1, -1))
+ ids.append(i)
+ self.file_paths.append(file_path)
+
+ if vectors:
+ vectors = np.vstack(vectors)
+ self.index.add_with_ids(vectors, np.array(ids))
+ else:
+ self.index = None
+
+ def gen_vector_db_from_cache(self):
+ """Generate the vector database from the cache"""
+ self.build_index()
+ self.save_cache()
 
  def generate_codebase(self):
- updated =self.clean_db()
- db_lock = Lock()
- processed_files = [] # Track the processed files
-
- def process_and_save(file):
- result = self.process_file(file)
- if result:
- file, md5, description = result
- db = self.get_db_connection()
- try:
- with db_lock:
- db.execute("DELETE FROM codebase WHERE path = ?", (file,))
- db.execute("INSERT INTO codebase (path, md5, description) VALUES (?, ?, ?)",
- (file, md5, description))
- db.commit()
- PrettyOutput.print(f"Indexed file: {file}", output_type=OutputType.INFO)
- processed_files.append(file)
- finally:
- db.close()
+ """Generate the codebase index"""
+ files_deleted = self.clean_cache() # Clean stale cache entries
+ processed_files = []
 
- # Use ThreadPoolExecutor and wait for all tasks to finish
+ # Process files with a thread pool
  with ThreadPoolExecutor(max_workers=self.thread_count) as executor:
- futures = [executor.submit(process_and_save, file) for file in self.git_file_list]
- # Wait for all tasks to finish
- concurrent.futures.wait(futures)
+ futures = [executor.submit(self.process_file, file) for file in self.git_file_list]
+ for future in concurrent.futures.as_completed(futures):
+ result = future.result()
+ if result:
+ processed_files.append(result)
+ PrettyOutput.print(f"Indexed file: {result}", output_type=OutputType.INFO)
 
- if updated or len(processed_files) > 0:
- PrettyOutput.print("Files were added or removed; regenerating the vector database", output_type=OutputType.INFO)
- self.gen_vector_db_from_sqlite()
+ if files_deleted or processed_files:
+ PrettyOutput.print("Regenerating the vector database", output_type=OutputType.INFO)
+ self.gen_vector_db_from_cache()
  else:
- PrettyOutput.print("No files were added or removed; skipping vector database generation", output_type=OutputType.INFO)
+ PrettyOutput.print("No file changes; skipping vector database generation", output_type=OutputType.INFO)
 
- PrettyOutput.print(f"Successfully indexed {len(processed_files)} files", output_type=OutputType.INFO)
+ PrettyOutput.print(f"Successfully generated indexes for {len(processed_files)} files", output_type=OutputType.INFO)
 
- def search_similar(self, query: str, top_k: int = 5) -> List[Tuple[str, float, str]]:
- """Search for the files most similar to the query
-
- Args:
- query: query text
- top_k: number of results to return
+ def rerank_results(self, query: str, initial_results: List[Tuple[str, float, str]]) -> List[Tuple[str, float, str]]:
+ """Rerank search results with a large model"""
+ if not initial_results:
+ return []
+
+ model = self.platform_registry.create_platform(self.normal_platform)
+ model.set_model_name(self.normal_model)
+ model.set_suppress_output(True)
+
+ try:
+ # Build the reranking prompt
+ prompt = f"""Based on the user's query, rank the following code files by relevance. Give each file a relevance score from 0 to 100; higher means more relevant.
+ Output only each file's score, in the format:
+ <RERANK_START>
+ file path: score
+ file path: score
+ <RERANK_END>
+
+ User query: {query}
+
+ Files to evaluate:
+ """
+ for path, _, desc in initial_results:
+ prompt += f"""
+ File: {path}
+ Description: {desc}
+ ---
+ """
+
+ response = model.chat(prompt)
+
+ # Extract the content between <RERANK_START> and <RERANK_END>
+ start_tag = "<RERANK_START>"
+ end_tag = "<RERANK_END>"
+ if start_tag in response and end_tag in response:
+ response = response[response.find(start_tag) + len(start_tag):response.find(end_tag)]
+
+ # Parse the response and extract file paths and scores
+ scored_results = []
+ for line in response.split('\n'):
+ if ':' not in line:
+ continue
+ try:
+ file_path, score_str = line.split(':', 1)
+ file_path = file_path.strip()
+ score = float(score_str.strip())
+ # Find the corresponding original description
+ desc = next((desc for p, _, desc in initial_results if p == file_path), "")
+ scored_results.append((file_path, score/100.0, desc))
+ except:
+ continue
 
- Returns:
- List of (file_path, similarity_score, description) tuples
- """
- # Get the query text's vector representation
- query_vector = self.get_embedding(query)
- query_vector = query_vector.reshape(1, -1)
-
- # Search for the most similar vectors
- distances, indices = self.index.search(query_vector, top_k)
+ # Sort by score in descending order
+ return sorted(scored_results, key=lambda x: x[1], reverse=True)
+
+ finally:
+ model.delete_chat()
+
+ return initial_results
+
+ def search_similar(self, query: str, top_k: int = 20) -> List[Tuple[str, float, str]]:
+ """Search for similar files"""
+ model = self.platform_registry.create_platform(self.normal_platform)
+ model.set_model_name(self.normal_model)
+ model.set_suppress_output(True)
 
- # Get the corresponding file information
- db = self.get_db_connection()
  try:
- results = []
- for i, distance in zip(indices[0], distances[0]):
- if i == -1: # faiss returns -1 for invalid results
- continue
-
- # Convert numpy.int64 to Python int
- offset = int(i)
- # Get the file path and description
- cursor = db.execute("SELECT path, description FROM codebase LIMIT 1 OFFSET ?", (offset,))
- row = cursor.fetchone()
- if row:
- path, description = row
- # Convert the distance into a similarity score (between 0 and 1)
- similarity = 1.0 / (1.0 + float(distance)) # Make sure to use a Python float
- results.append((path, similarity, description))
+ prompt = f"""Based on the following query, generate an alternative phrasing with exactly the same meaning. It will be used for code search, so keep it professional and precise.
+ Original query: {query}
+
+ Output the new phrasing directly, without numbering or other markers.
+ """
+
+ query = model.chat(prompt)
 
- return results
  finally:
- db.close()
+ model.delete_chat()
+
+ PrettyOutput.print(f"Query: {query}", output_type=OutputType.INFO)
+
+ # Get similar files for each query
+ all_results = {} # file path -> (total score, occurrence count, description)
+
+ q_vector = self.get_embedding(query)
+ q_vector = q_vector.reshape(1, -1)
+
+ distances, indices = self.index.search(q_vector, top_k)
+
+ PrettyOutput.print(f"Results for query {query}: ", output_type=OutputType.INFO)
+
+ initial_results = []
+
+ for i, distance in zip(indices[0], distances[0]):
+ if i == -1: # faiss returns -1 for invalid results
+ continue
+
+ similarity = 1.0 / (1.0 + float(distance))
+ PrettyOutput.print(f" {self.file_paths[i]} : distance {distance:.3f}, similarity {similarity:.3f}",
+ output_type=OutputType.INFO)
+
+ file_path = self.file_paths[i]
+ data = self.vector_cache[file_path]
+ initial_results.append((file_path, similarity, data["description"]))
+
+ # Rerank with the large model
+ PrettyOutput.print("Reranking with the large model...", output_type=OutputType.INFO)
+ reranked_results = self.rerank_results(query, initial_results)
+
+ return reranked_results
 
- def ask_codebase(self, query: str, top_k: int = 5) -> List[Tuple[str, float, str]]:
- """Ask a question about the codebase"""
- # Use the search function to get similar files
+ def ask_codebase(self, query: str, top_k: int=20) -> str:
+ """Query the codebase"""
  results = self.search_similar(query, top_k)
- PrettyOutput.print(f"Related files found: ", output_type=OutputType.INFO)
+ PrettyOutput.print(f"Related files found: ", output_type=OutputType.SUCCESS)
  for path, score, _ in results:
- PrettyOutput.print(f"File: {path} relevance: {score:.3f}", output_type=OutputType.INFO)
+ PrettyOutput.print(f"File: {path} relevance: {score:.3f}",
+ output_type=OutputType.INFO)
 
  prompt = f"""You are a code expert. Answer the user's question based on the following file information:
  """
  for path, _, _ in results:
- content = open(path, "r", encoding="utf-8").read()
- prompt += f"""
- File path: {path}
+ try:
+ if len(prompt) > 30 * 1024:
+ PrettyOutput.print(f"Dropping low-relevance file to avoid exceeding the context limit: {path}", OutputType.WARNING)
+ continue
+ content = open(path, "r", encoding="utf-8").read()
+ prompt += f"""
+ File path: {path}
  File content:
  {content}
  ========================================
  """
+ except Exception as e:
+ PrettyOutput.print(f"Failed to read file {path}: {str(e)}",
+ output_type=OutputType.ERROR)
+ continue
+
  prompt += f"""
  User question: {query}
 
  Please answer the user's question in professional language. If the provided file content is not enough to answer it, say so; never make things up.
  """
- model = self.platform_registry.create_platform(self.normal_platform)
- model.set_model_name(self.normal_model)
- response = model.chat(prompt)
- return response
+ model = self.platform_registry.create_platform(self.codegen_platform)
+ model.set_model_name(self.codegen_model)
+ try:
+ response = model.chat(prompt)
+ return response
+ finally:
+ model.delete_chat()
 
 
  def main():
  parser = argparse.ArgumentParser(description='Codebase management and search tool')
  parser.add_argument('--search', type=str, help='Search query to find similar code files')
- parser.add_argument('--top-k', type=int, default=5, help='Number of results to return (default: 5)')
+ parser.add_argument('--top-k', type=int, default=20, help='Number of results to return (default: 20)')
  parser.add_argument('--ask', type=str, help='Ask a question about the codebase')
  args = parser.parse_args()
 
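The change above replaces the old sqlite table and on-disk faiss index with a single pickled vector cache (cache.pkl) that is reloaded on startup and used to rebuild an in-memory HNSW index wrapped in an IndexIDMap, so that faiss ids map back to file paths. The sketch below illustrates that flow in isolation; the embed() helper and the toy vectors are hypothetical stand-ins for the SentenceTransformer embeddings, while the cache keys and HNSW parameters mirror the values visible in the diff.

```python
import os
import pickle

import faiss
import numpy as np

DIM = 8                 # the real code uses the embedding model's dimension
CACHE_PATH = "cache.pkl"

def embed(text: str) -> np.ndarray:
    """Hypothetical stand-in for SentenceTransformer.encode (normalized float32 vector)."""
    rng = np.random.default_rng(abs(hash(text)) % (2 ** 32))
    vector = rng.random(DIM, dtype=np.float32)
    return vector / np.linalg.norm(vector)

# Vector cache keyed by file path, mirroring the diff: {"vector", "description", "md5"}
vector_cache = {
    "jarvis/utils.py": {"vector": embed("utility helpers"), "description": "utility helpers", "md5": "..."},
    "jarvis/agent.py": {"vector": embed("agent main loop"), "description": "agent main loop", "md5": "..."},
}

# Persist the cache the same way save_cache() does
with open(CACHE_PATH, "wb") as f:
    pickle.dump({"vectors": vector_cache, "file_paths": list(vector_cache)}, f)

# Rebuild the index from the cache: IndexIDMap wrapping IndexHNSWFlat
hnsw = faiss.IndexHNSWFlat(DIM, 16)
hnsw.hnsw.efConstruction = 40
hnsw.hnsw.efSearch = 16
index = faiss.IndexIDMap(hnsw)

file_paths = list(vector_cache)
vectors = np.vstack([vector_cache[p]["vector"].reshape(1, -1) for p in file_paths])
index.add_with_ids(vectors, np.arange(len(file_paths), dtype=np.int64))

# Search: returned ids are positions in file_paths, so hits map back to files
query = embed("how does the agent loop work").reshape(1, -1)
distances, ids = index.search(query, 2)
for i, distance in zip(ids[0], distances[0]):
    if i == -1:          # faiss returns -1 for empty slots
        continue
    print(file_paths[i], "similarity:", 1.0 / (1.0 + float(distance)))

os.remove(CACHE_PATH)    # clean up the toy cache file
```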
jarvis/jarvis_coder/main.py CHANGED
@@ -1,11 +1,8 @@
- from concurrent.futures import ThreadPoolExecutor, as_completed
- import hashlib
  import os
  import re
- import sqlite3
  import threading
  import time
- from typing import Dict, Any, List, Optional, Tuple
+ from typing import Dict, Any, List, Tuple
 
  import yaml
  from jarvis.models.base import BasePlatform
@@ -127,7 +124,7 @@ class JarvisCoder:
  return [patch.replace('<PATCH_START>', '').replace('<PATCH_END>', '').strip()
  for patch in patches if patch.strip()]
  except Exception as e:
- PrettyOutput.print(f"Failed to parse patch: {str(e)}", OutputType.ERROR)
+ PrettyOutput.print(f"Failed to parse patch: {str(e)}", OutputType.WARNING)
  return []
 
  def _make_patch(self, related_files: List[Dict], feature: str) -> List[str]:
@@ -141,20 +138,31 @@
  content to replace
  =======
  new content
- <<<<<<
+ >>>>>>
  <PATCH_END>
 
- 2. For a new file, the format is:
+ 2. For a new file, or to replace the entire content of a file, the format is:
  <PATCH_START>
  >>>>>> path/to/new/file
  =======
  full content of the new file
- <<<<<<
+ >>>>>>
+ <PATCH_END>
+
+ 3. To delete a section of a file, the format is:
+ <PATCH_START>
+ >>>>>> path/to/file
+ content to delete
+ =======
+ >>>>>>
  <PATCH_END>
 
  The file list is as follows:
  """
  for i, file in enumerate(related_files):
+ if len(prompt) > 30 * 1024:
+ PrettyOutput.print(f'Dropping low-relevance file to avoid exceeding the context limit: {file["file_path"]}', OutputType.WARNING)
+ continue
  prompt += f"""{i}. {file["file_path"]}\n"""
  prompt += f"""File content:\n"""
  prompt += f"<FILE_CONTENT_START>\n"
@@ -180,7 +188,7 @@
  return [patch.replace('<PATCH_START>', '').replace('<PATCH_END>', '').strip()
  for patch in patches if patch.strip()]
  except Exception as e:
- PrettyOutput.print(f"Failed to parse patch: {str(e)}", OutputType.ERROR)
+ PrettyOutput.print(f"Failed to parse patch: {str(e)}", OutputType.WARNING)
  return []
 
  def _apply_patch(self, related_files: List[Dict], patches: List[str]) -> Tuple[bool, str]:
@@ -219,7 +227,7 @@
  return False, "\n".join(error_info)
 
  old_content = parts[0]
- new_content = parts[1].split("<<<<<<")[0]
+ new_content = parts[1].split(">>>>>>")[0]
 
  # Handle new files
  if not old_content:
@@ -308,9 +316,9 @@
  def _load_related_files(self, feature: str) -> List[Dict]:
  """Load the content of related files"""
  ret = []
- related_files = self._codebase.search_similar(feature, top_k=5)
+ related_files = self._codebase.search_similar(feature)
  for file, score, _ in related_files:
- PrettyOutput.print(f"Related file: {file} relevance: {score:.3f}", OutputType.INFO)
+ PrettyOutput.print(f"Related file: {file} relevance: {score:.3f}", OutputType.SUCCESS)
  with open(file, "r", encoding="utf-8") as f:
  content = f.read()
  ret.append({"file_path": file, "file_content": content})
@@ -441,7 +449,7 @@ def main():
  PrettyOutput.print(result["stdout"], OutputType.SUCCESS)
  else:
  if result["stderr"]:
- PrettyOutput.print(result["stderr"], OutputType.ERROR)
+ PrettyOutput.print(result["stderr"], OutputType.WARNING)
  if result["error"]:
  PrettyOutput.print(f"Error type: {type(result['error']).__name__}", OutputType.WARNING)
 
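The jarvis_coder changes above switch the patch block terminator from <<<<<< to >>>>>> and make _apply_patch split on the new marker. As a rough illustration only (the helper names and exact splitting below are assumptions, not the package's implementation), a single block in the revised format could be parsed like this:

```python
from typing import Tuple

def parse_patch_block(patch: str) -> Tuple[str, str, str]:
    """Hypothetical parser for one block of the revised patch format:

        >>>>>> path/to/file
        old content (empty for a new file / whole-file replacement)
        =======
        new content (empty to delete the old content)
        >>>>>>
    """
    header, _, body = patch.partition("\n")
    file_path = header.replace(">>>>>>", "").strip()   # first line carries the target path
    old_part, _, rest = body.partition("=======")
    new_part = rest.split(">>>>>>")[0]                  # new content ends at the closing marker
    return file_path, old_part.strip("\n"), new_part.strip("\n")

def apply_patch(original: str, old_content: str, new_content: str) -> str:
    """Replace old_content with new_content; empty old_content means a whole-file write."""
    if not old_content:
        return new_content
    return original.replace(old_content, new_content, 1)

block = """>>>>>> demo.py
print("hello")
=======
print("hello, world")
>>>>>>"""

path, old, new = parse_patch_block(block)
print(path)                                        # demo.py
print(apply_patch('print("hello")\n', old, new))   # print("hello, world")
```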
jarvis/models/registry.py CHANGED
@@ -121,7 +121,7 @@ class PlatformRegistry:
  # Check the platform implementation
  if not PlatformRegistry.check_platform_implementation(obj):
  continue
-
+ PrettyOutput.print(f"Loaded platform from {os.path.join(directory, filename)}: {obj.platform_name}", OutputType.SUCCESS)
  platforms[obj.platform_name] = obj
  break
  except Exception as e:
jarvis/tools/__init__.py CHANGED
@@ -1,9 +1,6 @@
  from .registry import ToolRegistry
- from jarvis.tools.codebase_qa import CodebaseQATool
 
  __all__ = [
  'ToolRegistry',
  ]
 
- def register_tools():
- register_tool(CodebaseQATool())
jarvis/tools/registry.py CHANGED
@@ -99,7 +99,7 @@ class ToolRegistry:
  parameters=tool_instance.parameters,
  func=tool_instance.execute
  )
- PrettyOutput.print(f"Loaded tool from {file_path}: {tool_instance.name}: {tool_instance.description}", OutputType.INFO)
+ PrettyOutput.print(f"Loaded tool from {file_path}: {tool_instance.name}: {tool_instance.description}", OutputType.SUCCESS)
  tool_found = True
  break
 
jarvis/utils.py CHANGED
@@ -158,7 +158,7 @@ def get_multiline_input(tip: str) -> str:
  lines.append(line)
 
  except KeyboardInterrupt:
- PrettyOutput.print("\nInput cancelled", OutputType.ERROR)
+ PrettyOutput.print("\nInput cancelled", OutputType.INFO)
  return "__interrupt__"
 
  return "\n".join(lines)
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: jarvis-ai-assistant
- Version: 0.1.66
+ Version: 0.1.72
  Summary: Jarvis: An AI assistant that uses tools to interact with the system
  Home-page: https://github.com/skyfireitdiy/Jarvis
  Author: skyfire
@@ -44,9 +44,10 @@ Requires-Dist: colorama>=0.4.6
  Requires-Dist: prompt_toolkit>=3.0.0
  Requires-Dist: openai>=1.20.0
  Requires-Dist: playwright>=1.41.1
- Requires-Dist: numpy>=1.26.0
- Requires-Dist: faiss-cpu>=1.8.1
+ Requires-Dist: numpy>=1.24.0
+ Requires-Dist: faiss-cpu>=1.8.0
  Requires-Dist: sentence-transformers>=2.2.2
+ Requires-Dist: bs4>=0.0.1
  Provides-Extra: dev
  Requires-Dist: pytest; extra == "dev"
  Requires-Dist: black; extra == "dev"
@@ -123,7 +124,6 @@ Jarvis supports configuration through environment variables that can be set in t
  |---------|------|--------|------|
  | JARVIS_PLATFORM | AI platform to use, supports kimi/openai/ai8 etc | kimi | Yes |
  | JARVIS_MODEL | Model name to use | - | No |
-
  | JARVIS_CODEGEN_PLATFORM | AI platform for code generation | Same as JARVIS_PLATFORM | No |
  | JARVIS_CODEGEN_MODEL | Model name for code generation | Same as JARVIS_MODEL | No |
  | JARVIS_CHEAP_PLATFORM | AI platform for cheap operations | Same as JARVIS_PLATFORM | No |
@@ -155,17 +155,17 @@ jarvis -p openai # Use OpenAI platform
 
  ### Code Modification
  ```bash
- jarvis coder --feature "Add new feature" # Modify code to add new feature
+ jarvis-coder --feature "Add new feature" # Modify code to add new feature
  ```
 
  ### Codebase Search
  ```bash
- jarvis codebase --search "database connection" # Search codebase
+ jarvis-codebase --search "database connection" # Search codebase
  ```
 
  ### Codebase Question
  ```bash
- jarvis codebase --ask "How to use the database?" # Ask about codebase
+ jarvis-codebase --ask "How to use the database?" # Ask about codebase
  ```
 
  ### Keep Chat History
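The "Same as JARVIS_PLATFORM" / "Same as JARVIS_MODEL" defaults in the configuration table above correspond to plain or-chain fallbacks over os.environ in the code, with kimi as the final default and JARVIS_THREAD_COUNT controlling the index build threads. A minimal sketch of that resolution; the resolve() helper is illustrative, the real code reads os.environ inline:

```python
import os

def resolve(*names: str, default: str = "kimi") -> str:
    """Return the first non-empty environment variable among names, else the default."""
    for name in names:
        value = os.environ.get(name)
        if value:
            return value
    return default

os.environ["JARVIS_PLATFORM"] = "openai"   # example setting, e.g. loaded from an env file

platform = resolve("JARVIS_PLATFORM")                                      # "openai"
codegen_platform = resolve("JARVIS_CODEGEN_PLATFORM", "JARVIS_PLATFORM")   # falls back to "openai"
cheap_platform = resolve("JARVIS_CHEAP_PLATFORM", "JARVIS_PLATFORM")       # falls back to "openai"
thread_count = int(os.environ.get("JARVIS_THREAD_COUNT") or 10)            # 10 unless overridden

print(platform, codegen_platform, cheap_platform, thread_count)
```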
@@ -1,32 +1,33 @@
- jarvis/__init__.py,sha256=XwQusgz9_jvC5jEMwbV21qnY-dFIO3yH5W9FEENNxsU,50
+ jarvis/__init__.py,sha256=MyFCehMdftT6QiIVeyWoT3cnA4reDk6ObCHGRDEr7JY,50
  jarvis/agent.py,sha256=kl6pwNrluzb-9eZKgwmsk5Jh4CpWi4F8B3RvEQNvc5U,14921
  jarvis/main.py,sha256=7EcSlxa5JFFXBujzKDWdNtwX6axLhFFdJMc2GxTjfdk,6295
- jarvis/utils.py,sha256=bjC0PAR58RvcXHgabIFmNmYL1L_GhhiEwMFytWurcN4,7499
+ jarvis/utils.py,sha256=vZV8sHj0ggZy4Rb8RxIujQhRWgeNEomhqVl4WXmpq7c,7498
  jarvis/jarvis_codebase/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- jarvis/jarvis_codebase/main.py,sha256=bncfOXKFthcr21cV9tONXzcchSWaaYXmo0o11R5hPzc,14799
- jarvis/jarvis_coder/main.py,sha256=mK68MJyOerVnY7Fr9ibQ1swQLVIWyBAFmmVj08SHKyk,21825
+ jarvis/jarvis_codebase/main.py,sha256=A-KY9WzAIsCF0QPndm4b8-rAKEz4nLlm4NkfUMpnQUM,21965
+ jarvis/jarvis_coder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ jarvis/jarvis_coder/main.py,sha256=uQ7aQ3hmnxecacjLSBOV0rSLkacExGLgQRc9zD1ar1E,22085
  jarvis/models/__init__.py,sha256=mrOt67nselz_H1gX9wdAO4y2DY5WPXzABqJbr5Des8k,63
  jarvis/models/ai8.py,sha256=vgy-r_3HHxGMAalZrA65VWHC1PuwBTYgtprSgHkCbrk,12557
  jarvis/models/base.py,sha256=ShV1H8Unee4RMaiFO4idROQA0Hc6wu4dyeRPX5fcszk,1433
  jarvis/models/kimi.py,sha256=1iTB0Z_WOmCML3Ufsge6jmeKOYvccr7I5lS3JUXymU4,17611
  jarvis/models/openai.py,sha256=ayaBWAN5VexMcKVrjEPDNB-Q9wx0sCV9Z4BCrvwYJ9w,4315
  jarvis/models/oyi.py,sha256=X2c5SWDIuQDCCFBcEKbzIWEz3I34eOAi0d1XAFgxlpw,15001
- jarvis/models/registry.py,sha256=hJyaROiOF_TkbtIXsjOD8-ArOvAvtxviawyqBFfLV6s,7617
- jarvis/tools/__init__.py,sha256=xmROdzJTZz6JDLLuAbwVLjUD4xfUUYb6D1Ssu_desaE,183
+ jarvis/models/registry.py,sha256=YpooKSpk5pSWfb5cBDz5wRfPK-abb9uuUZr4WBejqwI,7762
+ jarvis/tools/__init__.py,sha256=7Rqyj5hBAv5cWDVr5T9ZTZASO7ssBHeQNm2_4ZARdkA,72
  jarvis/tools/base.py,sha256=EGRGbdfbLXDLwtyoWdvp9rlxNX7bzc20t0Vc2VkwIEY,652
  jarvis/tools/codebase_qa.py,sha256=AEpusYxyWtALVVwPk1DMUH9cVI73mE1e3WFHJXDpXto,2333
  jarvis/tools/coder.py,sha256=ZJfPInKms4Hj3-eQlBwamVsvZ-2nlZ-4jsqJ-tJc6mg,2040
  jarvis/tools/file_ops.py,sha256=h8g0eT9UvlJf4kt0DLXvdSsjcPj7x19lxWdDApeDfpg,3842
  jarvis/tools/generator.py,sha256=vVP3eN5cCDpRXf_fn0skETkPXAW1XZFWx9pt2_ahK48,5999
  jarvis/tools/methodology.py,sha256=UG6s5VYRcd9wrKX4cg6f7zJhet5AIcthFGMOAdevBiw,5175
- jarvis/tools/registry.py,sha256=mlOAmUq3yzRz-7yvwrrCwbe5Lmw8eh1v8-_Fa5sezwI,7209
+ jarvis/tools/registry.py,sha256=MeTYNdZNRdhlgABviVxzbDPSgLpwDp2Nx2dGzedRu8U,7212
  jarvis/tools/search.py,sha256=1EqOVvLhg2Csh-i03-XeCrusbyfmH69FZ8khwZt8Tow,6131
  jarvis/tools/shell.py,sha256=UPKshPyOaUwTngresUw-ot1jHjQIb4wCY5nkJqa38lU,2520
  jarvis/tools/sub_agent.py,sha256=rEtAmSVY2ZjFOZEKr5m5wpACOQIiM9Zr_3dT92FhXYU,2621
  jarvis/tools/webpage.py,sha256=d3w3Jcjcu1ESciezTkz3n3Zf-rp_l91PrVoDEZnckOo,2391
- jarvis_ai_assistant-0.1.66.dist-info/LICENSE,sha256=AGgVgQmTqFvaztRtCAXsAMryUymB18gZif7_l2e1XOg,1063
- jarvis_ai_assistant-0.1.66.dist-info/METADATA,sha256=O3UdZLfScFyhR3ARPYaKu1d5hemg45HFN6bZKclTfYk,12374
- jarvis_ai_assistant-0.1.66.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- jarvis_ai_assistant-0.1.66.dist-info/entry_points.txt,sha256=QNUeqmUJd7nHufel2FO7cRttS1uKFfnbIyObv8eVyOY,140
- jarvis_ai_assistant-0.1.66.dist-info/top_level.txt,sha256=1BOxyWfzOP_ZXj8rVTDnNCJ92bBGB0rwq8N1PCpoMIs,7
- jarvis_ai_assistant-0.1.66.dist-info/RECORD,,
+ jarvis_ai_assistant-0.1.72.dist-info/LICENSE,sha256=AGgVgQmTqFvaztRtCAXsAMryUymB18gZif7_l2e1XOg,1063
+ jarvis_ai_assistant-0.1.72.dist-info/METADATA,sha256=4YOsbaLCJw40e43wAL34myfveTRLOmC-MIpBrkrt2zs,12399
+ jarvis_ai_assistant-0.1.72.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ jarvis_ai_assistant-0.1.72.dist-info/entry_points.txt,sha256=QNUeqmUJd7nHufel2FO7cRttS1uKFfnbIyObv8eVyOY,140
+ jarvis_ai_assistant-0.1.72.dist-info/top_level.txt,sha256=1BOxyWfzOP_ZXj8rVTDnNCJ92bBGB0rwq8N1PCpoMIs,7
+ jarvis_ai_assistant-0.1.72.dist-info/RECORD,,